Administrator
发布于 2024-04-11 / 10 阅读
0

Rust编程 Masscan扫描器XML结果解析方法 - 曲速引擎(Warp Drive)

描述

在使用 Rust 语言开发端口监控程序时,调用第三方扫描器 masscan 之后,需要将它输出的 XML 结果反序列化为 Rust 结构体再进行处理。下面分享结果文件的读取、解析与处理方法。

masscan xml 结果原始文件 ffb51432-1fd0-4450-bede-6b58dff4a323.xml

<?xml version="1.0"?>
<!-- masscan v1.0 scan -->
<nmaprun scanner="masscan" start="1710990049" version="1.0-BETA"  xmloutputversion="1.03">
<scaninfo type="syn" protocol="tcp" />
<host endtime="1710990049"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="5672"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990069"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="25"><state state="open" reason="syn-ack" reason_ttl="64"/></port></ports></host>
<host endtime="1710990096"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="5601"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990118"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="22"><state state="open" reason="syn-ack" reason_ttl="64"/></port></ports></host>
<host endtime="1710990163"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990180"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="6379"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990300"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="9200"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990317"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="15672"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<runstats>
<finished time="1710990363" timestr="2024-03-21 11:06:03" elapsed="336" />
<hosts up="8" down="0" total="8" />
</runstats>
</nmaprun>

解析过程

通过设计一个结构体,并且将结果反序列化到这个结构体即可对内容进行访问。

内容分析

要对内容进行反序列化,首先要分析 XML 的结构。

原始文件中的 <nmaprun></nmaprun> 是根元素(一级元素),它的下面有 scaninfo、host、runstats 等子元素,而各个子元素下面又有自己的子元素。<nmaprun scanner="masscan" start="1710990049" version="1.0-BETA"  xmloutputversion="1.03"> 中写在标签里的 scanner、start、version、xmloutputversion 叫做属性,不同的元素有不同的属性。

在设计结构体的时候需要注意元素和属性的区别。在 Rust 中,NmapRun 结构体是用来表示 XML 文件中 <nmaprun> 元素的数据模型。这个结构体通过 serde 库派生了 Deserialize,因此可以将 XML 数据反序列化为 Rust 结构体实例;本文的结构体没有派生 Serialize,所以不能直接把结构体实例再转换回 XML 数据。

每个字段前的 #[serde(rename = "@attribute_name")] 是 serde 派生宏提供的字段属性,其中以 @ 开头的名字是 quick-xml 的约定,表示该字段对应的是 XML 属性而不是子元素。它告诉反序列化器在解析 XML 时,把对应名称的 XML 属性值映射到结构体的字段上。例如,#[serde(rename = "@scanner")] 表示 XML 中的 scanner 属性应该映射到 NmapRun 结构体的 scanner 字段。

根据上面的方法,设计结构体的时候如下:

/**
 * Root of a masscan XML report: models the `<nmaprun>` element.
 *
 * Fields renamed with a leading `@` are read from XML attributes of the
 * element; the remaining fields are read from child elements of the same
 * name. Every attribute value is kept as the raw string found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct NmapRun {
    // `scanner` attribute ("masscan" in the sample report).
    #[serde(rename = "@scanner")]
    scanner: String,
    // `start` attribute (looks like a Unix timestamp in the sample — TODO confirm).
    #[serde(rename = "@start")]
    start: String,
    // `version` attribute ("1.0-BETA" in the sample report).
    #[serde(rename = "@version")]
    version: String,
    #[serde(rename = "@xmloutputversion")] // the "@" prefix selects an XML attribute
    xmloutputversion: String,
    // The `<scaninfo>` child element.
    scaninfo: ScanInfo,
    // One entry per `<host>` child element.
    host: Vec<Host>,
    runstats: RunStats, // the `<runstats>` child element
}

/**
 * Models the `<scaninfo>` element, which carries only attributes.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct ScanInfo {
    // `type` attribute ("syn" in the sample); `type` is a Rust keyword, so the field is named `stype`.
    #[serde(rename = "@type")]
    stype: String,
    // `protocol` attribute ("tcp" in the sample).
    #[serde(rename = "@protocol")]
    sprotocol: String,
}

/**
 * Models one `<host>` element of the report.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Host {
    // `endtime` attribute, kept as the raw string.
    #[serde(rename = "@endtime")]
    endtime: String,
    // The `<address>` child element.
    address: Address,
    // The `<ports>` child element.
    ports: Ports,
}
 
/**
 * Models the `<runstats>` element that closes the report.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct RunStats {
    // The `<finished>` child element.
    finished: Finished,
    // The `<hosts>` child element.
    hosts: Hosts,
}

/**
 * Models the `<address>` element of a host.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Address {
    // `addr` attribute ("172.16.100.182" in the sample).
    #[serde(rename = "@addr")]
    addr: String,
    // `addrtype` attribute ("ipv4" in the sample).
    #[serde(rename = "@addrtype")]
    addrtype: String,
}

/**
 * Models the `<ports>` element of a host.
 *
 * NOTE(review): exactly one `<port>` child is modelled, which matches every
 * `<host>` in the sample report; a report with several `<port>` children under
 * one `<ports>` would presumably need `Vec<Port>` here — confirm.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Ports {
    port: Port
}

/**
 * Models the `<finished>` element inside `<runstats>`.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Finished {
    // `time` attribute, kept as the raw string.
    #[serde(rename = "@time")]
    time: String,
    // `timestr` attribute ("2024-03-21 11:06:03" in the sample).
    #[serde(rename = "@timestr")]
    timestr: String,
    // `elapsed` attribute ("336" in the sample), kept as the raw string.
    #[serde(rename = "@elapsed")]
    elapsed: String
}

/**
 * Models the `<hosts>` element inside `<runstats>`.
 *
 * The counters are kept as the raw strings found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Hosts {
    // `up` attribute.
    #[serde(rename = "@up")]
    up: String,
    // `down` attribute.
    #[serde(rename = "@down")]
    down: String,
    // `total` attribute.
    #[serde(rename = "@total")]
    total: String
}

/**
 * Models one `<port>` element.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Port {
    // `protocol` attribute ("tcp" in the sample).
    #[serde(rename = "@protocol")]
    protocol: String,
    // `portid` attribute: the port number, kept as the raw string.
    #[serde(rename = "@portid")]
    portid: String,
    // The `<state>` child element.
    state: State
}

/**
 * Models the `<state>` element nested inside a `<port>`.
 *
 * All three values are kept as the raw strings found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct State {
    // `state` attribute ("open" in the sample).
    #[serde(rename = "@state")]
    state: String,
    // `reason` attribute ("syn-ack" in the sample).
    #[serde(rename = "@reason")]
    reason: String,
    // `reason_ttl` attribute.
    #[serde(rename = "@reason_ttl")]
    reason_ttl: String
}

算法实现

完整的算法如下

需要使用到的依赖库和版本

Cargo.toml

# Generates the random report id via `Uuid::new_v4`, hence the "v4" feature.
uuid = { version = "1.7.0",features = ["v4"] }
# XML deserializer; the code calls `quick_xml::de::from_reader`.
quick-xml = { version = "0.31.0",features=["serde","serialize"] }
# NOTE(review): presumably redundant — serde's "derive" feature is enabled below
# and the code imports `serde::Deserialize` — confirm before removing.
serde_derive = "1.0"
serde = { version = "1.0", features = ["derive"] }
# Provides the `tokio::runtime::Runtime` that drives the async functions.
tokio = { version = "1.36.0", features = ["full"] }

​​

use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use std::process::{Command, Output};
use tokio::runtime::Runtime;
use uuid::Uuid;
use quick_xml::de::from_reader;
use quick_xml::DeError;
use serde::Deserialize;




/**
 * Root of a masscan XML report: models the `<nmaprun>` element.
 *
 * Fields renamed with a leading `@` are read from XML attributes of the
 * element; the remaining fields are read from child elements of the same
 * name. Every attribute value is kept as the raw string found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct NmapRun {
    // `scanner` attribute ("masscan" in the sample report).
    #[serde(rename = "@scanner")]
    scanner: String,
    // `start` attribute (looks like a Unix timestamp in the sample — TODO confirm).
    #[serde(rename = "@start")]
    start: String,
    // `version` attribute ("1.0-BETA" in the sample report).
    #[serde(rename = "@version")]
    version: String,
    #[serde(rename = "@xmloutputversion")] // the "@" prefix selects an XML attribute
    xmloutputversion: String,
    // The `<scaninfo>` child element.
    scaninfo: ScanInfo,
    // One entry per `<host>` child element.
    host: Vec<Host>,
    runstats: RunStats, // the `<runstats>` child element
}

//本文由 曲速引擎(Warp Drive)个人博客、曲速引擎(Warp Drive)CSDN技术博客、曲速引擎 Warp Drive 微信公众号联合创作,转载请说明出处谢谢
/**
 * Models the `<scaninfo>` element, which carries only attributes.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct ScanInfo {
    // `type` attribute ("syn" in the sample); `type` is a Rust keyword, so the field is named `stype`.
    #[serde(rename = "@type")]
    stype: String,
    // `protocol` attribute ("tcp" in the sample).
    #[serde(rename = "@protocol")]
    sprotocol: String,
}

/**
 * Models one `<host>` element of the report.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Host {
    // `endtime` attribute, kept as the raw string.
    #[serde(rename = "@endtime")]
    endtime: String,
    // The `<address>` child element.
    address: Address,
    // The `<ports>` child element.
    ports: Ports,
}
 
/**
 * Models the `<runstats>` element that closes the report.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct RunStats {
    // The `<finished>` child element.
    finished: Finished,
    // The `<hosts>` child element.
    hosts: Hosts,
}

/**
 * Models the `<address>` element of a host.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Address {
    // `addr` attribute ("172.16.100.182" in the sample).
    #[serde(rename = "@addr")]
    addr: String,
    // `addrtype` attribute ("ipv4" in the sample).
    #[serde(rename = "@addrtype")]
    addrtype: String,
}

/**
 * Models the `<ports>` element of a host.
 *
 * NOTE(review): exactly one `<port>` child is modelled, which matches every
 * `<host>` in the sample report; a report with several `<port>` children under
 * one `<ports>` would presumably need `Vec<Port>` here — confirm.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Ports {
    port: Port
}

/**
 * Models the `<finished>` element inside `<runstats>`.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Finished {
    // `time` attribute, kept as the raw string.
    #[serde(rename = "@time")]
    time: String,
    // `timestr` attribute ("2024-03-21 11:06:03" in the sample).
    #[serde(rename = "@timestr")]
    timestr: String,
    // `elapsed` attribute ("336" in the sample), kept as the raw string.
    #[serde(rename = "@elapsed")]
    elapsed: String
}

/**
 * Models the `<hosts>` element inside `<runstats>`.
 *
 * The counters are kept as the raw strings found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Hosts {
    // `up` attribute.
    #[serde(rename = "@up")]
    up: String,
    // `down` attribute.
    #[serde(rename = "@down")]
    down: String,
    // `total` attribute.
    #[serde(rename = "@total")]
    total: String
}

/**
 * Models one `<port>` element.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct Port {
    // `protocol` attribute ("tcp" in the sample).
    #[serde(rename = "@protocol")]
    protocol: String,
    // `portid` attribute: the port number, kept as the raw string.
    #[serde(rename = "@portid")]
    portid: String,
    // The `<state>` child element.
    state: State
}

/**
 * Models the `<state>` element nested inside a `<port>`.
 *
 * All three values are kept as the raw strings found in the file.
 */
#[derive(Debug, Deserialize, PartialEq)]
struct State {
    // `state` attribute ("open" in the sample).
    #[serde(rename = "@state")]
    state: String,
    // `reason` attribute ("syn-ack" in the sample).
    #[serde(rename = "@reason")]
    reason: String,
    // `reason_ttl` attribute.
    #[serde(rename = "@reason_ttl")]
    reason_ttl: String
}

/**
 * Runs the bundled masscan binary against `ip` over TCP ports 0-65535 and asks
 * it to write its XML report to `./output/<uuid>.xml`.
 *
 * `ip` may hold several whitespace-separated targets. Each target is handed to
 * masscan as a discrete argument instead of being spliced into an `sh -c`
 * string, so shell metacharacters in `ip` are never interpreted.
 *
 * # Errors
 * Returns an error when `ip` contains no target, when a target begins with `-`
 * (masscan would read it as an option), when the process cannot be started, or
 * when masscan exits with a non-zero status.
 *
 * NOTE(review): the generated report id is only printed, never returned, so a
 * caller cannot locate the report from the return value.
 * NOTE(review): `Command::output` blocks the calling thread until masscan
 * exits even though this is an `async fn`.
 */
async fn command_linux_portscan(ip: String) -> Result<(),Box<dyn Error>> {
    // Attribution (translated): jointly produced by the Warp Drive personal blog, the Warp Drive
    // CSDN tech blog and the Warp Drive WeChat account; please credit the source when reposting.
    let targets: Vec<&str> = ip.split_whitespace().collect();
    if targets.is_empty() || targets.iter().any(|t| t.starts_with('-')) {
        return Err(format!("invalid masscan target: {:?}", ip).into());
    }

    let id = Uuid::new_v4().to_string();
    let report_path = format!("./output/{}.xml", id);
    println!(
        "masscan command: ./bin/Linux/masscan -p0-65535 {} --rate=400 -oX {}",
        targets.join(" "),
        report_path
    );

    // Spawn masscan directly: no shell is involved, so nothing in `ip` gets expanded.
    let output = Command::new("./bin/Linux/masscan")
        .arg("-p0-65535")
        .args(&targets)
        .arg("--rate=400")
        .arg("-oX")
        .arg(&report_path)
        .output()?;

    println!("masscan output: {:?}", output);

    if !output.status.success() {
        return Err(format!("masscan exited with {}", output.status).into());
    }
    Ok(())
}

/**
 * 解析结果xml文件数据
 */
async fn parser_result_xml() -> Result<(),Box<dyn Error>> {
    let id = "ffb51432-1fd0-4450-bede-6b58dff4a323";
    let file_xml = format!("./output/{}.xml",id);
    if !Path::new(&file_xml).exists() {
        return Ok(());
    }

    let file = File::open(file_xml)?;
    let file: BufReader<File> = BufReader::new(file);
    // 明确指定 `from_reader` 应该反序列化为 `NmapRun` 类型
    let nmaprun: Result<NmapRun, DeError> = from_reader(file);

    match nmaprun {
        Ok(result) => {
            println!("完整结果序列化 => {:?}",result);
            println!("-------------本文由 曲速引擎(Warp Drive)个人博客、曲速引擎(Warp Drive)CSDN技术博客、曲速引擎 Warp Drive 微信公众号联合创作,转载请说明出处谢谢----------------");
            println!("读取结果HOST向量 => {:?}",result.host);
            for h in result.host {
                println!("HOST => {:?}",h);
            }
        },
        Err(e) => {
            println!("nmaprun => {:?}",e);
        }
    }
    Ok(())
}

/**
 * Entry point: builds a Tokio runtime, runs the port scan against
 * 172.16.100.182 and then parses the XML report.
 *
 * The parse step runs even when the scan step failed, as it did before.
 *
 * # Errors
 * Returns an error when the runtime cannot be created, otherwise the scan
 * error if the scan failed, otherwise the parse error if parsing failed.
 */
pub fn cmder() -> Result<(),Box<dyn Error>> {
    let rt = Runtime::new()?;

    let scan_result = rt.block_on(command_linux_portscan("172.16.100.182".to_string()));
    let parse_result = rt.block_on(parser_result_xml());

    // Surface failures to the caller instead of discarding both results.
    scan_result.and(parse_result)
}

通过 main 函数直接调用 cmder 即可使用。