描述
在使用 Rust 语言开发端口监控程序时,需要调用第三方扫描器 masscan,并将其输出的 XML 结果反序列化为 Rust 结构体后再进行处理。下面分享结果文件的读取、解析与操作的实现方法。
masscan xml 结果原始文件 ffb51432-1fd0-4450-bede-6b58dff4a323.xml
<?xml version="1.0"?>
<!-- masscan v1.0 scan -->
<nmaprun scanner="masscan" start="1710990049" version="1.0-BETA" xmloutputversion="1.03">
<scaninfo type="syn" protocol="tcp" />
<host endtime="1710990049"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="5672"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990069"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="25"><state state="open" reason="syn-ack" reason_ttl="64"/></port></ports></host>
<host endtime="1710990096"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="5601"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990118"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="22"><state state="open" reason="syn-ack" reason_ttl="64"/></port></ports></host>
<host endtime="1710990163"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990180"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="6379"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990300"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="9200"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<host endtime="1710990317"><address addr="172.16.100.182" addrtype="ipv4"/><ports><port protocol="tcp" portid="15672"><state state="open" reason="syn-ack" reason_ttl="63"/></port></ports></host>
<runstats>
<finished time="1710990363" timestr="2024-03-21 11:06:03" elapsed="336" />
<hosts up="8" down="0" total="8" />
</runstats>
</nmaprun>
解析过程
通过设计一个结构体,并且将结果反序列化到这个结构体即可对内容进行访问。
内容分析
要对内容进行反序列化,首先需要分析 XML 的结构。
原始文件中的 <nmaprun></nmaprun> 是根元素(即最外层的一级元素),它的下面包含 scaninfo、host、runstats 等子元素,而各个子元素下面又可以有自己的子元素。而 <nmaprun scanner="masscan" start="1710990049" version="1.0-BETA" xmloutputversion="1.03"> 中写在标签内部的 scanner、start、version、xmloutputversion 叫做属性,不同的元素拥有不同的属性。
在设计结构体的时候需要注意元素和属性的区别。在 Rust 中,NmapRun 结构体是用来表示 XML 文件中 <nmaprun> 元素的数据模型。这个结构体通过 serde 库的 Deserialize 派生宏支持反序列化,即将 XML 数据转换为 Rust 结构体实例;本文的结构体只派生了 Deserialize,如果还需要将结构体实例转换回 XML 数据(序列化),则要另外派生 Serialize。
字段前的 #[serde(rename = "@attribute_name")] 是 serde 的字段属性(attribute),其中以 @ 开头的名称是 quick-xml 的约定:它告诉反序列化器在解析 XML 时,应该将元素上同名的 XML 属性值映射到结构体的对应字段上。例如,#[serde(rename = "@scanner")] 表示 XML 中的 scanner 属性应该映射到 NmapRun 结构体的 scanner 字段。没有 @ 前缀的字段(如 scaninfo、host、runstats)则按字段名映射到同名的子元素。
根据上面的方法,设计结构体的时候如下:
/// Deserialization model for the root `<nmaprun>` element of a masscan XML report.
///
/// Fields renamed with a leading `@` are read from XML attributes of the
/// element; the remaining fields are read from child elements of the same name.
#[derive(Debug, Deserialize, PartialEq)]
struct NmapRun {
    /// Value of the `scanner` attribute (`"masscan"` in the sample report).
    #[serde(rename = "@scanner")]
    scanner: String,
    /// Value of the `start` attribute, kept as text.
    #[serde(rename = "@start")]
    start: String,
    /// Value of the `version` attribute.
    #[serde(rename = "@version")]
    version: String,
    /// Value of the `xmloutputversion` attribute (the `@` prefix marks an attribute).
    #[serde(rename = "@xmloutputversion")]
    xmloutputversion: String,
    /// The `<scaninfo>` child element.
    scaninfo: ScanInfo,
    /// One entry per `<host>` child element.
    host: Vec<Host>,
    /// The `<runstats>` child element.
    runstats: RunStats,
}
/// Model of the `<scaninfo>` element; both fields come from XML attributes.
#[derive(Debug, Deserialize, PartialEq)]
struct ScanInfo {
    /// Value of the `type` attribute (`type` is a Rust keyword, hence `stype`).
    #[serde(rename = "@type")]
    stype: String,
    /// Value of the `protocol` attribute.
    #[serde(rename = "@protocol")]
    sprotocol: String,
}
/// Model of a single `<host>` element of the report.
#[derive(Debug, Deserialize, PartialEq)]
struct Host {
    /// Value of the `endtime` attribute, kept as text.
    #[serde(rename = "@endtime")]
    endtime: String,
    /// The `<address>` child element.
    address: Address,
    /// The `<ports>` child element.
    ports: Ports,
}
/// Model of the `<runstats>` element, which only carries child elements.
#[derive(Debug, Deserialize, PartialEq)]
struct RunStats {
    /// The `<finished>` child element.
    finished: Finished,
    /// The `<hosts>` child element.
    hosts: Hosts,
}
/// Model of the `<address>` element; both fields come from XML attributes.
#[derive(Debug, Deserialize, PartialEq)]
struct Address {
    /// Value of the `addr` attribute (e.g. `"172.16.100.182"` in the sample).
    #[serde(rename = "@addr")]
    addr: String,
    /// Value of the `addrtype` attribute (e.g. `"ipv4"` in the sample).
    #[serde(rename = "@addrtype")]
    addrtype: String,
}
/// Model of the `<ports>` element.
#[derive(Debug, Deserialize, PartialEq)]
struct Ports {
    /// The `<port>` child element.
    ///
    /// NOTE(review): declared single-valued; the sample report has exactly one
    /// `<port>` per `<ports>` — confirm that reports never contain several.
    port: Port,
}
/// Model of the `<finished>` element; every field comes from an XML attribute
/// and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct Finished {
    /// Value of the `time` attribute.
    #[serde(rename = "@time")]
    time: String,
    /// Value of the `timestr` attribute.
    #[serde(rename = "@timestr")]
    timestr: String,
    /// Value of the `elapsed` attribute.
    #[serde(rename = "@elapsed")]
    elapsed: String,
}
/// Model of the `<hosts>` summary element; every field comes from an XML
/// attribute and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct Hosts {
    /// Value of the `up` attribute.
    #[serde(rename = "@up")]
    up: String,
    /// Value of the `down` attribute.
    #[serde(rename = "@down")]
    down: String,
    /// Value of the `total` attribute.
    #[serde(rename = "@total")]
    total: String,
}
/// Model of a `<port>` element.
#[derive(Debug, Deserialize, PartialEq)]
struct Port {
    /// Value of the `protocol` attribute (e.g. `"tcp"` in the sample).
    #[serde(rename = "@protocol")]
    protocol: String,
    /// Value of the `portid` attribute, kept as text rather than a number.
    #[serde(rename = "@portid")]
    portid: String,
    /// The `<state>` child element.
    state: State,
}
/// Model of the `<state>` element; every field comes from an XML attribute
/// and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct State {
    /// Value of the `state` attribute (e.g. `"open"` in the sample).
    #[serde(rename = "@state")]
    state: String,
    /// Value of the `reason` attribute (e.g. `"syn-ack"` in the sample).
    #[serde(rename = "@reason")]
    reason: String,
    /// Value of the `reason_ttl` attribute.
    #[serde(rename = "@reason_ttl")]
    reason_ttl: String,
}
算法实现
完整的算法如下
需要使用到的依赖库和版本
Cargo.toml
uuid = { version = "1.7.0",features = ["v4"] }
quick-xml = { version = "0.31.0",features=["serde","serialize"] }
serde_derive = "1.0"
serde = { version = "1.0", features = ["derive"] }
tokio = { version = "1.36.0", features = ["full"] }
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use std::process::{Command, Output};
use tokio::runtime::Runtime;
use uuid::Uuid;
use quick_xml::de::from_reader;
use quick_xml::DeError;
use serde::Deserialize;
/// Deserialization model for the root `<nmaprun>` element of a masscan XML report.
///
/// Fields renamed with a leading `@` are read from XML attributes of the
/// element; the remaining fields are read from child elements of the same name.
#[derive(Debug, Deserialize, PartialEq)]
struct NmapRun {
    /// Value of the `scanner` attribute (`"masscan"` in the sample report).
    #[serde(rename = "@scanner")]
    scanner: String,
    /// Value of the `start` attribute, kept as text.
    #[serde(rename = "@start")]
    start: String,
    /// Value of the `version` attribute.
    #[serde(rename = "@version")]
    version: String,
    /// Value of the `xmloutputversion` attribute (the `@` prefix marks an attribute).
    #[serde(rename = "@xmloutputversion")]
    xmloutputversion: String,
    /// The `<scaninfo>` child element.
    scaninfo: ScanInfo,
    /// One entry per `<host>` child element.
    host: Vec<Host>,
    /// The `<runstats>` child element.
    runstats: RunStats,
}
//本文由 曲速引擎(Warp Drive)个人博客、曲速引擎(Warp Drive)CSDN技术博客、曲速引擎 Warp Drive 微信公众号联合创作,转载请说明出处谢谢
/// Model of the `<scaninfo>` element; both fields come from XML attributes.
#[derive(Debug, Deserialize, PartialEq)]
struct ScanInfo {
    /// Value of the `type` attribute (`type` is a Rust keyword, hence `stype`).
    #[serde(rename = "@type")]
    stype: String,
    /// Value of the `protocol` attribute.
    #[serde(rename = "@protocol")]
    sprotocol: String,
}
/// Model of a single `<host>` element of the report.
#[derive(Debug, Deserialize, PartialEq)]
struct Host {
    /// Value of the `endtime` attribute, kept as text.
    #[serde(rename = "@endtime")]
    endtime: String,
    /// The `<address>` child element.
    address: Address,
    /// The `<ports>` child element.
    ports: Ports,
}
/// Model of the `<runstats>` element, which only carries child elements.
#[derive(Debug, Deserialize, PartialEq)]
struct RunStats {
    /// The `<finished>` child element.
    finished: Finished,
    /// The `<hosts>` child element.
    hosts: Hosts,
}
/// Model of the `<address>` element; both fields come from XML attributes.
#[derive(Debug, Deserialize, PartialEq)]
struct Address {
    /// Value of the `addr` attribute (e.g. `"172.16.100.182"` in the sample).
    #[serde(rename = "@addr")]
    addr: String,
    /// Value of the `addrtype` attribute (e.g. `"ipv4"` in the sample).
    #[serde(rename = "@addrtype")]
    addrtype: String,
}
/// Model of the `<ports>` element.
#[derive(Debug, Deserialize, PartialEq)]
struct Ports {
    /// The `<port>` child element.
    ///
    /// NOTE(review): declared single-valued; the sample report has exactly one
    /// `<port>` per `<ports>` — confirm that reports never contain several.
    port: Port,
}
/// Model of the `<finished>` element; every field comes from an XML attribute
/// and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct Finished {
    /// Value of the `time` attribute.
    #[serde(rename = "@time")]
    time: String,
    /// Value of the `timestr` attribute.
    #[serde(rename = "@timestr")]
    timestr: String,
    /// Value of the `elapsed` attribute.
    #[serde(rename = "@elapsed")]
    elapsed: String,
}
/// Model of the `<hosts>` summary element; every field comes from an XML
/// attribute and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct Hosts {
    /// Value of the `up` attribute.
    #[serde(rename = "@up")]
    up: String,
    /// Value of the `down` attribute.
    #[serde(rename = "@down")]
    down: String,
    /// Value of the `total` attribute.
    #[serde(rename = "@total")]
    total: String,
}
/// Model of a `<port>` element.
#[derive(Debug, Deserialize, PartialEq)]
struct Port {
    /// Value of the `protocol` attribute (e.g. `"tcp"` in the sample).
    #[serde(rename = "@protocol")]
    protocol: String,
    /// Value of the `portid` attribute, kept as text rather than a number.
    #[serde(rename = "@portid")]
    portid: String,
    /// The `<state>` child element.
    state: State,
}
/// Model of the `<state>` element; every field comes from an XML attribute
/// and is kept as text.
#[derive(Debug, Deserialize, PartialEq)]
struct State {
    /// Value of the `state` attribute (e.g. `"open"` in the sample).
    #[serde(rename = "@state")]
    state: String,
    /// Value of the `reason` attribute (e.g. `"syn-ack"` in the sample).
    #[serde(rename = "@reason")]
    reason: String,
    /// Value of the `reason_ttl` attribute.
    #[serde(rename = "@reason_ttl")]
    reason_ttl: String,
}
/// Runs a masscan TCP scan of ports 0-65535 against `ip` and asks masscan to
/// write its XML report to `./output/<uuid>.xml`.
///
/// The scanner binary is spawned directly instead of through `sh -c`, so the
/// contents of `ip` are never interpreted by a shell. `ip` is split on
/// whitespace so that a space-separated list of targets is still passed as
/// separate arguments, as the shell used to do.
///
/// # Errors
///
/// Returns an error when the masscan binary cannot be started or when it
/// exits with a non-zero status.
///
/// NOTE(review): the generated report id is only printed, never returned, so a
/// caller cannot locate the report afterwards; fixing that requires a
/// signature change.
/// NOTE(review): `Command::output` blocks the calling thread until masscan
/// exits even though this is an `async fn`.
async fn command_linux_portscan(ip: String) -> Result<(), Box<dyn Error>> {
    let id = Uuid::new_v4().to_string();
    let report_path = format!("./output/{}.xml", id);
    // Human-readable form of the invocation, used for logging only.
    let command = format!(
        "./bin/Linux/masscan -p0-65535 {} --rate=400 -oX {}",
        ip, report_path
    );
    // This article is a joint publication of the Warp Drive (曲速引擎) personal blog, the
    // Warp Drive CSDN tech blog and the Warp Drive WeChat official account; please credit
    // the source when reposting.
    println!("masscan command: {}", command);

    let output = Command::new("./bin/Linux/masscan")
        .arg("-p0-65535")
        .args(ip.split_whitespace())
        .arg("--rate=400")
        .arg("-oX")
        .arg(&report_path)
        .output()?;
    println!("masscan output: {:?}", output);

    // A scan that exited abnormally must not be reported as a success.
    if !output.status.success() {
        return Err(format!("masscan exited with {}", output.status).into());
    }
    Ok(())
}
/**
* 解析结果xml文件数据
*/
async fn parser_result_xml() -> Result<(),Box<dyn Error>> {
let id = "ffb51432-1fd0-4450-bede-6b58dff4a323";
let file_xml = format!("./output/{}.xml",id);
if !Path::new(&file_xml).exists() {
return Ok(());
}
let file = File::open(file_xml)?;
let file: BufReader<File> = BufReader::new(file);
// 明确指定 `from_reader` 应该反序列化为 `NmapRun` 类型
let nmaprun: Result<NmapRun, DeError> = from_reader(file);
match nmaprun {
Ok(result) => {
println!("完整结果序列化 => {:?}",result);
println!("-------------本文由 曲速引擎(Warp Drive)个人博客、曲速引擎(Warp Drive)CSDN技术博客、曲速引擎 Warp Drive 微信公众号联合创作,转载请说明出处谢谢----------------");
println!("读取结果HOST向量 => {:?}",result.host);
for h in result.host {
println!("HOST => {:?}",h);
}
},
Err(e) => {
println!("nmaprun => {:?}",e);
}
}
Ok(())
}
/// Runs the port scan of `172.16.100.182` and then the report parsing step on
/// a freshly created Tokio runtime.
///
/// Both steps are always executed; their results are no longer discarded.
///
/// # Errors
///
/// Returns an error when the runtime cannot be created, otherwise the error of
/// the scan step if it failed, otherwise the error of the parse step.
pub fn cmder() -> Result<(), Box<dyn Error>> {
    let rt = Runtime::new()?;
    let (scan, parse) = rt.block_on(async {
        let scan = command_linux_portscan("172.16.100.182".to_string()).await;
        // The parse step reads a fixed report file, so it is attempted even
        // when the scan step failed.
        let parse = parser_result_xml().await;
        (scan, parse)
    });
    scan?;
    parse
}
通过main函数直接调用cmder即可使用