本帖最后由 cOldpure 于 2019-12-25 21:24 编辑
初学 Rust 再写个壁纸爬取的工具
学得不精，可能不够健壮
网站在国外 延迟肯定是有的 timeout也很随机
[package]
name = "wall_heaven_cc"
version = "0.1.0"
authors = ["coldpure9ev"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
reqwest = {version = "0.10.0-alpha.2", features = ["blocking", "json"]}
select = {version = "0.4.3"}
extern crate select;
use select::document::Document;
use select::predicate::{Predicate, Attr, Class, Name};
use std::ops::Deref;
use std::io::Write;
/// Performs a blocking HTTP GET on `api` and returns the response body as text.
///
/// # Errors
///
/// Propagates any error produced while sending the request or while reading
/// the body as text.
fn get_html_code(api: &str) -> Result<String, Box<dyn std::error::Error>> {
    let response = reqwest::blocking::Client::new().get(api).send()?;
    let body = response.text()?;
    Ok(body)
}
/// Downloads the resource at `pic_url` and stores it under the local
/// `wallhaven/` directory.
///
/// The file name is the last `/`-separated segment of `pic_url`. The
/// `wallhaven/` directory is created when it does not exist yet.
///
/// Returns a human-readable success message that contains `pic_url`.
///
/// # Errors
///
/// Returns an error (instead of panicking) when:
/// * `pic_url` ends with `/` and therefore carries no file name,
/// * the request fails or the server answers with a non-success status,
/// * the target directory or file cannot be created or written.
fn download_image(pic_url: &str) -> Result<String, Box<dyn std::error::Error>> {
    let filename = pic_url.split('/').collect::<Vec<&str>>();
    // `split` always yields at least one segment, but that segment is empty
    // for a URL ending in `/`; writing to `wallhaven/` itself makes no sense.
    let name = match filename.last() {
        Some(name) if !name.is_empty() => *name,
        _ => return Err(format!("no file name in url {}", pic_url).into()),
    };

    let client = reqwest::blocking::Client::new();
    let mut buf: Vec<u8> = Vec::new();
    // Reject 4xx/5xx answers so an error page is never saved as an image.
    let mut response = client.get(pic_url).send()?.error_for_status()?;
    response.copy_to(&mut buf)?;
    println!("{:?}", filename);

    let dir = std::path::Path::new("wallhaven");
    // Create the output directory on demand so the first run does not fail.
    std::fs::create_dir_all(dir)?;
    let mut file = std::fs::File::create(dir.join(name))?;
    println!("文件打开成功:{:?}", file);
    file.write_all(&buf)?;
    Ok(format!("文件下载成功 {0}", pic_url))
}
/// Crawls one page of the wallhaven search listing and downloads every
/// wallpaper found on it.
///
/// `index` is substituted into the `page=` query parameter of the search URL.
///
/// For each `<img>` inside an element with class `thumb-listing-page`, bytes
/// `33..39` of its `data-src` attribute are used as the wallpaper id. The page
/// `https://wallhaven.cc/w/<id>` is then fetched and the `src` of the element
/// with id `wallpaper` is passed to `download_image`.
///
/// Nothing is returned and nothing panics on unexpected markup: every failure
/// is reported on stdout and the affected item is skipped, so one broken
/// entry does not abort the rest of the crawl.
fn wall_heaven(index: i32) {
    let api = format!("https://wallhaven.cc/search?categories=111&purity=100&resolutions=1920x1080&topRange=1M&sorting=toplist-beta&order=desc&page={}", index);
    let listing_html = match get_html_code(&api) {
        Ok(html) => html,
        Err(e) => {
            println!("get_html_code Error! {}", e);
            return;
        }
    };

    let document = Document::from(listing_html.as_str());
    let mut img_list = Vec::new();
    println!("# Start");
    for node in document.find(Class("thumb-listing-page")) {
        for img_node in node.find(Name("img")) {
            // NOTE(review): the fixed 33..39 window presumably matches thumbnail
            // URLs shaped like `https://th.wallhaven.cc/small/xx/<id>.jpg` -- confirm.
            // A missing attribute or a shorter URL is skipped rather than unwrapped.
            match img_node.attr("data-src").and_then(|src| src.get(33..39)) {
                Some(id) => img_list.push(id),
                None => println!("Skipping thumbnail without a usable data-src"),
            }
        }
    }

    for pic in img_list {
        let pic_html_url = format!("https://wallhaven.cc/w/{0}", pic);
        let pic_html = match get_html_code(&pic_html_url) {
            Ok(html) => html,
            Err(e) => {
                println!("get_html_code Error! {}", e);
                continue;
            }
        };

        let document_pic = Document::from(pic_html.as_str());
        // The detail page may lack the `#wallpaper` element or its `src`;
        // treat that as a skippable condition.
        let pic_url = document_pic
            .find(Attr("id", "wallpaper"))
            .next()
            .and_then(|img| img.attr("src"));
        match pic_url {
            Some(url) => match download_image(url) {
                Ok(n) => println!("下载成功! {}", n),
                Err(e) => println!("Dowload_image Error! {}", e),
            },
            None => println!("No wallpaper image found on {}", pic_html_url),
        }
    }
}
/// Program entry point: crawls listing pages 0 through 100 one after another,
/// printing the current page number before each one.
fn main() {
    // `println!` (not `print!`) so the banner ends its own line instead of
    // running into the first progress message.
    println!("开始拉取wallheavn 壁纸.......");
    // NOTE(review): the range starts at page 0; presumably the site numbers
    // pages from 1, which would make page 0 redundant -- confirm before changing.
    for n in 0..101 {
        println!("当前抓取页数: {}", n);
        wall_heaven(n);
    }
    println!("拉取完毕!");
}
https://www.lanzouj.com/i89l8ri
使用的时候在同目录新建一个 wallhaven 文件夹即可，不然可能报错找不到文件目录。