本帖最后由 笨笨家的唯一 于 2021-11-25 14:49 编辑
有些时候在微信文章中找到了一些图片资源,想按顺序全部保存下来,但又懒得一张张地保存或者去提取缓存,于是写了这个脚本。保存时按照图片在文章中出现的顺序来命名,应该也算是一个简单的爬虫吧。
主要添加了 async 包,用来控制并发数。图片默认保存在当前目录下的 weixinimg 文件夹中。
const cheerio = require("cheerio");
const axios = require("axios");
const fs = require("fs");
const async = require("async");
const path = require("path");
/**
 * Download a single image and stream it to a file on disk.
 *
 * The parent directory of `paths` is created when it does not exist yet, so
 * the write stream cannot fail just because the output folder is missing.
 *
 * @param {string} url - Image URL to request.
 * @param {string} paths - Destination file path for the downloaded image.
 * @returns {Promise<string>} Resolves with "<file name>保存完毕!" once the
 *   write stream has closed. Rejects with the underlying error when the
 *   directory cannot be created, the request fails, or either stream errors.
 */
const downloadImg = async (url, paths) => {
  const config = {
    headers: {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
    },
    responseType: "stream",
  };

  let respone;
  try {
    await fs.promises.mkdir(path.dirname(paths), { recursive: true });
    respone = await axios.get(url, config);
  } catch (err) {
    console.log("no such file!");
    // Propagate the real failure instead of an opaque placeholder value.
    throw err;
  }

  return new Promise((resolve, reject) => {
    const fsw = fs.createWriteStream(paths);
    // Tear down both ends on any stream failure so neither the socket nor
    // the file descriptor is left open, then surface the error.
    const fail = err => {
      respone.data.destroy();
      fsw.destroy();
      reject(err);
    };
    respone.data.on("error", fail);
    fsw.on("error", fail);
    // "close" also fires after a failure, but the promise is already
    // rejected by then, so this resolve is a no-op in that case.
    fsw.on("close", () => {
      resolve(`${path.basename(paths)}保存完毕!`);
    });
    respone.data.pipe(fsw);
  });
};
// Article page to scrape. ***** Change this to the article you need. *****
const baseurl = "https://mp.weixin.qq.com/s/8gkCrpbP3pitfQN2L9FTBA";

// Index-aligned lists: result.pathNames[k] is the destination file for
// result.urls[k]. Both are filled while parsing the article page.
const result = {
  pathNames: [],
  urls: [],
};

axios
  .get(baseurl, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
    },
  })
  .then(res => {
    const $ = cheerio.load(res.data);
    $("img.rich_pages").each((i, value) => {
      const src = $(value).attr("data-src");
      // Skip images without a source URL so the two lists stay aligned and
      // no request is made for `undefined`.
      if (!src) {
        return;
      }
      // Fall back to "jpg" when the element carries no data-type, instead of
      // producing a ".undefined" extension. NOTE(review): "jpg" is a guess —
      // confirm it suits the articles being scraped.
      const ext = $(value).attr("data-type") || "jpg";
      // Keep the element's position `i` in the name so files sort in the
      // order the images appear in the article.
      result.pathNames.push(`./weixinimg/${i}.${ext}`);
      result.urls.push(src);
    });
    // The write streams need the output folder to exist.
    fs.mkdirSync("./weixinimg", { recursive: true });
    // Download with at most 20 requests in flight.
    async.mapLimit(result.urls, 20, imgFun, allImgFun);
  })
  .catch(err => {
    // Without this handler a failed page request is an unhandled rejection.
    console.error(`Failed to fetch or parse ${baseurl}: ${err && err.message ? err.message : err}`);
    process.exitCode = 1;
  });
// Positions in result.urls that have already been handed to a download.
// Needed because the same URL can appear more than once in an article:
// a plain indexOf(url) would always return the first position, so every
// duplicate would be written to the same file.
const claimedImgIndexes = new Set();

/**
 * Iteratee for async.mapLimit: download one image URL to its matching path.
 *
 * The destination is result.pathNames[k], where k is the first position in
 * result.urls that holds `url` and has not been claimed by an earlier call.
 * Failures are logged and reported to `callback` as success, so one broken
 * image does not abort the remaining downloads.
 *
 * @param {string} url - Image URL taken from result.urls.
 * @param {function(?Error, null): void} callback - Completion callback;
 *   always invoked as callback(null, null).
 */
function imgFun(url, callback) {
  const index = result.urls.findIndex(
    (candidate, i) => candidate === url && !claimedImgIndexes.has(i)
  );
  if (index === -1) {
    // No unclaimed entry for this URL, hence no destination path to write to.
    console.log("no such file");
    callback(null, null);
    return;
  }
  claimedImgIndexes.add(index);

  // Two-argument then(): the failure handler only sees download errors, so
  // `callback` can never be invoked twice for the same item.
  downloadImg(url, result.pathNames[index]).then(
    res => {
      console.log(res);
      callback(null, null);
    },
    () => {
      console.log("no such file");
      callback(null, null);
    }
  );
}
/**
 * Final callback for async.mapLimit, invoked once every download has been
 * attempted.
 *
 * @param {?Error} err - Error reported by an iteratee, if any.
 * @param {Array} resultes - Per-item results collected by mapLimit (unused).
 */
function allImgFun(err, resultes) {
  if (err) {
    console.error(err);
  }
  // The folder name must match the "./weixinimg/" prefix used for the files.
  console.log("图片保存在 weixinimg 文件夹下,请注意检查!");
}
才疏学浅,有不对的地方还希望大家批评指正。