本帖最后由 wangbingbing 于 2019-3-22 17:01 编辑
jsoup 是真的好用,用起来就像用 jQuery 操作 DOM 一样。
// 将 myURL 改为 https://bing.ioliu.cn/ranking,即可抓取下载榜
另外发现有的图片出现了这种情况,求大佬解答。
下面贴下代码:
[Java] 纯文本查看 复制代码
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.net.ssl.*;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Command-line tool that asks for a page number, loads that page of the
 * gallery at {@link #myURL} with jsoup, collects the image URLs found in the
 * {@code data-progressive} attributes and downloads them concurrently into
 * {@link #filePath}.
 */
public class Main {
    /** Base URL of the gallery; the page number is appended as {@code ?p=N}. */
    public static String myURL = "https://bing.ioliu.cn/";
    /** Directory (with trailing separator) that receives the downloaded images. */
    public static String filePath = "E:\\bingPicture\\";

    /** Browser-like User-Agent sent with the page request and the image requests. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36";
    /** Connect timeout, in milliseconds, for the page and image requests. */
    private static final int CONNECT_TIMEOUT_MILLIS = 5000;
    /** Read timeout, in milliseconds, so a stalled image transfer cannot hang a worker forever. */
    private static final int READ_TIMEOUT_MILLIS = 30000;
    /** Size of the copy buffer used when writing a download to disk. */
    private static final int BUFFER_SIZE = 102400;

    /**
     * Installs the trust-all SSL defaults and opens an HTTPS connection to the
     * given URL.
     *
     * @param url an {@code https://} URL
     * @return the response body stream (the caller must close it), or
     *         {@code null} if the SSL setup or the connection failed
     */
    public static InputStream getByDisableCertValidation(String url) {
        if (!installTrustAllDefaults()) {
            return null;
        }
        try {
            HttpsURLConnection urlConnection = (HttpsURLConnection) new URL(url).openConnection();
            return urlConnection.getInputStream();
        } catch (IOException | ClassCastException e) {
            // ClassCastException: the URL was not https. Report instead of silently swallowing.
            System.err.println("无法打开连接 " + url + ": " + e);
            return null;
        }
    }

    /**
     * Replaces the JVM-wide {@link HttpsURLConnection} defaults with a socket
     * factory and hostname verifier that accept every certificate and host.
     *
     * <p><b>SECURITY WARNING:</b> this disables TLS certificate and hostname
     * verification for every later {@code HttpsURLConnection} in the process,
     * which allows man-in-the-middle attacks. It is kept because the original
     * program relies on it; remove it if the target site has a valid certificate.
     *
     * @return {@code true} if the defaults were installed, {@code false} otherwise
     */
    private static boolean installTrustAllDefaults() {
        System.out.println("正在获取网站安全证书,请稍等....");
        TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }

            @Override
            public void checkClientTrusted(X509Certificate[] certs, String authType) {
                // Intentionally empty: every client certificate is accepted.
            }

            @Override
            public void checkServerTrusted(X509Certificate[] certs, String authType) {
                // Intentionally empty: every server certificate is accepted.
            }
        }};
        HostnameVerifier acceptAnyHost = new HostnameVerifier() {
            @Override
            public boolean verify(String hostname, SSLSession session) {
                return true;
            }
        };
        try {
            SSLContext sc = SSLContext.getInstance("TLS");
            sc.init(null, trustAllCerts, new SecureRandom());
            HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
            HttpsURLConnection.setDefaultHostnameVerifier(acceptAnyHost);
            return true;
        } catch (java.security.GeneralSecurityException e) {
            System.err.println("SSL 初始化失败: " + e);
            return false;
        }
    }

    /**
     * Copies the whole stream into the file at {@code path}, replacing any
     * existing file. Both the input stream and the file are closed when the
     * method returns, including when the copy fails.
     *
     * @param inStream source of the bytes; always closed by this method
     * @param path     destination file path
     * @throws Exception if the file cannot be opened or the copy fails
     */
    public static void readInputStream(InputStream inStream, String path)
            throws Exception {
        File file = new File(path);
        // try-with-resources closes both ends even if read/write throws.
        try (InputStream in = inStream;
             FileOutputStream fos = new FileOutputStream(file)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                fos.write(buffer, 0, len);
            }
            fos.flush();
        }
    }

    /**
     * Reports whether {@code path} already exists; if it does not, tries to
     * create it as a directory (including missing parents).
     *
     * @param path directory path to check
     * @return {@code true} if the path existed before the call, {@code false}
     *         if it did not (whether or not creating it succeeded)
     */
    public boolean fileIfExists(String path) {
        File dir = new File(path);
        if (dir.exists()) {
            return true;
        }
        if (!dir.mkdirs()) {
            System.err.println("无法创建目录: " + path);
        }
        return false;
    }

    /**
     * Builds a jsoup GET request for the given page with browser-like headers.
     * The trust-all SSL defaults are installed first.
     *
     * @param url page URL to request
     * @return the configured, not yet executed, jsoup connection
     */
    public Connection getCollection(String url) {
        // Only the SSL defaults are needed here; opening (and never closing) a
        // second connection to the same URL leaked a stream and doubled the traffic.
        installTrustAllDefaults();

        Map<String, String> headers = new HashMap<>();
        headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        // Advertise gzip only. NOTE(review): brotli is presumably not decoded by
        // this client stack, so offering "br" risks an unreadable body - confirm.
        headers.put("Accept-Encoding", "gzip");
        headers.put("Referer", "https://bing.ioliu.cn/ranking");

        return Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(CONNECT_TIMEOUT_MILLIS)
                .headers(headers)
                // The page is fetched with get(); configure the matching method.
                .method(Connection.Method.GET);
    }

    /**
     * Entry point: reads a page number from standard input, fetches that
     * gallery page and downloads every image found on it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner s = new Scanner(System.in);
        System.out.println("请输入页数:");
        int pageNum;
        try {
            pageNum = s.nextInt();
        } catch (NoSuchElementException e) {
            // Covers InputMismatchException (non-numeric input) and end of input.
            System.out.println("页数必须是整数。");
            return;
        }

        Main main = new Main();
        Document doc;
        try {
            doc = main.getCollection(myURL + "?p=" + pageNum).get();
        } catch (IOException e1) {
            System.out.println("在连接官网时报错。。");
            // Without a document there is nothing to download.
            return;
        }

        if (!main.fileIfExists(filePath)) {
            System.out.println("文件夹不存在,创建了一个指定目录的文件夹...");
        }

        List<String> imageUrls = collectImageUrls(doc);
        // Cached pool: one worker per image, idle workers are reclaimed.
        ExecutorService pool = Executors.newCachedThreadPool();
        for (int j = 0; j < imageUrls.size(); j++) {
            final String imgUrl = imageUrls.get(j);
            final int index = j;
            pool.execute(new Runnable() {
                @Override
                public void run() {
                    downloadImage(imgUrl, index);
                }
            });
        }
        // Already-submitted downloads still run to completion after shutdown().
        pool.shutdown();
    }

    /**
     * Extracts the image URLs from the page, rewriting the 800x480 size marker
     * in each URL to 1920x1080. Elements with an empty {@code data-progressive}
     * attribute are skipped.
     */
    private static List<String> collectImageUrls(Document doc) {
        List<String> urls = new ArrayList<>();
        Element body = doc.body();
        Elements images = body.getElementsByClass("progressive__img progressive--not-loaded");
        for (Element image : images) {
            String src = image.attr("data-progressive");
            if (!src.isEmpty()) {
                urls.add(src.replace("800x480", "1920x1080"));
            }
        }
        return urls;
    }

    /**
     * Downloads one image into {@link #filePath} and prints the outcome.
     *
     * @param imgUrl URL of the image
     * @param index  position of the image in the page list; part of the file name
     */
    private static void downloadImage(String imgUrl, int index) {
        // Workers start almost simultaneously, so a timestamp alone is not unique:
        // two threads could write into the same file. The index makes the name unique.
        String target = filePath + System.currentTimeMillis() + "_" + index + ".jpg";
        try {
            URLConnection con = new URL(imgUrl).openConnection();
            con.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            con.setReadTimeout(READ_TIMEOUT_MILLIS);
            con.setRequestProperty("User-Agent", USER_AGENT);
            readInputStream(con.getInputStream(), target);
            System.out.println(imgUrl + " 下载成功");
        } catch (Exception e) {
            System.out.println(imgUrl + " 下载失败!!!");
            System.err.println("  原因: " + e);
            // Do not leave a truncated image behind.
            File partial = new File(target);
            if (partial.exists() && !partial.delete()) {
                System.err.println("  无法删除不完整的文件: " + target);
            }
        }
    }
}
jar 包 下载:
https://jsoup.org/packages/jsoup-1.11.3.jar
|