package com.example.demo;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small command-line scraper.
 *
 * <p>It POSTs to a course page with a browser session cookie, collects every
 * {@code <a>} found under elements with class {@code lh-menu-second}, POSTs to
 * each linked lesson page, extracts the first {@code player.updateSrc([...])}
 * call from the returned HTML, and appends title / page URL / extracted call
 * to a text file.
 */
public class MayiBugApplicationTests {

    /** Course page whose lesson menu is crawled. */
    private static final String COURSE_URL = "http://www.mayikt.com/course/video/2350";

    /** Prefix placed in front of relative lesson links. */
    private static final String SITE_ROOT = "http://www.mayikt.com/";

    private static final String ACCEPT_HEADER =
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36";

    /** Regex matching the {@code player.updateSrc([...])} call embedded in a lesson page. */
    private static final String VIDEO_SRC_REGEX = "player\\.updateSrc\\(\\[[^~]*\\]\\)";

    /** Pre-compiled form of {@link #VIDEO_SRC_REGEX}, so it is not recompiled per lesson. */
    private static final Pattern VIDEO_SRC_PATTERN = Pattern.compile(VIDEO_SRC_REGEX);

    public static void main(String[] args) {
        x();
    }

    /**
     * Runs the crawl: fetches the course page, visits every lesson link and
     * appends one record per lesson via {@link #charOutStream(String)}.
     *
     * <p>A failure to fetch the course page ends the crawl; a failure on a
     * single lesson page is reported and the remaining lessons are still
     * processed.
     */
    public static void x() {
        // Request cookie copied from a logged-in browser session; replace it with your own.
        String cookieStr = "cookie_member_token=TOKEN_MEMBER_f5099567-f8c8-47d1-a2ef-fb4dd378aef3; JSESSIONID=EE147E273ABD7B3A54D105552FB8F47C; Hm_lvt_eaa952b8610db6f155ab0febd442e89a=1555342970,1555345413,1555394706; Hm_lpvt_eaa952b8610db6f155ab0febd442e89a=1555394706";
        try {
            Map<String, String> cookies = parseCookies(cookieStr);

            // post() already returns a parsed Document, so no serialise-and-reparse round trip.
            Document coursePage = postForDocument(COURSE_URL, cookies);

            // Lesson links live inside the "lh-menu-second" menu elements.
            Elements lessonLinks = coursePage.getElementsByClass("lh-menu-second").select("a");
            for (Element link : lessonLinks) {
                String href = link.attr("href").trim();
                if (href.isEmpty()) {
                    // An anchor without a target has no lesson page to fetch.
                    continue;
                }
                String url = toLessonUrl(href);
                // The anchor's own text is the lesson title; the element is left unmodified.
                String title = link.text();
                try {
                    String lessonHtml = postForDocument(url, cookies).html();
                    String videoSrc = regMatch(lessonHtml, VIDEO_SRC_REGEX);

                    StringBuilder record = new StringBuilder();
                    record.append(title).append("\r\n");
                    record.append("原始地址:").append(url).append("\r\n");
                    record.append("视频地址:").append(videoSrc).append("\r\n");
                    charOutStream(record.toString());
                } catch (IOException e) {
                    // Keep going: one unreachable lesson must not abort the rest of the crawl.
                    System.err.println("Failed to fetch lesson page: " + url);
                    e.printStackTrace();
                }
            }
            System.out.println("voer");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the first substring of {@code withinText} that matches
     * {@code regString}.
     *
     * @param withinText text to search; {@code null} yields an empty result
     * @param regString  regular expression to look for; when {@code null} or
     *                   empty, the built-in {@code player.updateSrc([...])}
     *                   pattern is used
     * @return the first match, or {@code ""} when nothing matches
     * @throws java.util.regex.PatternSyntaxException if {@code regString} is not a valid regex
     */
    public static String regMatch(String withinText, String regString) {
        if (withinText == null) {
            return "";
        }
        Pattern pattern;
        if (regString == null || regString.isEmpty() || VIDEO_SRC_REGEX.equals(regString)) {
            pattern = VIDEO_SRC_PATTERN;
        } else {
            pattern = Pattern.compile(regString);
        }
        Matcher matcher = pattern.matcher(withinText);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Appends {@code html} followed by a line separator to
     * {@code D:\demo\test.txt}, creating the parent directory when missing.
     * The text is written as UTF-8 so non-ASCII titles do not depend on the
     * platform default charset. I/O failures are reported on standard error
     * and not rethrown.
     *
     * @param html text to append
     */
    public static void charOutStream(String html) {
        File file = new File("D:" + File.separator + "demo" + File.separator + "test.txt");
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        // try-with-resources closes both streams even when writing fails part-way.
        try (FileOutputStream out = new FileOutputStream(file, true);
             PrintWriter writer = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
            writer.println(html);
            // PrintWriter swallows IOExceptions; checkError() flushes and exposes them.
            if (writer.checkError()) {
                System.err.println("Failed to write to " + file);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Splits a {@code name=value; name=value} cookie header into a map, skipping malformed fragments. */
    private static Map<String, String> parseCookies(String cookieHeader) {
        Map<String, String> cookies = new HashMap<String, String>();
        for (String fragment : cookieHeader.split(";")) {
            // Split on the FIRST '=' only, so values that themselves contain '=' stay intact.
            int eq = fragment.indexOf('=');
            if (eq < 0) {
                continue;
            }
            String name = fragment.substring(0, eq).trim();
            if (name.isEmpty()) {
                continue;
            }
            cookies.put(name, fragment.substring(eq + 1).trim());
        }
        return cookies;
    }

    /** Sends the POST request used for both the course page and lesson pages and returns the parsed response. */
    private static Document postForDocument(String url, Map<String, String> cookies) throws IOException {
        return Jsoup.connect(url)
                .cookies(cookies)
                .header("Content-Type", "application/x-www-form-urlencoded")
                .header("Accept", ACCEPT_HEADER)
                .header("User-Agent", USER_AGENT)
                .post();
    }

    /** Turns a lesson href into a full URL: absolute links pass through, others are joined to the site root with one slash. */
    private static String toLessonUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        int start = 0;
        while (start < href.length() && href.charAt(start) == '/') {
            start++;
        }
        return SITE_ROOT + href.substring(start);
    }
}
// 蚂蚁视频抓取
// 抓取效果示例
// PC 播放:VLC 播放器
// 请给我点个赞👍 谢谢