爬取目标
https://h5.m.taobao.com/ocean/privatenode/shop.html?&sellerId=50852803
需要的是其中 sellerId 参数的值,即 sellerId=50852803 中的 50852803
获取数据地址
https://acs.m.taobao.com/h5/mtop.taobao.social.feed.aggregate/1.0/?appKey=12574478&t=1582778795899&sign=367a770e5a56cfaafc350da1da6b7d76&api=mtop.taobao.social.feed.aggregate&v=1.0&timeout=300000&timer=300000&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22params%22%3A%22%7B%5C%22nodeId%5C%22%3A%5C%22%5C%22%2C%5C%22sellerId%5C%22%3A%5C%2250852803%5C%22%7D%22%2C%22cursor%22%3A%221%22%2C%22pageNum%22%3A%221%22%2C%22pageId%22%3A5703%2C%22env%22%3A%221%22%7D
其中
t为当前时间戳
sign 为 (token + "&" + t + "&" + appKey + "&" + data) 这几个参数拼接后计算出的 MD5 值(十六进制字符串)
我们需要获取的就只有token,而token是服务器传过来的
所以先用任意的 sign 伪造一次访问,从返回的 Cookie(_m_h5_tk,取第一个下划线之前的部分)中获取 token,然后用正确的 sign 再访问一次获取数据
pom.xml
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.11</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.62</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
TbBuyerShow.java
/**
 * One buyer-show picture record: a single image together with the shop,
 * user and post information it was found under.
 * Getters, setters and the no-argument constructor are generated by Lombok.
 */
@Data
@NoArgsConstructor
public class TbBuyerShow {
private String sellerId; // shop category ID (original note); filled with the seller ID that was requested
private String title; // shop name
private String userName; // user name
private String userUrl; // user link
private String userTitle; // user comment
private String imgId; // image ID
private String imgUrl; // image link
private String targetUrl; // image source
private Integer pageNum; // page number the record was fetched from
}
BuyerShowReptile.java
/**
 * Scraper for the Taobao "buyer show" feed of a shop
 * (API {@code mtop.taobao.social.feed.aggregate}, version 1.0).
 *
 * <p>The endpoint requires a {@code sign} parameter that is the MD5 hex digest of
 * {@code token + "&" + t + "&" + appKey + "&" + data}. The token is handed out by the
 * server in the {@code _m_h5_tk} cookie, so every scrape performs two requests: a first
 * one with a placeholder sign to obtain the cookie, and a second, correctly signed one
 * to fetch the data.
 */
public class BuyerShowReptile {

    /** Endpoint prefix; the query string is appended directly after the trailing '?'. */
    private static final String API_URL =
            "https://acs.m.taobao.com/h5/mtop.taobao.social.feed.aggregate/1.0/?";

    /** Application key sent with every request and included in the signature. */
    private static final String APP_KEY = "12574478";

    /** Arbitrary sign used for the first request, whose only purpose is to receive cookies. */
    private static final String PLACEHOLDER_SIGN = "af1fde903d6e32e57aaf3377e6a68f3a";

    /** Name of the cookie whose value starts with the signing token. */
    private static final String TOKEN_COOKIE = "_m_h5_tk";

    public static void main(String[] args) {
        List<TbBuyerShow> reptile = reptile("50852803", 1, 20);
        reptile.forEach(tbBuyerShow -> System.out.println(tbBuyerShow.getImgUrl()));
    }

    /**
     * Fetches one page of buyer-show pictures for a shop.
     *
     * @param sellerId seller ID of the shop (the {@code sellerId} value of the shop URL)
     * @param index    page number to fetch
     * @param num      value sent as {@code pagination.pageSize}
     * @return the parsed records; an empty list when the request fails with an I/O error
     *         or the response contains no pictures (never {@code null})
     */
    public static List<TbBuyerShow> reptile(String sellerId, int index, int num) {
        String t = String.valueOf(System.currentTimeMillis());
        String data = buildData(sellerId, index, num);

        // One client and one cookie store for both requests: cookies set by the first
        // response (_m_h5_tk and _m_h5_tk_enc) are sent back automatically with the second.
        CookieStore cookieStore = new BasicCookieStore();
        try (CloseableHttpClient httpClient =
                     HttpClientBuilder.create().setDefaultCookieStore(cookieStore).build()) {

            // Request 1: placeholder sign, only issued to make the server set the token cookie.
            String probeUrl = htmlUrl(API_URL, newParams(APP_KEY, t, PLACEHOLDER_SIGN, data));
            try (CloseableHttpResponse probe = httpClient.execute(new HttpGet(probeUrl))) {
                // The body is irrelevant; closing the response releases the connection.
            }

            // The token is the part of the cookie value before the first underscore.
            String token = "";
            for (Cookie cookie : cookieStore.getCookies()) {
                if (TOKEN_COOKIE.equals(cookie.getName())) {
                    token = cookie.getValue().split("_")[0];
                }
            }

            // Explicit charset so the digest does not depend on the platform default.
            String toSign = token + "&" + t + "&" + APP_KEY + "&" + data;
            String sign = DigestUtils.md5DigestAsHex(
                    toSign.getBytes(java.nio.charset.StandardCharsets.UTF_8));

            // Request 2: same timestamp and data, now with the real signature.
            String dataUrl = htmlUrl(API_URL, newParams(APP_KEY, t, sign, data));
            try (CloseableHttpResponse response = httpClient.execute(new HttpGet(dataUrl))) {
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    return new ArrayList<>();
                }
                String conResult = EntityUtils.toString(entity, "UTF-8");
                return newTbBuyerShow(conResult, sellerId, index);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new ArrayList<>();
    }

    /**
     * Builds the JSON text sent as the {@code data} request parameter. The inner
     * {@code params} value is itself a JSON document embedded as an escaped string.
     * The arguments are inserted verbatim, without JSON escaping.
     */
    private static String buildData(String sellerId, int index, int num) {
        return "{\"params\":" +
                "\"{\\\"nodeId\\\":" +
                "\\\"\\\",\\\"sellerId\\\":" +
                "\\\"" + sellerId + "\\\",\\\"pagination\\\":" +
                "{\\\"direction\\\":" +
                "\\\"1\\\",\\\"hasMore\\\":" +
                "\\\"true\\\",\\\"pageNum\\\":" +
                "\\\"" + index + "\\\",\\\"pageSize\\\":" +
                "\\\"" + num + "\\\"}}\",\"cursor\":" +
                "\"" + index + "\",\"pageNum\":" +
                "\"" + index + "\",\"pageId\":" +
                "5703,\"env\":" +
                "\"1\"}";
    }

    /**
     * Converts a raw (JSONP-wrapped) response into one record per picture.
     *
     * @param conResult response body, e.g. {@code mtopjsonp({...})}
     * @param sellerId  seller ID copied into every record
     * @param index     page number copied into every record
     * @return one {@link TbBuyerShow} per entry of every item's {@code pics} array;
     *         empty when the body is empty or holds no {@code data.list}
     */
    static List<TbBuyerShow> newTbBuyerShow(String conResult, String sellerId, Integer index) {
        List<TbBuyerShow> tbBuyerShows = new ArrayList<>();
        if (StringUtils.isEmpty(conResult)) {
            return tbBuyerShows;
        }

        JSONObject root = JSON.parseObject(stripJsonp(conResult));
        JSONObject data = root == null ? null : root.getJSONObject("data");
        if (data == null) {
            return tbBuyerShows;
        }

        String title = ""; // shop name
        JSONObject header = data.getJSONObject("header");
        if (header != null) {
            title = header.getString("title");
        }

        JSONArray userList = data.getJSONArray("list");
        if (userList == null) {
            return tbBuyerShows;
        }

        for (int i = 0; i < userList.size(); i++) {
            JSONObject item = userList.getJSONObject(i);
            if (item == null) {
                continue;
            }

            // Per-item values are reset for every item so that a missing field
            // never inherits the value of the previous item.
            String userName = "";  // user name
            String userUrl = "";   // user link
            String userTitle = ""; // user comment
            String targetUrl = ""; // image source

            JSONObject user = item.getJSONObject("user");
            if (user != null) {
                userName = user.getString("userNick");
                userUrl = user.getString("userUrl");
            }
            String itemTitle = item.getString("title");
            if (!StringUtils.isEmpty(itemTitle)) {
                userTitle = itemTitle;
            }
            String itemTarget = item.getString("targetUrl");
            if (!StringUtils.isEmpty(itemTarget)) {
                targetUrl = itemTarget;
            }

            JSONArray picsList = item.getJSONArray("pics");
            if (picsList == null) {
                continue;
            }
            for (int j = 0; j < picsList.size(); j++) {
                JSONObject pics = picsList.getJSONObject(j);
                if (pics == null) {
                    continue;
                }
                TbBuyerShow tbBuyerShow = new TbBuyerShow();
                tbBuyerShow.setSellerId(sellerId);
                tbBuyerShow.setTitle(title);
                tbBuyerShow.setUserName(userName);
                tbBuyerShow.setUserUrl(userUrl);
                tbBuyerShow.setUserTitle(userTitle);
                tbBuyerShow.setImgId(pics.getString("id"));     // image ID
                tbBuyerShow.setImgUrl(pics.getString("path"));  // image link
                tbBuyerShow.setTargetUrl(targetUrl);
                tbBuyerShow.setPageNum(index);
                tbBuyerShows.add(tbBuyerShow);
            }
        }
        return tbBuyerShows;
    }

    /**
     * Removes a JSONP wrapper ({@code callback( ... )}) and returns the JSON inside.
     * Only the outermost parentheses are removed, so parentheses that occur inside
     * the JSON text itself are preserved. A body that already starts with '{' is
     * returned unchanged (apart from trimming).
     */
    private static String stripJsonp(String body) {
        String trimmed = body.trim();
        if (trimmed.startsWith("{")) {
            return trimmed;
        }
        int open = trimmed.indexOf('(');
        int close = trimmed.lastIndexOf(')');
        if (open >= 0 && close > open) {
            return trimmed.substring(open + 1, close);
        }
        return trimmed;
    }

    /**
     * Creates the request parameter set. Only the application key, timestamp,
     * signature and data vary; the API name, version, timeouts, response type and
     * callback name are fixed.
     *
     * @param appkey application key
     * @param t      request timestamp in milliseconds, as text
     * @param sign   request signature
     * @param data   JSON text of the {@code data} parameter
     * @return the populated parameter object
     */
    static Params newParams(String appkey, String t, String sign, String data) {
        Params params = new Params();
        params.setAppKey(appkey);
        params.setT(t);
        params.setSign(sign);
        params.setApi("mtop.taobao.social.feed.aggregate");
        params.setV("1.0");
        params.setTimeout("300000");
        params.setTimer("300000");
        params.setType("jsonp");
        params.setDataType("jsonp");
        params.setCallback("mtopjsonp");
        params.setData(data);
        return params;
    }

    /**
     * Appends the URL-encoded query string built from {@code params} to {@code url}.
     *
     * <p>Example of the resulting URL (line breaks added for readability):
     * <pre>
     * https://acs.m.taobao.com/h5/mtop.taobao.social.feed.aggregate/1.0/
     *   ?appKey=12574478
     *   &amp;t=1581927984172
     *   &amp;sign=e83a3add7b5fc1b70b0601a2ccd133e9
     *   &amp;api=mtop.taobao.social.feed.aggregate
     *   &amp;v=1.0
     *   &amp;timeout=300000
     *   &amp;timer=300000
     *   &amp;type=jsonp
     *   &amp;dataType=jsonp
     *   &amp;callback=mtopjsonp1
     *   &amp;data=%7B%22params%22%3A...%7D
     * </pre>
     *
     * @param url    endpoint prefix, expected to end with '?'
     * @param params parameter values to encode
     * @return the complete request URL
     */
    static String htmlUrl(String url, Params params) {
        StringBuilder query = new StringBuilder(url);
        try {
            // The parameter is named "appKey" (capital K), as in the documented request URL.
            query.append("appKey=").append(URLEncoder.encode(params.getAppKey(), "utf-8"))
                    .append("&t=").append(URLEncoder.encode(params.getT(), "utf-8"))
                    .append("&sign=").append(URLEncoder.encode(params.getSign(), "utf-8"))
                    .append("&api=").append(URLEncoder.encode(params.getApi(), "utf-8"))
                    .append("&v=").append(URLEncoder.encode(params.getV(), "utf-8"))
                    .append("&timeout=").append(URLEncoder.encode(params.getTimeout(), "utf-8"))
                    .append("&timer=").append(URLEncoder.encode(params.getTimer(), "utf-8"))
                    .append("&type=").append(URLEncoder.encode(params.getType(), "utf-8"))
                    .append("&dataType=").append(URLEncoder.encode(params.getDataType(), "utf-8"))
                    .append("&callback=").append(URLEncoder.encode(params.getCallback(), "utf-8"))
                    .append("&data=").append(URLEncoder.encode(params.getData(), "utf-8"));
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform must support, so this cannot happen;
            // fail loudly rather than return a truncated URL.
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
        return query.toString();
    }
}