​
基于JAVA实现网络爬虫并且将实时数据存储到数据库中
打开网页http://www.weather.com.cn/​​
​
点击今天可查看详细天气信息
​
按F12打开开发者工具查看网络请求,可以发现下面这个接口返回的正是我们需要的数据
http://d1.weather.com.cn/sk_2d/101110101.html?_=1646981448529
​
这个101110101是城市编码,可在city.js中查看各个城市的编码
https://j.i8tq.com/weather2020/search/city.js
​
调试接口:可直接复制当前接口到Postman或者Apifox中测试
​​​
Apifox:
​​
​
Postman:
​​
​​
​
测试接口时可将多余的请求参数或者请求头删除,之后即可用Postman或者Apifox生成Java代码
Postman:
​​
Apifox:
​​
​ ​
复制这些代码可直接放入项目中使用​[Java] 纯文本查看 复制代码 OkHttpClient client = new OkHttpClient().newBuilder()
.build();
// Build a GET request for the real-time weather endpoint. Compared with the URL captured in the
// browser, the trailing "?_=..." query parameter has been dropped. 101110101 is the city code.
Request request = new Request.Builder()
.url("http://d1.weather.com.cn/sk_2d/101110101.html")
.method("GET", null)
// NOTE(review): Referer is the only header kept after trimming the request — presumably the
// endpoint rejects calls without it; confirm before removing.
.addHeader("Referer", "http://www.weather.com.cn/")
.build();
// Send the request and wait for the response. Note that this snippet never closes the Response.
Response response = client.newCall(request).execute();
创建数据库表
-- Stores one real-time weather observation per row.
-- `id` has no default, so the application must supply it on insert.
-- The application skips the insert when a row with the same (city, date_time) already exists;
-- the unique key below makes the database enforce that rule and indexes the lookup.
CREATE TABLE `weather` (
  `id` varchar(36) NOT NULL,
  `city_name` varchar(32) DEFAULT NULL COMMENT '城市名称',
  `city` varchar(32) DEFAULT NULL COMMENT '城市编码',
  `temp` int(11) DEFAULT NULL COMMENT '温度',
  `tempf` int(11) DEFAULT NULL COMMENT '华氏度',
  `wd` varchar(32) DEFAULT NULL COMMENT '风向',
  `ws` varchar(32) DEFAULT NULL COMMENT '风力',
  `wse` varchar(32) DEFAULT NULL COMMENT '风速',
  `sd` varchar(32) DEFAULT NULL COMMENT '湿度',
  `date_time` datetime DEFAULT NULL COMMENT '日期时间',
  `weather` varchar(32) DEFAULT NULL COMMENT '天气',
  `qy` varchar(32) DEFAULT NULL COMMENT '气压',
  `njd` varchar(32) DEFAULT NULL COMMENT '能见度',
  `rain` decimal(10,2) DEFAULT NULL COMMENT '降雨量',
  `rain24h` decimal(10,2) DEFAULT NULL COMMENT '降雨量24小时',
  `aqi` int(11) DEFAULT NULL COMMENT '空气质量',
  `limitnumber` varchar(32) DEFAULT NULL COMMENT '限号',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_weather_city_date_time` (`city`,`date_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
java业务代码
[Java] 纯文本查看 复制代码 /**
* 获取数据业务代码
*/
@SneakyThrows
@Scheduled(cron = "0 0/3 * * * ? ")//每3分钟执行一次
public void getWeather(){
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
Request request = new Request.Builder()
.url("http://d1.weather.com.cn/sk_2d/101110101.html")
.method("GET", null)
.addHeader("Referer", "http://www.weather.com.cn/")
.build();
Response response = client.newCall(request).execute();
String body = response.body().string();
//去除多余字符串
body = body.substring(body.indexOf('{'));
JSONObject jsonObject = JSONObject.parseObject(body);
//创建实体类
Weather weather = new Weather();
weather.setCityName(jsonObject.getString("cityname"));
weather.setCity(jsonObject.getString("city"));
weather.setTemp(jsonObject.getInteger("temp"));
weather.setTempf(jsonObject.getInteger("tempf"));
weather.setWd(jsonObject.getString("WD"));
weather.setWs(jsonObject.getString("WS"));
weather.setWse(jsonObject.getString("wse"));
weather.setSd(jsonObject.getString("SD"));
weather.setQy(jsonObject.getString("qy"));
weather.setQy(jsonObject.getString("qy"));
weather.setNjd(jsonObject.getString("njd"));
weather.setDateTime(dateFormat(jsonObject.getString("date"),jsonObject.getString("time")));
weather.setRain(jsonObject.getBigDecimal("rain"));
weather.setRain24h(jsonObject.getBigDecimal("rain24h"));
weather.setAqi(jsonObject.getInteger("aqi"));
weather.setWeather(jsonObject.getString("weather"));
weather.setLimitnumber(jsonObject.getString("limitnumber"));
//查看库中是否有重复数据
QueryWrapper<Weather> queryWrapper = new QueryWrapper<>();
queryWrapper.eq("city",weather.getCity());
queryWrapper.eq("date_time",weather.getDateTime());
List<Weather> weathers = baseMapper.selectList(queryWrapper);
if(weathers.size()==0){
baseMapper.insert(weather);
}
}
/**
 * Combines the separate date and time strings of an observation into a {@link Date}.
 *
 * <p>The date string carries no year, so the current year is assumed. If that would place the
 * observation more than one day in the future (for example a 31 December reading processed
 * just after New Year), the previous year is used instead. Everything from the first "(" in
 * {@code dateStr} onwards is ignored; a string without "(" is used as is.
 *
 * <p>A {@link java.text.ParseException} is rethrown unchanged (via {@code @SneakyThrows}) when
 * the combined text does not match {@code yyyy年MM月dd日HH:mm}.
 *
 * @param dateStr month and day in {@code MM月dd日} form, optionally followed by a parenthesised suffix
 * @param timeStr time of day in {@code HH:mm} form
 * @return the observation time, interpreted in the JVM default time zone
 */
@SneakyThrows
private Date dateFormat(String dateStr, String timeStr){
    int suffixStart = dateStr.indexOf("(");
    String monthDay = suffixStart >= 0 ? dateStr.substring(0, suffixStart) : dateStr;

    Calendar now = Calendar.getInstance();
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    DateFormat format = new SimpleDateFormat("yyyy年MM月dd日HH:mm");
    Date date = format.parse(now.get(Calendar.YEAR) + "年" + monthDay + timeStr);

    // An observation cannot come from the future; allow one day of slack for clock or
    // time-zone differences, otherwise the reading belongs to the previous year.
    Calendar latestPlausible = (Calendar) now.clone();
    latestPlausible.add(Calendar.DAY_OF_MONTH, 1);
    if (date.after(latestPlausible.getTime())) {
        Calendar previousYear = Calendar.getInstance();
        previousYear.setTime(date);
        previousYear.add(Calendar.YEAR, -1);
        date = previousYear.getTime();
    }
    return date;
}
​
下面是数据库中插入的数据:定时任务虽然每3分钟执行一次,但重复的数据不会再次插入,因此库中大约每5分钟新增一条记录。
​​ |