本帖最后由 songjing 于 2022-2-21 09:53 编辑
## 所用到的技术
- springboot
- mysql
- mybatisplus
- jsoup
/*
 * Schema dump for the `area` table (exported with Navicat Premium Data Transfer).
 *
 *   Source server : 127.0.0.1:3306, MariaDB, version 100510
 *   Source schema : country_city
 *   Target server : MariaDB, version 100510
 *   File encoding : 65001
 *   Exported      : 19/02/2022 18:05:31
 */

SET NAMES utf8mb4;

-- Foreign key checks are switched off while the table is dropped and
-- recreated, and switched back on at the end of the script.
SET FOREIGN_KEY_CHECKS = 0;

-- ---------------------------------------------------------------
-- `area`: one row per area; `parent_id` holds the id of another
-- area row (the crawler writes 0 for top-level rows).
-- ---------------------------------------------------------------
DROP TABLE IF EXISTS `area`;

CREATE TABLE `area` (
    `id`        INT(11)      NOT NULL     COMMENT 'id',
    `name`      VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_general_ci
                             DEFAULT NULL COMMENT '地址名称',
    `parent_id` INT(11)      DEFAULT NULL COMMENT '属于那个城市',
    PRIMARY KEY (`id`) USING BTREE
)
    ENGINE = InnoDB
    DEFAULT CHARACTER SET = utf8
    COLLATE = utf8_general_ci
    ROW_FORMAT = DYNAMIC;

SET FOREIGN_KEY_CHECKS = 1;
## Java 实现
package com.jsoup.jsoup;
import lombok.SneakyThrows;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
/**
 * Crawls the 2021 administrative-division pages published on stats.gov.cn and
 * stores the provinces and their cities through {@code AreaService}.
 *
 * <p>Provinces are saved with {@code parent_id = 0}; every city is saved with
 * the id of the province whose page it was found on. Ids are assigned
 * sequentially in crawl order, starting at 1.
 *
 * @author whz
 */
@RestController
public class CityController {

    /** Directory of the 2021 listing; hrefs read from the index page are appended to it. */
    private static final String BASE_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";

    /** User agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";

    /** Timeout, in milliseconds, applied to every request. */
    private static final int TIMEOUT_MILLIS = 6000;

    @Autowired
    private AreaService areaService;

    /**
     * Handles {@code GET test}: downloads the province index, follows each
     * province link to its city page, and saves everything in one batch.
     *
     * <p>A failed download aborts the crawl: the {@link IOException} thrown by
     * {@link #fetch(String)} is propagated (via {@code @SneakyThrows}) instead
     * of being printed and followed by a {@code NullPointerException} on the
     * missing document. Nothing is saved in that case, because the batch insert
     * only happens after every page has been read.
     */
    @SneakyThrows
    @GetMapping("test")
    private void city() {
        Document indexPage = fetch(BASE_URL + "index.html");

        List<Area> areaList = new ArrayList<>();
        int lastId = 0;

        for (Element provinceRow : indexPage.getElementsByClass("provincetr")) {
            for (Element provinceCell : provinceRow.getElementsByTag("td")) {
                Elements links = provinceCell.getElementsByTag("a");
                if (links.isEmpty()) {
                    // A cell without a link has no province page to follow; skipping it
                    // avoids storing a nameless province and requesting the bare directory.
                    continue;
                }

                Area province = new Area();
                province.setId(++lastId);
                province.setName(provinceCell.text());
                province.setParent_id(0);
                areaList.add(province);

                Document provincePage = fetch(BASE_URL + links.attr("href"));

                for (Element cityRow : provincePage.getElementsByClass("citytr")) {
                    Elements cityCells = cityRow.getElementsByTag("td");
                    if (cityCells.isEmpty()) {
                        // No cells means no name to store and nothing worth an id.
                        continue;
                    }

                    // One id per row. The name comes from the last cell of the row
                    // (presumably a code cell precedes it -- confirm against the page).
                    Area city = new Area();
                    city.setId(++lastId);
                    city.setParent_id(province.getId());
                    city.setName(cityCells.get(cityCells.size() - 1).text().trim());
                    areaList.add(city);
                }
            }
        }

        areaService.saveBatch(areaList);
    }

    /**
     * Downloads and parses one page with the shared user agent and timeout.
     *
     * @param url absolute URL of the page to download
     * @return the parsed document
     * @throws IOException if the request fails or times out
     */
    private Document fetch(String url) throws IOException {
        return Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .ignoreContentType(true)
                .get();
    }
}
因为工作需要一份中国地图数据,但不需要精确到县级和街道,所以只爬取到了市级,在此记录一下。
爬取网址是这个 http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/index.html
数据来自国家统计局,肯定正确;好像是一年更新一次,2022 年的还没有出来,所以用的是 2021 年的。
如果需要经纬度的话 可以看看这个大佬写的
java实现调用百度接口将大量数据库中保存的地址转换为经纬度
参考链接1
参考链接2
# **百目无她,百书智花,君当醒悟,建我中华**
gitee 地址:https://gitee.com/wanghongzhan/jsoup.git