go房价爬取,土方法
package main

import (
"fmt"
"github.com/PuerkitoBio/goquery"
"net/http"
"strconv"
"strings"
"time"
)
// Crawler settings shared by main, HouseInfo and GetListUrl.
var (
	// url is the base address that the district and page number are appended to.
	url = "https://cd.lianjia.com/ershoufang/"

	// chenghua is the district path segment.
	chenghua = "chenghua"

	// Cookie is sent as the Cookie header of every request; empty by default.
	Cookie = ""

	// UserAgent is sent as the User-Agent header of every request; empty by default.
	UserAgent = ""
)
// main walks the listing pages of the configured district and prints the
// details of every house linked from each page.
func main() {
	const lastPage = 1 // number of listing pages to visit

	for page := 1; page <= lastPage; page++ {
		pageURL := url + chenghua + "/pg" + strconv.Itoa(page)
		detailURLs := GetListUrl(pageURL)

		fmt.Println("======================")
		for idx := range detailURLs { // print the information of each detail page
			fmt.Println("+++++++++++++++++++++++++")
			HouseInfo(detailURLs[idx])
		}
		fmt.Println("======================")
	}
}
func HouseInfo(url string) {
client := &http.Client{}
m := make(map[string]string)
// 提交请求
request, err := http.NewRequest("GET", url, nil)
// 增加header
request.Header.Add("Cookie", Cookie)
request.Header.Add("User-Agent", UserAgent)
if err != nil {
panic(err)
}
// 处理结果
response, _ := client.Do(request)
time.Sleep(time.Second * 5)
if response.StatusCode != 200 {
fmt.Println("请求失败,返回StatusCode值为:", response.StatusCode)
}
defer response.Body.Close()
doc, err := goquery.NewDocumentFromReader(response.Body)
if err != nil {
panic(err)
}
// 价格地理信息
var area = ""
doc.Find("#introduction div.base div.content li:nth-child(3)").Each(func(i int, s *goquery.Selection) {
area = s.Text()
})
area = area[12:]
index := strings.LastIndex(area, "㎡")
m["建筑面积"] = area[:index]
var price = ""
doc.Find("div.price span.total").Each(func(i int, s *goquery.Selection) {
price = s.Text()
})
areaInt, err := strconv.ParseFloat(area[:index], 64)
priceInt, err := strconv.ParseFloat(price, 64)
fmt.Println("建筑面积㎡:" ,areaInt)
fmt.Println("总价:" ,priceInt)
if areaInt < 80 || priceInt > 120 {
return
}
fmt.Println("地址链接:",url)
var single = ""
doc.Find("div.price div.unitPrice span.unitPriceValue").Each(func(i int, s *goquery.Selection) {
single = s.Text()
})
fmt.Println("单价:" + single)
var subInfo = ""
doc.Find("div.houseInfo div.subInfo").Each(func(i int, s *goquery.Selection) {
subInfo = s.Text()
})
fmt.Println("修建时间:" + subInfo)
var hx = ""
doc.Find("#introduction div.base div.content li:nth-child(1)").Each(func(i int, s *goquery.Selection) {
hx = s.Text()
})
fmt.Println("房屋户型:" + hx[12:])
m["房屋户型"] = hx[12:]
var lc = ""
doc.Find("#introduction div.base div.content li:nth-child(2)").Each(func(i int, s *goquery.Selection) {
lc = s.Text()
})
fmt.Println("所在楼层:" + lc[12:])
m["所在楼层"] = lc[12:]
var hxjg = ""
doc.Find("#introduction div.base div.content li:nth-child(4)").Each(func(i int, s *goquery.Selection) {
hxjg = s.Text()
})
fmt.Println("户型结构:" + hxjg[12:])
m["户型结构"] = hxjg[12:]
var tnmj = ""
doc.Find("#introduction div.base div.content li:nth-child(5)").Each(func(i int, s *goquery.Selection) {
tnmj = s.Text()
})
fmt.Println("套内面积:" + tnmj[12:])
m["套内面积"] = tnmj[12:]
var jzlx = ""
doc.Find("#introduction div.base div.content li:nth-child(6)").Each(func(i int, s *goquery.Selection) {
jzlx = s.Text()
})
fmt.Println("建筑类型:" + jzlx[12:])
m["建筑类型"] = jzlx[12:]
var fwcx = ""
doc.Find("#introduction div.base div.content li:nth-child(7)").Each(func(i int, s *goquery.Selection) {
fwcx = s.Text()
})
fmt.Println("房屋朝向:" + fwcx[12:])
m["房屋朝向"] = fwcx[12:]
var jzjg = ""
doc.Find("#introduction div.base div.content li:nth-child(8)").Each(func(i int, s *goquery.Selection) {
jzjg = s.Text()
})
fmt.Println("建筑结构:" + jzjg[12:])
m["建筑结构"] = jzjg[12:]
var zx = ""
doc.Find("#introduction div.base div.content li:nth-child(9)").Each(func(i int, s *goquery.Selection) {
zx = s.Text()
})
fmt.Println("装修情况:" + zx[12:])
m["装修情况"] = zx[12:]
var thbl = ""
doc.Find("#introduction div.base div.content li:nth-child(10)").Each(func(i int, s *goquery.Selection) {
thbl = s.Text()
})
fmt.Println("梯户比例:" + thbl[12:])
m["梯户比例"] = thbl[12:]
var dt = ""
doc.Find("#introduction div.base div.content li:nth-child(11)").Each(func(i int, s *goquery.Selection) {
dt = s.Text()
})
fmt.Println("配备电梯:" + dt)
fmt.Println("配备电梯:" + dt[12:])
m["配备电梯"] = dt[12:]
var gpsj = ""
doc.Find("#introduction div.transaction div.content li:nth-child(1) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
gpsj = s.Text()
})
fmt.Println("挂牌时间:" + gpsj)
m["挂牌时间"] = gpsj
var jysx = ""
doc.Find("#introduction div.transaction div.content li:nth-child(2) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
jysx = s.Text()
})
fmt.Println("交易权属:" + jysx)
m["交易权属"] = jysx
var scjy = ""
doc.Find("#introduction div.transaction div.content li:nth-child(3) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
scjy = s.Text()
})
fmt.Println("上次交易:" + scjy)
m["上次交易"] = scjy
var fwsx = ""
doc.Find("#introduction div.transaction div.content li:nth-child(4) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
fwsx = s.Text()
})
fmt.Println("房屋用途:" + fwsx)
m["房屋用途"] = fwsx
var fwnx = ""
doc.Find("#introduction div.transaction div.content li:nth-child(5) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
fwnx = s.Text()
})
fmt.Println("房屋年限:" + fwnx)
m["房屋年限"] = fwnx
var cqss = ""
doc.Find("#introduction div.transaction div.content li:nth-child(6) span:nth-child(2) ").Each(func(i int, s *goquery.Selection) {
cqss = s.Text()
})
fmt.Println("产权所属:" + cqss)
m["产权所属"] = cqss
var dyxx = ""
doc.Find("#introduction div.transaction div.content li:nth-child(7) span").Each(func(i int, s *goquery.Selection) {
dyxx, _ = s.Attr("title")
})
fmt.Println("抵押信息:" + dyxx)
m["抵押信息"] = dyxx
}
// GetListUrl fetches the listing page at url and returns the href of every
// detail-page link found on it.
//
// The request carries the package-level Cookie and UserAgent values as
// headers. A malformed url or an unreadable response body panics. A failed
// request or a non-200 status is reported and nil is returned.
func GetListUrl(url string) []string {
	client := &http.Client{Timeout: 30 * time.Second}

	// Build the request; check the error before touching the request value.
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		panic(err)
	}
	request.Header.Add("Cookie", Cookie)
	request.Header.Add("User-Agent", UserAgent)

	response, err := client.Do(request)
	if err != nil {
		fmt.Println("请求失败:", err)
		return nil
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		// An error page carries no listing links, so there is nothing to parse.
		fmt.Println("请求失败,返回StatusCode值为:", response.StatusCode)
		return nil
	}

	doc, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		panic(err)
	}

	var urls []string
	// NOTE(review): the selector previously read "#content uldiv.title a",
	// which names a non-existent <uldiv> element and never matches; it is
	// assumed a space was lost — confirm against the live page markup.
	doc.Find("#content ul div.title a").Each(func(_ int, s *goquery.Selection) {
		// Skip anchors without a usable target.
		if href, ok := s.Attr("href"); ok && href != "" {
			urls = append(urls, href)
		}
	})
	return urls
}
大佬,问下这个要怎么才能运行 这个怎么运行? golang天下第一
页:
[1]