吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 3782|回复: 8
收起左侧

[C&C++ 原创] C语言版爬虫(爬取大乐透往期全部中奖号码)

[复制链接]
xiaochou56 发表于 2021-9-8 14:30
C语言
编译及运行环境:Ubuntu16.04 64位
依赖:libcurl.so.4.6.0 libgumbo.so.1.0.0
运行示例:

运行截图

运行截图

爬取数据展示:
1631082430(1).jpg
附代码:
[C] 纯文本查看 复制代码
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <curl/curl.h>
#include <include/gumbo.h>

/* Output file (relative path) that print_data() appends the scraped values to. */
#define DLT_FILE_PATH "dlt.txt"

/*
 * Terminal colour helpers built on ANSI escape sequences.
 * Each macro expands to a complete printf statement INCLUDING its trailing
 * semicolon, so call sites write the bare name (e.g. `RED`) with no `;`.
 * Because of that they must not be used as the sole body of an unbraced
 * if/else.
 */
#define CLOSE   printf("\033[0m");   /* reset all attributes */
#define RED     printf("\033[31m");
#define GREEN   printf("\033[36m");  /* NOTE(review): SGR 36 is normally cyan (green is 32) - confirm intent */
#define YELLOW  printf("\033[33m");
#define BLUE    printf("\033[34m");

/* Growable in-memory buffer used to accumulate an HTTP response body. */
typedef struct {
  uint8_t *memory;  /* heap block grown with realloc() by writeMemFunc; NUL-terminated after each append */
  size_t size;      /* number of payload bytes stored, not counting the trailing NUL */
} MyResp;

/* Shared scratch buffer holding the most recently formatted request URL. */
char url[256] = "";

/*
 * Build the history-query URL for the range start..end.
 *
 * The result is written into the file-scope `url` buffer, so every call
 * overwrites the previous result and the returned pointer always aliases
 * `url`. The caller must not free it.
 */
char *getURLStr(int32_t start, int32_t end)
{
    char *const out = url;

    (void)snprintf(out, sizeof url,
                   "http://datachart.500.com/dlt/history/newinc/history.php?start=%d&end=%d&sort=1",
                   start, end);

    return out;
}

/*
 * libcurl write callback: append one chunk of response data to a MyResp.
 *
 * contents  - chunk bytes handed over by libcurl (not NUL-terminated)
 * size, num - element size and element count; the chunk is size * num bytes
 * userData  - the MyResp* registered through CURLOPT_WRITEDATA
 *
 * Returns the number of bytes stored. On allocation failure or size
 * overflow it returns 0 and leaves the buffer untouched; libcurl treats a
 * return value different from size * num as a write error and aborts the
 * transfer.
 */
static size_t writeMemFunc(void *contents, size_t size, size_t num, void *userData) {
    MyResp *mem = (MyResp *)userData;

    /* Refuse a chunk whose byte count cannot be represented in size_t. */
    if (num != 0 && size > SIZE_MAX / num) {
        printf("Response chunk too large (size overflow).\n");
        return 0;
    }
    size_t realsize = size * num;

    /* Same for the grown buffer: old payload + new chunk + trailing NUL. */
    if (realsize > SIZE_MAX - mem->size - 1) {
        printf("Response too large (size overflow).\n");
        return 0;
    }

    /* Grow through a temporary so the old block survives a failed realloc. */
    uint8_t *ptr = realloc(mem->memory, mem->size + realsize + 1);
    if (!ptr) {
        /* out of memory! */
        printf("Not enough memory (realloc returned NULL).\n");
        return 0;
    }

    mem->memory = ptr;
    memcpy(&(mem->memory[mem->size]), contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = 0;  /* keep the buffer usable as a C string */

    return realsize;
}

/*
 * Append the text of `node`'s first child to DLT_FILE_PATH when the node's
 * class attribute is one of the recognised values.
 *
 * node - element node whose first child is inspected
 * attr - the node's "class" attribute; its value selects the output format:
 *          "t_tr1"  : text containing '-' is written as "| <text>\n"
 *                     (presumably the draw date that ends a row - confirm)
 *          "cfont2" : text of exactly two characters is written as "<text> "
 *          "cfont4" : any text is written as "<text> "
 *
 * Nothing is written for other classes, for elements without children, or
 * when the first child is not a text node. The output file is still opened
 * (and therefore created) in those cases, as before.
 */
void print_data(GumboNode *node,GumboAttribute *attr)
{
    const GumboVector *children = &node->v.element.children;
    const char *prefix = "";
    const char *text = "";
    const char *suffix = "";

    /* An element may have no children at all; never read data[0] blindly. */
    if (children->length > 0) {
        const GumboNode *first = children->data[0];

        if (first != NULL && first->type == GUMBO_NODE_TEXT) {
            const char *value = first->v.text.text;

            if (strcmp(attr->value, "t_tr1") == 0) {
                if (strstr(value, "-")) {
                    prefix = "| ";
                    text = value;
                    suffix = "\n";
                }
            } else if (strcmp(attr->value, "cfont2") == 0) {
                if (strlen(value) == 2) {
                    text = value;
                    suffix = " ";
                }
            } else if (strcmp(attr->value, "cfont4") == 0) {
                text = value;
                suffix = " ";
            }
        }
    }

    FILE *file = fopen(DLT_FILE_PATH, "a+");
    if (file == NULL) return;

    /*
     * Write straight to the stream instead of formatting into a fixed-size
     * stack buffer first: page text has no length bound, so the intermediate
     * buffer could be overrun.
     */
    fprintf(file, "%s%s%s", prefix, text, suffix);
    fclose(file);
}

/*
 * Depth-first walk over the parse tree rooted at `node`. Every element that
 * carries a "class" attribute is handed to print_data(), then all of its
 * children are visited in document order.
 *
 * node - subtree root; non-element nodes (text, comments, ...) are ignored
 * tag  - unused; retained so existing callers keep compiling
 *
 * Each child is visited exactly once. Previously the children of a <dt>
 * element were walked twice, which duplicated their output.
 */
void get_data(GumboNode *node, GumboTag tag)
{
    (void)tag;

    if (node == NULL || node->type != GUMBO_NODE_ELEMENT) return;

    GumboAttribute *attr = gumbo_get_attribute(&node->v.element.attributes, "class");
    if (attr != NULL)
        print_data(node, attr);

    const GumboVector *children = &node->v.element.children;

    /* GumboVector.length is unsigned, so the index is unsigned as well. */
    for (unsigned int i = 0; i < children->length; ++i)
        get_data(children->data[i], GUMBO_TAG_DT);
}

/*
 * Parse an HTML response body and extract the data it contains via
 * get_data().
 *
 * resp   - response bytes; need not be NUL-terminated
 * resLen - number of valid bytes in resp
 *
 * Returns 0 on success, -1 when there is nothing to parse or the parser
 * could not produce a document.
 *
 * The buffer is passed to the parser together with its explicit length.
 * The earlier version copied exactly resLen bytes into a resLen-byte
 * allocation and called gumbo_parse(), which measures its input with
 * strlen() and so read past the end of the unterminated copy.
 * resp must stay valid until this function returns; the caller keeps
 * ownership of it.
 */
int32_t parseResponse(uint8_t *resp, uint32_t resLen)
{
    if (resp == NULL || resLen == 0) return -1;

    GumboOutput *output = gumbo_parse_with_options(&kGumboDefaultOptions,
                                                   (const char *)resp, resLen);
    if (output == NULL) return -1;

    get_data(output->root, GUMBO_TAG_DT);
    gumbo_destroy_output(&kGumboDefaultOptions, output);

    return 0;
}

int32_t httpRequest(int32_t start, int32_t end)
{
    CURL *curl = curl_easy_init();
    if(curl == NULL){
        printf("curl init failed.\n");
        return -1;
    }

    struct curl_slist *headers = curl_slist_append(NULL, "User-Agent:Linux");
    curl_slist_append(headers, "Connection:keep-alive");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    MyResp resBuff;
    memset(&resBuff, 0, sizeof(MyResp));
    curl_easy_setopt(curl, CURLOPT_URL, getURLStr(start, end));
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeMemFunc);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resBuff);

    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
    CURLcode ret;
    ret = curl_easy_perform(curl);
    curl_slist_free_all(headers);
    if (ret != CURLE_OK) {
        free(resBuff.memory);
        curl_easy_cleanup(curl);
        printf("curl_easy_perform failed.Ret:%d", ret);
        return -1;
    }

    long info;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &info);
    curl_easy_cleanup(curl);
    RED
    printf("RESPONSE_CODE: %ld RESP_SIZE: %lu\n", info, resBuff.size);
    CLOSE
    ret = parseResponse(resBuff.memory, resBuff.size);
    free(resBuff.memory);

    return 0;
}

/*
 * Program entry point: delete any previous output file, then download and
 * parse the history page.
 *
 * Exits with EXIT_SUCCESS when httpRequest() reports success and
 * EXIT_FAILURE otherwise, so scripts can detect a failed run.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* Output is opened in append mode, so start from a clean file.
     * A failing remove() (e.g. the file does not exist yet) is harmless. */
    remove(DLT_FILE_PATH);

    /* NOTE(review): 7001..70001 presumably spans every draw number - confirm. */
    if (httpRequest(7001, 70001) != 0)
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

whathell 发表于 2021-9-8 16:19
都是人工控制的,
wanghao6912 发表于 2021-9-8 16:42
刘伟坤 发表于 2021-9-8 17:03
mckof 发表于 2021-9-8 17:10
当做学习交流就好了,除非兄台找到必胜的办法
吾爱福利 发表于 2021-9-8 17:13
哈哈哈 二等奖以下的小奖适当放一点
alien0774 发表于 2021-9-8 17:14
再来个大数据选号
SomerHalder 发表于 2021-9-8 17:38
吾爱福利 发表于 2021-9-8 17:13
哈哈哈 二等奖以下的小奖适当放一点

放也不是普通人三天两头能碰到的 太真实了哈哈哈哈
转一圈 发表于 2021-10-20 09:47
感谢分享
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-25 11:34

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表