[原创]网站TDK扫描器，加IP显示，这不就是做站的救星么

小奥2014 · 发表于 2022-11-3 17:43

[Python] 纯文本查看 复制代码

# -*- coding:utf-8 -*-
import re
import os
import requests
import yagmail
import urllib3
import logging
from concurrent.futures import ThreadPoolExecutor
import time
import threading
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import xlwt
import xlrd
import socket
from xlutils.copy import copy


# Request headers used when fetching a page's status code and title:
# a desktop Chrome User-Agent string.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}


def get_ip(url):
    """Resolve the host named in *url* to an IP address string.

    Accepts a bare host (``example.com``), a ``host:port`` pair, or a full
    ``http://`` / ``https://`` URL with optional port, path and query. Only
    the hostname part is handed to the resolver.

    Args:
        url: Host name or URL; surrounding whitespace/newlines are ignored.

    Returns:
        The address string of the first ``socket.getaddrinfo`` result.

    Raises:
        ValueError: If no hostname can be extracted from *url*.
        socket.gaierror: If the hostname cannot be resolved.
    """
    from urllib.parse import urlsplit

    target = url.strip()
    # urlsplit only recognises a network location after "//", so give
    # scheme-less input such as "example.com:8080" an empty scheme.
    if '://' not in target:
        target = '//' + target
    host = urlsplit(target).hostname
    if not host:
        raise ValueError('no hostname in %r' % (url,))
    # Only the address of the first result is used, so no service/port
    # is requested from the resolver.
    myaddr = socket.getaddrinfo(host, None)
    return myaddr[0][4][0]

def _first_match(pattern, text):
    """Return the first stripped match of *pattern* in *text*, or " " if none."""
    found = re.findall(pattern, text, re.IGNORECASE)
    return found[0].strip() if found else " "


def get_codetitle(url):
    """Fetch *url* and collect its status code, TDK fields and IP address.

    Each of title, description and keywords is extracted independently, so
    a page that lacks one tag still reports the others. Every field has a
    placeholder default, so the function always returns a full tuple even
    when the request itself fails.

    Args:
        url: Absolute URL to request.

    Returns:
        A tuple ``(resurl, code, title, description, keywords, ip)``:
        ``resurl`` is the final URL after redirects, ``code`` is the HTTP
        status code (or "无法访问" when the request failed), the three text
        fields default to " " when absent, and ``ip`` is "null" when the
        host could not be resolved.
    """
    code = "无法访问"
    title = " "
    description = " "
    keywords = " "
    resurl = " "
    try:
        urllib3.disable_warnings()
        # NOTE(review): presumably has no effect on requests.get(), which
        # builds its own session/adapter - kept as in the original; confirm.
        requests.adapters.DEFAULT_RETRIES = 5
        res = requests.get(url, headers=header, verify=False, allow_redirects=True, timeout=(3,12))
        res.encoding = res.apparent_encoding
        code = res.status_code
        resurl = res.url
        page = res.text
        title = _first_match(r"(?<=\<title\>)(?:.|\n)+?(?=\<)", page)
        description = _first_match(r"(?<=\<meta name=\"description\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)", page)
        keywords = _first_match(r"(?<=\<meta name=\"keywords\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)", page)
    except Exception:
        # Deliberately broad: this runs inside a pool worker, where an
        # escaping exception would silently drop the row for this URL.
        print('%s网址无效或者IP被封锁'%(url))

    try:
        ip = get_ip(url)
    except Exception:
        ip = 'null'

    return resurl,code,title,description,keywords,ip


def write(url):
    """Scan *url* and append its result as one new row of the report workbook.

    The row holds, in column order: source URL, final URL, status code,
    title, description, keywords and IP, all stored as strings.

    Args:
        url: Absolute URL to scan; passed to ``get_codetitle``.
    """
    resurl, code, title, description, keywords, ip = (
        str(field) for field in get_codetitle(url)
    )
    print(url+ "|" +code+ "|" +title+  "|" +ip)
    report_file = path + savefilename + '.xls'
    # The workbook is re-read, copied and saved again for every row, so the
    # whole read-modify-write cycle is serialised across worker threads.
    with lock:
        word_book = xlrd.open_workbook(report_file)
        # First unused row index of the first sheet.
        next_row = word_book.sheet_by_index(0).nrows
        new_work_book = copy(word_book)
        new_sheet = new_work_book.get_sheet(0)
        row = (url, resurl, code, title, description, keywords, ip)
        for column, value in enumerate(row):
            new_sheet.write(next_row, column, value)
        new_work_book.save(report_file)




n = 0  # not referenced anywhere in the visible script
# Working-directory prefix for every file the script touches. It ends with a
# path separator so that `path + filename` stays inside the working directory
# instead of gluing the file name onto the directory name.
path = os.path.join(os.getcwd(), '')
logging.captureWarnings(True)
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
start = time.time()
# Serialises the workers' read-modify-write access to the report workbook.
lock = threading.Lock()
# Timestamped report name; dots instead of colons in the time part.
savefilename = time.strftime("%Y-%m-%d %H.%M.%S")

# Create the report workbook containing only the header row.
myxls = xlwt.Workbook()
sheet1 = myxls.add_sheet(u'title', cell_overwrite_ok=True)
for column, caption in enumerate(
    ("源地址", "跳转地址", "状态码", "标题", "描述", "关键词", "IP")
):
    sheet1.write(0, column, caption)
myxls.save(path + savefilename + '.xls')

# Normalise url.txt (one host, host:port or URL per line) into url-run.txt:
# entries without an http:// or https:// scheme get "http://" prepended,
# entries that already carry a scheme are copied unchanged, and blank lines
# are skipped.
with open(os.path.join(path, "url.txt"), "r") as f:
    line = f.readlines()

# Written as UTF-8 because the file is read back as UTF-8 below.
with open(os.path.join(path, "url-run.txt"), "w", encoding='utf-8') as f2:
    for i in line:
        i = i.strip()
        if not i:
            continue
        if not i.lower().startswith(('http://', 'https://')):
            f2.write('http://'+i+'\n')
        else:
            f2.write(i+'\n')

# Load the normalised URL list.
with open(os.path.join(path, 'url-run.txt'), 'r', encoding='utf-8') as f:
    urls_data = [data.strip().strip('\\') for data in f]

# Scan every URL on a thread pool; leaving the with-block waits for all
# submitted tasks to finish.
with ThreadPoolExecutor(max_workers=100) as executor:
    futures = [(urls, executor.submit(write, url=urls)) for urls in urls_data]

end = time.time()

# executor.submit() stores a worker's exception on its future instead of
# raising it, so report failures explicitly rather than losing rows silently.
for urls, future in futures:
    error = future.exception()
    if error is not None:
        print('%s 处理失败: %r' % (urls, error))

print("总耗时:",end - start,"秒")


# Send an e-mail notification once the scan has finished.
# NOTE(review): the account, password and recipient below are placeholders
# that must be edited before use; consider loading them from configuration.
try:
    # SMTP account and password of the sending mailbox.
    yag = yagmail.SMTP(user="xxxxxx@163.com", password="密码你的", host='smtp.163.com')
    try:
        contents = ['TDK获取时间：%s'%(end - start)]  # message body
        DDOSD_Sender = 'TDK获取完成通知'  # message subject
        receiver = ["xxxx@qq.com"]  # recipient list; may hold several addresses
        yag.send(receiver, DDOSD_Sender, contents)
    finally:
        # Close the SMTP connection even when sending fails.
        yag.close()
except Exception as error:
    # Best-effort notification: a mail failure must not crash the scan, but
    # the underlying error is printed so the real cause is not hidden.
    print('smtp 同个时间发送超过10条或者过期', error)

纯干货，原创源码，使用方法也很简单：请将py文件和url.txt保存在同一个文件夹即可，运行后会生成xls格式的报告文件

这里提供成品软件下载：https://www.123pan.com/s/ePprVv-dPoJ

效果图：

速度很快，大概一分钟能跑几千个网站，准确率绝对没问题，不存在缓存。尊重原创哈，感谢大家！喜欢的话请多多支持FiimeROM的奥奥，我也会多给大家贡献工具的。

小奥2014 · 发表于 2022-11-15 19:16

zzhxjh 发表于 2022-11-14 13:27
这个不错，如果再校对提示，就可以当做监控，防止被挂马，防止一些问题发生，也可以当做备份，出问题可以快 ...

我就是挂的宝塔每天自动跑一份按日期分类做TDK备份

小奥2014 · 发表于 2022-11-3 23:42

jokony 发表于 2022-11-3 20:39
可以做仿站吗？

不是不是，你想错了，就是拿来读title、keywords和描述的，也能读IP，站群用。
对于做SEO上分的兄弟来说可以

隔壁家的王二狗 · 发表于 2022-11-3 17:45

这玩意有什么用看起来像是扫网站所有url的

小奥2014 · 发表于 2022-11-3 17:47

隔壁家的王二狗发表于 2022-11-3 17:45
这玩意有什么用看起来像是扫网站所有url的

对做站群的看TDK有没有中毒被改就挺有用的

隔壁家的王二狗 · 发表于 2022-11-3 17:48

小奥2014 发表于 2022-11-3 17:47
对做站群的看TDK有没有中毒被改就挺有用的

哈哈不太了解这个顶一下帖子！

小奥2014 · 发表于 2022-11-3 17:48

隔壁家的王二狗发表于 2022-11-3 17:48
哈哈不太了解这个顶一下帖子！

多做贡献才能优雅摸鱼我工作需要而已

dokuro · 发表于 2022-11-3 17:58

帮顶一下吧

开创者 · 发表于 2022-11-3 20:20

感谢发布原创作品，论坛因你更精彩！

jokony · 发表于 2022-11-3 20:39

可以做仿站吗？

li645944229 · 发表于 2022-11-4 12:51

站群是用来引流的吗

帐号		自动登录	找回密码
密码			注册[Register]

[Python 原创] [原创]网站TDK扫描器，加IP显示，这不就是做站的救星么

免费评分