[原创]网站TDK扫描器,加IP显示,这不就是做站的救星么
# -*- coding:utf-8 -*-
import logging
import os
import re
import socket
import threading
import time
from concurrent.futures import ThreadPoolExecutor

import requests
import urllib3
import xlrd
import xlwt
import yagmail
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from xlutils.copy import copy
# Browser-like request headers: a desktop Chrome User-Agent so target sites
# serve their normal pages instead of blocking an obvious script client.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36"
        " (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    ),
}
def get_ip(url):
    """Resolve the host part of *url* and return socket.getaddrinfo() results.

    Accepts bare hosts as well as full URLs.  Bug fix: the original only
    stripped the 'http://' scheme, so https URLs (and any URL with a path or
    port) were passed to getaddrinfo verbatim and always failed to resolve.

    Raises socket.gaierror when the host cannot be resolved (the caller
    catches this and records 'null').
    """
    host = url.strip()
    # Drop the scheme prefix, then anything after the host (path, port).
    for scheme in ('http://', 'https://'):
        if host.startswith(scheme):
            host = host[len(scheme):]
            break
    host = host.split('/', 1)[0].split(':', 1)[0]
    return socket.getaddrinfo(host, 'http')
def get_codetitle(url):
    """Fetch *url* and extract status code, <title>, meta description, keywords.

    Returns a 6-tuple (final_url, status_code, title, description, keywords, ip).
    On any request failure the textual fields keep placeholder defaults and the
    status code is the string "无法访问"; ip falls back to 'null' when the host
    cannot be resolved.
    """
    code = "无法访问"
    title = " "
    resurl = " "
    # Bug fix: description/keywords were only assigned inside the try block,
    # so any request failure made the final return raise NameError.
    description = " "
    keywords = " "
    try:
        urllib3.disable_warnings()
        requests.adapters.DEFAULT_RETRIES = 5
        res = requests.get(url, headers=header, verify=False,
                           allow_redirects=True, timeout=(3, 12))
        res.encoding = res.apparent_encoding
        code = res.status_code
        # Bug fix: re.findall returns a list, which has no .strip(); the
        # original therefore always raised into the except branch.  Use
        # re.search and keep the placeholder when the tag is absent.
        m = re.search(r"(?<=\<title\>)(?:.|\n)+?(?=\<)", res.text, re.IGNORECASE)
        if m:
            title = m.group(0).strip()
        m = re.search(r"(?<=\<meta name=\"description\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
                      res.text, re.IGNORECASE)
        if m:
            description = m.group(0).strip()
        m = re.search(r"(?<=\<meta name=\"keywords\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
                      res.text, re.IGNORECASE)
        if m:
            keywords = m.group(0).strip()
        resurl = res.url
    except Exception as error:
        print('%s网址无效或者IP被封锁' % (url))
    try:
        ip = get_ip(url)
    except Exception:
        ip = 'null'
    return resurl, code, title, description, keywords, ip
def write(url):
    """Scan *url* and append one result row to the shared .xls report.

    Relies on module globals lock, path and savefilename.  The whole
    read-modify-write of the workbook happens under the lock because
    xlrd/xlutils cannot append to an .xls file in place.
    """
    # Bug fix: the original assigned str(codetitle) -- the ENTIRE tuple -- to
    # every single column; unpack the 6-tuple returned by get_codetitle.
    resurl, code, title, description, keywords, ip = get_codetitle(url)
    resurl = str(resurl)
    code = str(code)
    title = str(title)
    description = str(description)
    keywords = str(keywords)
    ip = str(ip)
    print(url + "|" + code + "|" + title + "|" + ip)
    with lock:
        # NOTE(review): `path + savefilename` has no path separator; the same
        # expression is used by the module-level save, so the file is still
        # found — switch both sites to os.path.join together.
        word_book = xlrd.open_workbook(path + savefilename + '.xls')
        sheets = word_book.sheet_names()
        # Bug fix: sheet_by_name expects a single sheet name, not the list
        # returned by sheet_names().
        work_sheet = word_book.sheet_by_name(sheets[0])
        old_rows = work_sheet.nrows
        new_work_book = copy(word_book)
        new_sheet = new_work_book.get_sheet(0)
        row = old_rows  # append after the last existing row
        new_sheet.write(row, 0, url)
        new_sheet.write(row, 1, resurl)
        new_sheet.write(row, 2, code)
        new_sheet.write(row, 3, title)
        new_sheet.write(row, 4, description)
        new_sheet.write(row, 5, keywords)
        new_sheet.write(row, 6, ip)
        new_work_book.save(path + savefilename + '.xls')
# ---- one-time setup: silence TLS warnings, create the report workbook ----
path = os.getcwd()
logging.captureWarnings(True)
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
start = time.time()
# Serialises the read-modify-write of the .xls report across worker threads.
lock = threading.Lock()
# Timestamped report name; dots instead of colons keep it Windows-legal.
savefilename = time.strftime("%Y-%m-%d %H.%M.%S")
myxls = xlwt.Workbook()
sheet1 = myxls.add_sheet(u'title', cell_overwrite_ok=True)
# Header row: source URL, redirect URL, status, title, description, keywords, IP.
sheet1.write(0, 0, "源地址")
sheet1.write(0, 1, "跳转地址")
sheet1.write(0, 2, "状态码")
sheet1.write(0, 3, "标题")
sheet1.write(0, 4, "描述")
sheet1.write(0, 5, "关键词")
sheet1.write(0, 6, "IP")
# NOTE(review): `path + savefilename` lacks a path separator, so the report
# gets the CWD name glued on as a prefix.  write() opens the file with the
# same expression, so the script still works — fix both with os.path.join.
myxls.save(path + savefilename + '.xls')
# Normalise url.txt entries (bare host or ip:port) into full URLs in url-run.txt.
with open(path + "\\url.txt", "r") as f:
    raw_lines = f.readlines()
with open(path + "\\url-run.txt", "w") as f2:
    for entry in raw_lines:
        entry = entry.strip()
        if not entry:
            continue  # skip blank lines instead of emitting a bare "http://"
        # Bug fix: the substring test `'http://' not in i` mangled https URLs
        # into "http://https://..." — check the scheme prefix explicitly.
        if entry.startswith(('http://', 'https://')):
            f2.write(entry + '\n')
        else:
            f2.write('http://' + entry + '\n')
# Read the normalised URL list.  Bug fix: the original line was truncated to
# "urls_data =" (a SyntaxError); reconstruct the obvious intent, stripping
# newlines and blanks so workers get clean URLs.
with open(path + '\\url-run.txt', 'r', encoding='utf-8') as f:
    urls_data = [line.strip() for line in f if line.strip()]
# Fan the scans out over a thread pool; the work is network-bound, and
# write() serialises the spreadsheet access with the module-level lock.
with ThreadPoolExecutor(max_workers=100) as executor:
    for target in urls_data:
        executor.submit(write, url=target)
end = time.time()
print("总耗时:", end - start, "秒")
# Send a completion notice by mail (best effort — a failure only prints).
try:
    # NOTE(review): credentials are hard-coded placeholders; move them to
    # environment variables before real use.
    yag = yagmail.SMTP(user="xxxxxx@163.com", password="密码你的", host='smtp.163.com')  # SMTP account
    contents = ['TDK获取时间:%s' % (end - start)]  # mail body
    subject = 'TDK获取完成通知'  # plain string — the original f-string had no placeholders
    receiver = ["xxxx@qq.com"]  # one or more recipient addresses
    yag.send(receiver, subject, contents)
    yag.close()
except Exception as error:
    # Bug fix: the bare except hid the actual SMTP error; report it too.
    print('smtp 同个时间发送超过10条或者过期', error)
纯干货,原创源码,使用方法也很简单,请将py文件和url.txt保存在一个文件夹即可,会生成xls格式的报告文件
https://blog.fiime.cn/data/image/2022/11/03/34563_zay4_1055.png
这里提供成品软件下载:https://www.123pan.com/s/ePprVv-dPoJ
效果图:
https://blog.fiime.cn/data/image/2022/11/03/34669_c3mp_6940.png
https://blog.fiime.cn/data/image/2022/11/03/34682_lyop_8872.png
https://blog.fiime.cn/data/image/2022/11/03/34695_nmfk_8711.png
https://blog.fiime.cn/data/image/2022/11/03/34769_nur9_3645.png
速度很快,大概一分钟能跑几千个网站,准确率绝对没问题,不存在缓存尊重原创哈,感谢大家!喜欢多多支持FiimeROM的奥奥,我也会多给大家贡献工具的 zzhxjh 发表于 2022-11-14 13:27
这个不错,如果再校对提示,就可以当做监控,防止被挂马,防止一些问题发生,也可以当做备份,出问题可以快 ...
我就是挂的宝塔 每天自动跑一份 按日期分类 做TDK备份 jokony 发表于 2022-11-3 20:39
可以做仿站吗?
不是不是 你想错了 就是拿来读title keywords和描述的 也能读IP 站群用
对于做SEO上分的兄弟来说可以 这玩意有什么用 看起来像是扫网站所有url的 隔壁家的王二狗 发表于 2022-11-3 17:45
这玩意有什么用 看起来像是扫网站所有url的
对 做站群的 看TDK有没有中毒被改 就挺有用的 小奥2014 发表于 2022-11-3 17:47
对 做站群的 看TDK有没有中毒被改 就挺有用的
哈哈不太了解 这个 顶一下帖子! 隔壁家的王二狗 发表于 2022-11-3 17:48
哈哈不太了解 这个 顶一下帖子!
:lol 多做贡献 才能优雅摸鱼 我工作需要而已 帮顶一下吧 感谢发布原创作品,论坛因你更精彩! 可以做仿站吗? 站群是用来引流的吗