因为项目需要,长期使用360的quake和奇安信的hunter进行资产收集。但是收集到的资产有很多无法访问,而hunter中的资产状态码都是200。所以写了一个简单的脚本:将quake和hunter的结果进行对比,去掉重复部分,再对剩余的url探测存活,并把存活的url写入新的文本文件。
# -*- coding: utf-8 -*-
# from modulefinder import STORE_GLOBAL
import os
from tkinter import E
def qc(url1, url2):
    """Return the URLs from *url2* that do not also appear in *url1*.

    Both arguments are iterated line by line, so open text files (or any
    other iterable of strings) are accepted. Surrounding whitespace is
    stripped from every line and blank lines are ignored.

    Args:
        url1: Lines of the QiAnXin Hunter export (the reference list).
        url2: Lines of the 360 Quake export (the list to be filtered).

    Returns:
        A new list with the entries of *url2* that are absent from *url1*,
        in their original order. Each dropped duplicate is printed.
    """
    # A set gives O(1) membership tests instead of scanning a list per URL.
    hunter_urls = {line.strip() for line in url1}
    hunter_urls.discard("")

    print("============开始去重================")
    url_wancheng = []
    for line in url2:
        url = line.strip()
        if not url:
            # Blank lines are not URLs; probing them would only produce noise.
            continue
        if url in hunter_urls:
            print("重复的url为:" + url)
            continue
        url_wancheng.append(url)
    return url_wancheng


def url_scan(urls, output_path="360_OK.txt", timeout=5):
    """Probe every URL and save the ones that answer with HTTP 200.

    Args:
        urls: Iterable of URL strings to request.
        output_path: File that receives one live URL per line. It is
            truncated at the start of the scan.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    # Imported here rather than at module level so that qc() stays usable
    # (and importable) on systems where the third-party package is missing.
    import requests

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'
    }
    num = 0
    s1 = 0
    # The context manager guarantees the results are flushed and the handle
    # is closed even if the scan is interrupted.
    with open(output_path, 'w', encoding="utf-8") as url_save:
        for url in urls:
            num += 1
            try:
                status = requests.get(url, headers=headers, timeout=timeout).status_code
            except (requests.RequestException, ValueError):
                # Network errors and malformed URLs count as "not alive";
                # KeyboardInterrupt is deliberately NOT caught so that a long
                # scan can still be aborted with Ctrl+C.
                print("\033[0;32;40m访问失败:\033[0m" + url)
                continue
            if status == 200:
                s1 += 1
                print("\033[0;31;40m目标存活:\033[0m" + url)
                url_save.write(url + '\n')
    print("去重后共有url:%d 存活:%d" % (num, s1))


def main():
    """De-duplicate 360.txt against qianxin.txt, then probe the remainder."""
    hunter = "qianxin.txt"
    quake = "360.txt"
    # errors="replace" keeps a stray non-UTF-8 byte from aborting the run.
    with open(hunter, 'r', encoding="utf-8", errors="replace") as url1, \
            open(quake, 'r', encoding="utf-8", errors="replace") as url2:
        url_list = qc(url1, url2)
    print("=============开始探测存活===============")
    url_scan(url_list)
    print("\033[0;32;40m=====探测结束,存活已导入360_OK.txt=====\033[0m")


if __name__ == "__main__":
    main()
代码很简单,只把返回状态码为200的url视为存活。有不足的地方还望各位大佬指正。