网站登录时通常需要输入验证码。下面的示例先将验证码图片下载到本地,再由用户手动输入验证码来模拟登录。请求时使用同一个会话(Session),是为了保证获取到的验证码、表单令牌等数据保持一致。
import requestsfrom bs4 import BeautifulSoupheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'' (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}def download_code(s, url):resp = s.get(url, headers=headers)text_code = resp.encodingtext = resp.content.decode(text_code, 'ignore')html = BeautifulSoup(text, 'lxml')# 获取图片链接img_src = 'https://so.gushiwen.org' + html.find(id='imgCode')['src']# 下载图片img = s.get(img_src, headers=headers)img_text = img.contentwith open("yan.jpg", 'wb') as f:f.write(img_text)def get_vic(s):url = "https://so.gushiwen.org/user/login.aspx"# 获取表单登录令牌resp = s.get(url, headers&#