import time
from selenium import webdriver
from lxml import etree


def Run(keywords, numPage):
    """Scrape Taobao search-result pages for a keyword.

    Opens a headless Chrome session, loads ``numPage`` result pages for
    ``keywords`` (page index starts at 0), and reads up to 60 items from
    each page via fixed XPath expressions.

    Args:
        keywords: Search text, inserted verbatim into the request URL.
        numPage: Number of result pages to fetch.

    Returns:
        A list of ``(shop_name, product_name, price, sales_volume)`` tuples,
        each value being the first text node matched for that field. Items
        for which any of the four fields is missing are skipped.
    """
    # Build a headless ChromeDriver.
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    # NOTE(review): `executable_path` / `chrome_options` look like the older
    # Selenium keyword arguments; confirm they match the installed version.
    driver = webdriver.Chrome(executable_path='D:\\Project\\Pycharm\\Driver\\chromedriver.exe', chrome_options=option)

    # Collected rows (named so as not to shadow the builtin `list`).
    rows = []
    try:
        for page in range(numPage):
            # NOTE(review): `keywords` is not URL-encoded here; characters
            # such as '&' or '#' would alter the query string.
            url = 'https://uland.taobao.com/sem/tbsearch?refpid=mm_26632258_3504122_32538762&keyword=' + keywords + '&clk1=4ccf3c1994a6f9cc579ec534e8b1bec2&upsId=4ccf3c1994a6f9cc579ec534e8b1bec2&spm=a2e0b.20350158.31919782.1&pid=mm_26632258_3504122_32538762&union_lens=recoveryid%3A201_11.27.9.205_6368855_1626600603206%3Bprepvid%3A201_11.175.82.69_6378466_1626601143040&pnum=' + str(page)
            # Have ChromeDriver open the URL.
            driver.get(url)
            # Fixed wait to give the page time to load before reading it.
            time.sleep(3)
            # Parse the page source that the browser currently holds.
            tree = etree.HTML(driver.page_source)

            for j in range(1, 61):
                item = '/html/body/div[2]/div[1]/div[2]/ul/li[' + str(j) + ']/a/'
                product_name = tree.xpath(item + 'div[1]/span/text()')
                sales_volume = tree.xpath(item + 'div[4]/div[2]/text()')
                shop_name = tree.xpath(item + 'div[3]/div/text()')
                price = tree.xpath(item + 'div[2]/span[2]/text()')
                # A page may hold fewer than 60 items, or an item may lack a
                # field; skip those instead of raising IndexError.
                if not (shop_name and product_name and price and sales_volume):
                    continue
                rows.append((shop_name[0], product_name[0], price[0], sales_volume[0]))
    finally:
        # Always shut the browser down so no Chrome process is left behind.
        driver.quit()
    return rows


if __name__ == '__main__':
    # Search keyword.
    keywords = "遮阳伞女"
    # Number of result pages to fetch.
    numPage = 2
    data = Run(keywords, numPage)
    for row in data:
        print(row)
本文默认你的 Selenium 已经配置好可用的 ChromeDriver。
内容格式如下: