代碼
import requests #導入第三方庫
from bs4 import BeautifulSoup as bs
import re
import os
import random
import json
import openpyxl
import time
# Request headers sent with every search request: a desktop-browser user agent
# and a cookie. The cookie value is a placeholder to replace with your own.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'cookie': '這里放上自己的cookie',
}
# Accumulates the item dicts collected from every fetched result page.
auctions = []
# Maximum number of result pages to fetch.
maxPage = 10
# Step of the `s` query parameter between consecutive pages
# (presumably the number of items per result page -- confirm against the site).
_PAGE_OFFSET_STEP = 44
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10
# Captures the JSON object assigned to `g_page_config` inside the page HTML.
# Compiled once here instead of on every loop iteration.
_PAGE_CONFIG_RE = re.compile(r'g_page_config = ({.*})')

for page in range(maxPage):
    params = {
        'q': '鼠標',
        's': str(page * _PAGE_OFFSET_STEP),
    }
    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(
            'https://s.taobao.com/search',
            params=params,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        # Stop fetching but keep what was collected so far.
        print(f'page {page + 1}: request failed ({exc}); stopping')
        break

    # HTML text of the result page.
    content = response.text
    # Locate the embedded g_page_config JSON string; it is absent when the
    # response is not a regular result page.
    match = _PAGE_CONFIG_RE.search(content)
    if match is None:
        print(f'page {page + 1}: g_page_config not found in response; stopping')
        break

    try:
        # Parse the JSON text into a dict and pull out the list of item
        # descriptions for this page.
        g_page_config = json.loads(match.group(1))
        page_auctions = g_page_config['mods']['itemlist']['data']['auctions']
    except (ValueError, KeyError, TypeError) as exc:
        print(f'page {page + 1}: unexpected g_page_config layout ({exc!r}); stopping')
        break

    auctions.extend(page_auctions)
    # Progress output: running total of collected items.
    print(len(auctions))
    # Pause between requests.
    time.sleep(4)
# Spreadsheet layout: (header text, key read from each item dict), in column
# order. Headers are: product name, price, sales, shipping fee, ship-from
# location, shop name, image URL.
_COLUMNS = (
    ('商品名稱', 'raw_title'),
    ('價格', 'view_price'),
    ('銷量', 'view_sales'),
    ('運費', 'view_fee'),
    ('發貨地點', 'item_loc'),
    ('店鋪名', 'nick'),
    ('圖片url', 'pic_url'),
)

workbook = openpyxl.Workbook()
sheet = workbook.active

# Header row (row 1).
for column, (title, _key) in enumerate(_COLUMNS, start=1):
    sheet.cell(row=1, column=column, value=title)

# Data rows: one row per collected item, starting directly below the header.
for i, auction in enumerate(auctions):
    # Progress output: index of the item being written.
    print(i)
    for column, (_title, key) in enumerate(_COLUMNS, start=1):
        # .get leaves the cell empty when an item lacks a field, instead of
        # aborting the whole export with KeyError before the file is saved.
        sheet.cell(row=i + 2, column=column, value=auction.get(key))

workbook.save('taobao.xlsx')
結果
![]()