Python 實現網頁轉為 PDF
簡單記錄一下,避免以後將代碼丟失
import base64
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


# PDF download
def save_webpage_as_pdf(url, output_path="webpage.pdf"):
    """Render a web page in headless Chrome and save it as a single-page PDF.

    The page is opened, lazily loaded images are forced to load (by rewriting
    ``data-src``/``data-srcset`` and by scrolling through the document), and
    the result is printed through the DevTools ``Page.printToPDF`` command
    with a paper height derived from the document height, so the whole page
    ends up on one tall sheet.

    Args:
        url: Address of the page to render.
        output_path: File the PDF bytes are written to (opened in ``"wb"``
            mode, so an existing file is overwritten).

    Returns:
        None. Progress and warnings are reported with ``print``.

    Raises:
        Whatever Selenium raises while starting the browser, navigating,
        running scripts or printing, and ``OSError`` from writing the file.
        Only a failure while waiting for the content element is swallowed.
        The browser is shut down in every case.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=chrome_options)
    # Everything after the browser starts runs under try/finally so that a
    # failure anywhere below cannot leave a Chrome process behind.
    try:
        driver.get(url)

        # Wait up to 15 s for the element with id "js_content" (presumably the
        # article body on the target pages -- confirm for other sites). This
        # is best-effort: on timeout or error we report it and carry on.
        try:
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.ID, "js_content"))
            )
        except Exception as e:
            print("加載內容超時或發生錯誤:", e)

        # 1. Defeat lazy loading: copy data-src/data-srcset into the real
        #    attributes and ask the browser to load every image eagerly.
        driver.execute_script("""
            const images = document.querySelectorAll('img');
            images.forEach(img => {
                if(img.hasAttribute('data-src')) {
                    img.src = img.getAttribute('data-src');
                }
                if(img.hasAttribute('data-srcset')) {
                    img.srcset = img.getAttribute('data-srcset');
                }
                img.loading = 'eager';  // 強制立即加載
            });
        """)
        time.sleep(2)  # give the rewritten images a moment to start loading

        # 2. Scroll through the page in 300 px steps to trigger any
        #    scroll-driven lazy-loading logic.
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        for i in range(0, scroll_height, 300):
            driver.execute_script(f"window.scrollTo(0, {i});")
            time.sleep(0.2)  # time for images to load at this position

        # Finish at the very bottom of the page.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # wait for the last images

        # 3. Count images that are still incomplete or have no decoded height.
        images_not_loaded = driver.execute_script("""
            const imgs = Array.from(document.images);
            return imgs.filter(img => !img.complete || img.naturalHeight === 0).length;
        """)
        if images_not_loaded > 0:
            print(f"有 {images_not_loaded} 張圖片仍未加載,等待3秒重試...")
            # NOTE: this is only an extra grace period; the count is not
            # re-checked afterwards.
            time.sleep(3)
        else:
            print("所有圖片已加載完畢。")

        # 4. Paper height in inches: document height in CSS pixels / 96 px
        #    per inch, so the whole page fits on one sheet.
        paper_height = driver.execute_script("return document.body.scrollHeight / 96")

        result = driver.execute_cdp_cmd("Page.printToPDF", {
            "printBackground": True,
            "paperWidth": 8.27,  # inches (A4 width)
            "paperHeight": paper_height,
            "marginTop": 0,
            "marginBottom": 0,
            "marginLeft": 0,
            "marginRight": 0,
        })

        # The DevTools response carries the PDF as a base64 string in 'data'.
        pdf_data = base64.b64decode(result['data'])
        with open(output_path, "wb") as f:
            f.write(pdf_data)

        print(f"網頁已保存為 PDF:{output_path}")
    finally:
        driver.quit()

浙公網安備 33010602011771號