pdf轉word(以圖片形式)
import fitz # PyMuPDF
from docx import Document
from docx.shared import Inches
import io
def pdf_to_word_screenshots(pdf_path, zoom=10):
    """Convert a PDF into a Word document holding one image per page.

    Each page is rendered with PyMuPDF to an in-memory PNG and inserted into
    a new python-docx document at a width of 6 inches, with a page break
    between consecutive pages. The result is saved as ``pdf_path + ".docx"``
    (the original suffix is kept, e.g. ``a.pdf`` -> ``a.pdf.docx``) and the
    output path is printed.

    Args:
        pdf_path: Path of the input PDF file.
        zoom: Scale factor applied on both axes when rendering a page
            (default 10). Larger values give sharper images, but the
            rendered pixel count grows with the square of this factor.
    """
    doc = Document()
    # The render matrix does not depend on the page, so build it once.
    mat = fitz.Matrix(zoom, zoom)
    # The context manager closes the PDF even if rendering fails.
    with fitz.open(pdf_path) as pdf_doc:
        for page_index, page in enumerate(pdf_doc):
            # Break *between* pages only; a break after the last picture
            # would leave an empty trailing page in the Word file.
            if page_index:
                doc.add_page_break()
            pix = page.get_pixmap(matrix=mat)
            img_io = io.BytesIO(pix.tobytes("png"))
            doc.add_picture(img_io, width=Inches(6))
    word_path = pdf_path + ".docx"
    doc.save(word_path)
    print(f"Word文檔已保存到: {word_path}")
# Install PyMuPDF first: pip install pymupdf
# Example call; it runs as soon as this file is executed and uses a hard-coded path.
pdf_to_word_screenshots(r"C:\Users\12997\Desktop\xxxPDF.pdf")
############################################################################ 以下是將pdf轉為png保存
import fitz # PyMuPDF
import os
def pdf_to_png(pdf_path, output_folder, zoom=2):
    """Save every page of a PDF as a separate PNG image.

    Pages are written to ``output_folder`` as ``page_1.png``,
    ``page_2.png``, ... (1-based), and each saved path is printed.

    Args:
        pdf_path: Path of the input PDF file.
        output_folder: Folder that receives the PNG files. It is created
            if it does not exist yet.
        zoom: Scale factor applied on both axes when rendering a page
            (default 2).
    """
    # The render matrix does not depend on the page, so build it once.
    mat = fitz.Matrix(zoom, zoom)
    # The context manager closes the PDF even if rendering or saving fails.
    with fitz.open(pdf_path) as pdf_doc:
        # Create the folder only after the PDF opened successfully. An empty
        # string means "current directory" for os.path.join, and
        # os.makedirs("") would raise, so skip it in that case.
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)
        for page_number, page in enumerate(pdf_doc, start=1):
            pix = page.get_pixmap(matrix=mat)
            output_path = os.path.join(output_folder, f"page_{page_number}.png")
            pix.save(output_path)
            print(f"已保存: {output_path}")
# Usage example: render each page at 4x zoom into the Desktop folder.
pdf_path = r"C:\Users\xxx\Desktop\PDF.pdf"
output_folder = r"C:\Users\xxx\Desktop"
pdf_to_png(pdf_path, output_folder, zoom = 4)
本文來自博客園,作者:{archer},轉載請注明原文鏈接:http://www.rzrgm.cn/archer-mowei/p/18925614

浙公網安備 33010602011771號