以下代码用于将Word文档逐页转换为PNG图片(保持文档原始页面尺寸),并检查每页是否符合A4纸尺寸
import os
import tempfile
from PIL import Image
import comtypes.client
import fitz # PyMuPDF库,用于PDF处理
def _export_word_to_pdf(input_file, pdf_path):
    """Save the Word document at input_file as a PDF at pdf_path via Word COM automation."""
    word = comtypes.client.CreateObject('Word.Application')
    try:
        word.Visible = False
        doc = word.Documents.Open(os.path.abspath(input_file))
        try:
            doc.SaveAs(pdf_path, FileFormat=17)  # 17 is Word's PDF save format
        finally:
            # Close the document even if the export failed so Word can quit cleanly.
            doc.Close()
    finally:
        # Quit exactly once, on success and on failure alike, so no hidden
        # Word process is left running.
        word.Quit()


def _render_pdf_pages(pdf_path, output_folder, base_name, dpi):
    """Render every page of the PDF to a PNG in output_folder and return the PNG paths."""
    image_paths = []
    # PDF coordinates are in points (1/72 inch), so dpi / 72 is the zoom factor.
    zoom = fitz.Matrix(dpi / 72, dpi / 72)
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=zoom)
            image_path = os.path.join(output_folder, f"{base_name}_page{page_num}.png")
            pix.save(image_path)
            image_paths.append(image_path)
            print(f"已生成图片: {image_path}")
            # Report whether the rendered page matches A4 at this DPI.
            is_a4 = check_a4_size(image_path, dpi)
            size_info = "A4尺寸" if is_a4 else "非A4尺寸"
            print(f" - 图片尺寸: {size_info}")
    finally:
        # The PDF must be closed before its temporary file can be deleted.
        pdf_document.close()
    return image_paths


def word_to_images(input_file, output_folder=None, dpi=300):
    """
    Convert a Word document into one PNG image per page, keeping the page size.

    The document is first exported to a temporary PDF through Microsoft Word's
    COM interface, then each PDF page is rendered with PyMuPDF.

    Args:
        input_file (str): Path to the Word document.
        output_folder (str, optional): Folder that receives the images. When
            None, a new temporary folder is created and used.
        dpi (int, optional): Rendering resolution. Defaults to 300.

    Returns:
        list: Paths of the generated images, in page order. An empty list is
        returned when the conversion fails; the error is printed, not raised.
    """
    import shutil

    if output_folder is None:
        output_folder = tempfile.mkdtemp()
    os.makedirs(output_folder, exist_ok=True)

    # File name without its extension; used to name the PDF and the images.
    base_name = os.path.splitext(os.path.basename(input_file))[0]

    try:
        # Fail fast on a missing input instead of starting Word first.
        if not os.path.isfile(input_file):
            raise FileNotFoundError(f"找不到Word文档: {input_file}")

        # A private working directory keeps the intermediate PDF from clashing
        # with other runs or with unrelated files in the shared temp directory.
        work_dir = tempfile.mkdtemp(prefix="word_to_images_")
        try:
            pdf_path = os.path.join(work_dir, f"{base_name}.pdf")
            _export_word_to_pdf(input_file, pdf_path)
            return _render_pdf_pages(pdf_path, output_folder, base_name, dpi)
        finally:
            # Best-effort removal: a leftover temp file must not turn a
            # successful conversion into a failure.
            shutil.rmtree(work_dir, ignore_errors=True)
    except Exception as e:
        print(f"致命错误: {str(e)}")
        return []
def check_a4_size(image_path, dpi=300, tolerance=0.02):
    """
    Tell whether an image's pixel dimensions correspond to an A4 page.

    Args:
        image_path (str): Path of the image to inspect.
        dpi (int): Resolution used to convert A4 millimetres to pixels.
            Defaults to 300.
        tolerance (float): Allowed relative deviation per side.
            Defaults to 0.02 (2%).

    Returns:
        bool: True when both sides fall within the tolerance of A4 in either
        orientation, otherwise False.
    """
    # A4 is 210 mm x 297 mm; 25.4 mm per inch converts to pixels at this DPI.
    expected_short = round((210 / 25.4) * dpi)
    expected_long = round((297 / 25.4) * dpi)

    with Image.open(image_path) as picture:
        actual_w, actual_h = picture.size

    def fits(value, expected):
        # Inclusive band of +/- tolerance around the expected pixel count.
        return expected * (1 - tolerance) <= value <= expected * (1 + tolerance)

    if fits(actual_w, expected_short) and fits(actual_h, expected_long):
        return True
    # The image may be rotated, so also accept the swapped orientation.
    return fits(actual_h, expected_short) and fits(actual_w, expected_long)
if __name__ == "__main__":
    # Example run: adjust the three settings below for your own document.
    input_file = "Doc1.docx"       # path of the Word document to convert
    output_folder = "word_images"  # folder that receives the page images
    dpi = 300                      # rendering resolution

    images = word_to_images(input_file, output_folder, dpi)
    # An empty list signals that the conversion failed.
    print(f"成功生成 {len(images)} 张图片" if images else "转换失败")