python+PyPDF2实现PDF的文本内容读取、多文件合并、旋转、裁剪、缩放、加解密、添加水印
目录
读取内容
合并文件
旋转
缩放
裁剪
加密和解密
添加水印
安装:pip install PyPDF2 -i https://pypi.tuna.tsinghua.edu.cn/simple
读取内容
from pathlib import Path

from PyPDF2 import PdfReader, PdfMerger, PdfWriter
def read_pdf(pdf_path):
    """Print the extracted text of every page of a PDF, in page order.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``PdfReader``.
    """
    document = PdfReader(pdf_path)
    # len(document.pages) is the page count; document.pages[0] is the first page.
    page_texts = (page.extract_text() for page in document.pages)
    for text in page_texts:
        print(text)
合并文件
def merger_pdf(pdf_path: str, pdf_names: list):
    """Merge several PDFs from one folder into a single output file.

    Every file in ``pdf_names`` is appended in order, then a page range taken
    from ``9.pdf`` is inserted at output position 1, and the result is written
    to ``合并后的pdf.pdf`` inside ``pdf_path``.

    Args:
        pdf_path: Folder that holds the input files and receives the output.
        pdf_names: File names inside ``pdf_path`` to append, in order.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    pdf_mg = PdfMerger()  # merger object that accumulates the pages
    try:
        for pdf_name in pdf_names:
            pdf_mg.append(str(folder / pdf_name))
        # ``page_number`` (alias ``position``; the two must not be combined)
        # selects where in the output the pages are inserted.
        # ``pages=(n, m)`` takes the source's pages n+1 through m, so (3, 6)
        # inserts the 4th, 5th and 6th pages of 9.pdf.
        pdf_mg.merge(page_number=1, fileobj=str(folder / '9.pdf'), pages=(3, 6))
        pdf_mg.write(str(folder / '合并后的pdf.pdf'))
    finally:
        # Close the merger even when appending, merging or writing fails.
        pdf_mg.close()
旋转
def rotate_pdf(pdf_path: str, pdf_names: list):
    """Demonstrate page rotation and save the result as ``旋转测试.pdf``.

    The first page of ``pdf_names[1]`` is added twice (rotated +90 on the
    reader side, then -90 on the writer side), followed by all of
    ``pdf_names[2]`` and a page range from ``pdf_names[-1]``.

    Args:
        pdf_path: Folder that holds the input files and receives the output.
        pdf_names: File names inside ``pdf_path``; entries 1, 2 and -1 are used.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    reader = PdfReader(str(folder / pdf_names[1]))
    writer = PdfWriter()
    # Rotate the page taken from the reader by 90 degrees clockwise; the
    # angle has to be a multiple of 90.
    page = reader.pages[0].rotate(90)
    writer.add_page(page)
    # Add the same source page again and rotate the page held by the writer
    # by -90 degrees (counter-clockwise).
    # NOTE(review): if rotate() changes the reader's page in place, this page
    # starts at 90 degrees and ends up unrotated — confirm that is intended.
    writer.add_page(reader.pages[0])
    writer.pages[1].rotate(-90)
    writer.append(str(folder / pdf_names[2]))  # append a whole PDF
    # ``pages=(n, m)`` takes pages n+1 through m, so (2, 7) appends the
    # 3rd through 7th pages.
    writer.append(str(folder / pdf_names[-1]), pages=(2, 7))
    with open(folder / '旋转测试.pdf', 'wb') as f:
        writer.write(f)
    writer.close()
缩放
def scale_pdf(pdf_path: str, pdf_names: list):
    """Demonstrate page scaling and save the result as ``缩放测试.pdf``.

    A page range of ``pdf_names[-1]`` is copied into a new writer, the size of
    the last copied page is printed, and that page is enlarged by a factor
    of 3.

    Args:
        pdf_path: Folder that holds the input file and receives the output.
        pdf_names: File names inside ``pdf_path``; only the last one is used.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    writer = PdfWriter()
    # ``pages=(n, m)`` takes pages n+1 through m, so (2, 7) appends the
    # 3rd through 7th pages.
    writer.append(str(folder / pdf_names[-1]), pages=(2, 7))
    page_last = writer.pages[-1]
    # Show the page size before scaling.
    print(page_last.mediabox.width)
    print(page_last.mediabox.height)
    # Alternatives to scale_by():
    #   page_last.scale_to(200, 255)  -> resize to an explicit width and height
    #   page_last.scale(2, 2)         -> separate factors for width and height
    page_last.scale_by(3)  # one factor applied to both width and height
    with open(folder / '缩放测试.pdf', 'wb') as f:
        writer.write(f)
    writer.close()
裁剪
def tailor_pdf(pdf_path: str, pdf_names: list):
    """Demonstrate page cropping and save the result as ``裁剪测试.pdf``.

    A page range of ``pdf_names[-1]`` is copied into a new writer and the
    media box of the second copied page is shrunk to the rectangle spanning
    (0, 0) to (width / 2, height / 2), where width and height are read from
    the last copied page.

    Args:
        pdf_path: Folder that holds the input file and receives the output.
        pdf_names: File names inside ``pdf_path``; only the last one is used.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    writer = PdfWriter()
    # ``pages=(n, m)`` takes pages n+1 through m, so (2, 7) appends the
    # 3rd through 7th pages.
    writer.append(str(folder / pdf_names[-1]), pages=(2, 7))
    page_last = writer.pages[-1]
    # Box coordinates are in points: 1 pt = 1/72 inch, 1 inch = 25.4 mm.
    # The reference size comes from the last page, not from the cropped page.
    half_width = page_last.mediabox.width / 2
    half_height = page_last.mediabox.height / 2
    page_two = writer.pages[1]
    # PDF coordinates start at the bottom-left corner with y growing upwards,
    # so y=0 belongs to the lower edge and y=height/2 to the upper edge.
    # Assigning them the other way round stores an inverted box (bottom above
    # top, negative height) that only displays correctly in viewers that
    # normalise rectangles. Two diagonal corners fully define the same region.
    page_two.mediabox.lower_left = (0, 0)
    page_two.mediabox.upper_right = (half_width, half_height)
    with open(folder / '裁剪测试.pdf', 'wb') as f:
        writer.write(f)
    writer.close()
加密和解密
def crypt_pdf(pdf_path: str, pdf_names: list):
    """Demonstrate encrypting a PDF and reading it back with the password.

    A page range of ``pdf_names[-1]`` is written to ``加密文件.pdf`` protected
    by a password; the file is then reopened, decrypted with the same
    password, and the text of every page is printed.

    Args:
        pdf_path: Folder that holds the input file and receives the output.
        pdf_names: File names inside ``pdf_path``; only the last one is used.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    encrypted_path = folder / '加密文件.pdf'
    password = 'adc123'  # demo password shared by the encrypt and decrypt steps

    writer = PdfWriter()
    # ``pages=(n, m)`` takes pages n+1 through m, so (2, 7) appends the
    # 3rd through 7th pages.
    writer.append(str(folder / pdf_names[-1]), pages=(2, 7))
    writer.encrypt(password)
    with open(encrypted_path, 'wb') as f:
        writer.write(f)
    writer.close()

    encrypt_file = PdfReader(str(encrypted_path))
    if encrypt_file.is_encrypted:  # only decrypt when the file is protected
        encrypt_file.decrypt(password)
    for page in encrypt_file.pages:
        print(page.extract_text())
添加水印
merge_page 用于把另一个页面的内容合并(叠加)到当前页面上;可以准备一个只包含水印的空白页,作为被合并的页面。
def watermark_pdf(pdf_path: str, pdf_names: list):
    """Merge a watermark page into every page of ``9.pdf``.

    The first page of ``pdf_names[2]`` is used as the watermark; it is merged
    into each page of ``9.pdf`` and the result is saved as ``水印测试.pdf``.

    Args:
        pdf_path: Folder that holds the input files and receives the output.
        pdf_names: File names inside ``pdf_path``; entry 2 supplies the
            watermark page.
    """
    # Build paths with pathlib so the separator matches the running OS
    # instead of being a hard-coded backslash.
    folder = Path(pdf_path)
    writer = PdfWriter()
    reader = PdfReader(str(folder / '9.pdf'))
    # Page object that carries the watermark.
    watermark_page = PdfReader(str(folder / pdf_names[2])).pages[0]
    for p in reader.pages:
        # Merge the watermark into the page, then hand the page to the writer.
        p.merge_page(watermark_page)
        writer.add_page(p)
    with open(folder / '水印测试.pdf', 'wb') as f:
        writer.write(f)
    writer.close()
其他方法
# Further writer methods. NOTE(review): `writer` is not defined in this
# snippet; presumably a PdfWriter instance like the ones used above — confirm.
writer.remove_text() # remove the text
writer.remove_images() # remove the images
writer.remove_links() # remove the links
writer.add_blank_page(100,500) # add a blank page; its width and height can be given