元数据(Metadata)

读取元数据

from pypdf import PdfReader  

reader = PdfReader("example.pdf")  

meta = reader.metadata  

# 以下所有字段都可能为 None!  
print(meta.title)  
print(meta.author)  
print(meta.subject)  
print(meta.creator)  
print(meta.producer)  
print(meta.creation_date)  
print(meta.modification_date)  

写入元数据

from datetime import datetime  
from pypdf import PdfReader, PdfWriter  

reader = PdfReader("example.pdf")  
writer = PdfWriter()  

# 将所有页面添加到 writer  
for page in reader.pages:  
    writer.add_page(page)  

# 如果需要保留原有元数据,添加以下两行代码  
if reader.metadata is not None:  
    writer.add_metadata(reader.metadata)  

# 格式化当前日期和时间用于元数据  
utc_time = "-05'00'"  # 可选的 UTC 时间偏移  
time = datetime.now().strftime(f"D\072%Y%m%d%H%M%S{utc_time}")  

# 添加新的元数据  
writer.add_metadata(  
    {  
        "/Author": "Martin",  
        "/Producer": "Libre Writer",  
        "/Title": "Title",  
        "/Subject": "Subject",  
        "/Keywords": "Keywords",  
        "/CreationDate": time,  
        "/ModDate": time,  
        "/Creator": "Creator",  
        "/CustomField": "CustomField",  
    }  
)  

# 将新 PDF 保存到文件  
with open("meta-pdf.pdf", "wb") as f:  
    writer.write(f)  

更新元数据

from pypdf import PdfWriter  

writer = PdfWriter(clone_from="example.pdf")  

# 修改部分元数据  
writer.add_metadata(  
    {  
        "/Author": "Martin",  
        "/Producer": "Libre Writer",  
        "/Title": "Title",  
    }  
)  

# 清空所有数据但保留 PDF 中的条目  
writer.metadata = {}  

# 替换所有条目为新的一组元数据  
writer.metadata = {  
    "/Author": "Martin",  
    "/Producer": "Libre Writer",  
}  

# 将新 PDF 保存到文件  
with open("meta-pdf.pdf", "wb") as f:  
    writer.write(f)  

删除元数据条目

from pypdf import PdfWriter  

writer = PdfWriter("example.pdf")  

# 删除元数据(/Info 条目)  
writer.metadata = None  

# 将新 PDF 保存到文件  
with open("meta-pdf.pdf", "wb") as f:  
    writer.write(f)