PDF-Replacer/
├── main.py
├── core/
│ ├── import fitz
import pdfplumber
from reportlab.pdfgen import canvas
from io import BytesIO
class PDFProcessor:
    """Replace text in a PDF while trying to keep the original formatting.

    The same file is opened twice: with PyMuPDF (``fitz``) for searching,
    drawing and saving, and with pdfplumber for per-character font metadata.
    Call :meth:`close` (or use the instance in a ``with`` statement) to
    release both handles.
    """

    # Standard PDF font used when the original font cannot be used by reportlab.
    # NOTE: it has no CJK glyphs; non-Latin replacement text needs a registered
    # font with matching coverage.
    _FALLBACK_FONT = "Helvetica"

    def __init__(self, pdf_path):
        """Open ``pdf_path`` with both PyMuPDF and pdfplumber."""
        self._closed = False
        self.doc = fitz.open(pdf_path)
        try:
            self.pdfplumber_pdf = pdfplumber.open(pdf_path)
        except Exception:
            # Do not leak the first handle when the second open fails.
            self.doc.close()
            raise

    def close(self):
        """Release both underlying file handles; safe to call repeatedly."""
        if getattr(self, "_closed", False):
            return
        self._closed = True
        self.pdfplumber_pdf.close()
        self.doc.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def _find_first_char(chars, text, skip_whitespace=False):
        """Return the char dict at which ``text`` first starts, or ``None``.

        ``chars`` is a sequence of pdfplumber char dicts.  With
        ``skip_whitespace`` set, whitespace is ignored on both sides, which
        helps when the PDF stores no explicit space characters.
        """
        if not text:
            return None
        if skip_whitespace:
            text = "".join(text.split())
            if not text:
                return None
        letters = []
        owners = []  # owners[i] is the char dict that produced letters[i]
        for char in chars:
            # A single char entry may carry several characters (ligatures).
            for letter in char["text"]:
                if skip_whitespace and letter.isspace():
                    continue
                letters.append(letter)
                owners.append(char)
        start = "".join(letters).find(text)
        return owners[start] if start >= 0 else None

    def get_text_properties(self, page_num, text):
        """Return the formatting of the first occurrence of ``text`` on a page.

        Args:
            page_num: Zero-based page index.
            text: The text to look up; may span several characters.

        Returns:
            A dict with the keys ``font``, ``size``, ``color`` and
            ``rotation`` taken from the first character of the match, or
            ``None`` when the text is not found.  ``rotation`` holds
            pdfplumber's ``upright`` flag, not an angle.
        """
        chars = self.pdfplumber_pdf.pages[page_num].chars
        char = self._find_first_char(chars, text)
        if char is None:
            char = self._find_first_char(chars, text, skip_whitespace=True)
        if char is None:
            return None
        return {
            "font": char["fontname"],
            "size": char["size"],
            "color": char["non_stroking_color"],
            "rotation": char["upright"],
        }

    @staticmethod
    def _as_text(value):
        """Convert a spreadsheet cell to ``str``; ``None``/NaN become ``""``."""
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value)

    @staticmethod
    def _normalize_replacements(replacements):
        """Return ``replacements`` as a list of ``(old_text, new_text)`` pairs.

        Accepts 2-item sequences as well as dict records with the keys
        ``old_text`` and ``new_text``.  Entries whose old text is empty are
        dropped because there is nothing to search for.
        """
        pairs = []
        for entry in replacements:
            if isinstance(entry, dict):
                # Unpacking a dict would yield its *keys*, not its values.
                old_text, new_text = entry["old_text"], entry["new_text"]
            else:
                old_text, new_text = entry
            old_text = PDFProcessor._as_text(old_text)
            if old_text:
                pairs.append((old_text, PDFProcessor._as_text(new_text)))
        return pairs

    @staticmethod
    def _to_rgb(color):
        """Convert a gray / RGB / CMYK colour to an RGB triple of floats.

        Anything that cannot be interpreted (``None``, pattern names, an
        unexpected component count) falls back to black.
        """
        black = (0.0, 0.0, 0.0)
        if isinstance(color, (int, float)):
            color = (color,)
        try:
            parts = [float(component) for component in color]
        except (TypeError, ValueError):
            return black
        if len(parts) == 1:
            return (parts[0], parts[0], parts[0])
        if len(parts) == 3:
            return (parts[0], parts[1], parts[2])
        if len(parts) == 4:
            cyan, magenta, yellow, key = parts
            return (
                (1 - cyan) * (1 - key),
                (1 - magenta) * (1 - key),
                (1 - yellow) * (1 - key),
            )
        return black

    def erase_and_replace(self, output_path, replacements):
        """Replace every occurrence of each old text and save the result.

        Args:
            output_path: Where the modified PDF is written.
            replacements: Iterable of ``(old_text, new_text)`` pairs or of
                dicts with the keys ``old_text`` and ``new_text``.
        """
        # Materialise once: the pairs are reused for every page.
        pairs = self._normalize_replacements(replacements)
        for page_num, page in enumerate(self.doc):
            for old_text, new_text in pairs:
                for rect in page.search_for(old_text):
                    self._erase_text(page, rect)
                    self._insert_text(page, rect, new_text, page_num, old_text)
        self.doc.save(output_path)

    def _erase_text(self, page, rect):
        """Cover ``rect`` with an opaque white rectangle.

        The rectangle must be drawn in the foreground; a background
        rectangle would leave the old text visible.  The old text stays in
        the content stream, it is only hidden.
        """
        fill_color = (1, 1, 1)
        page.draw_rect(rect, color=fill_color, fill=fill_color, overlay=True)

    def _insert_text(self, page, rect, new_text, page_num, old_text):
        """Draw ``new_text`` at ``rect`` using the formatting of ``old_text``.

        The text is rendered with reportlab onto a one-page PDF that is
        exactly as large as the target area and then stamped onto ``page``,
        so no coordinate conversion or scaling is involved.
        """
        # Fall back to defaults when the original formatting is unknown.
        props = self.get_text_properties(page_num, old_text) or {}
        font_size = float(props.get("size") or rect.height)

        packet = BytesIO()
        can = canvas.Canvas(packet, pagesize=(rect.width, rect.height))
        try:
            can.setFont(props.get("font") or self._FALLBACK_FONT, font_size)
        except KeyError:
            # Fonts embedded in the source PDF are usually not registered
            # with reportlab.
            can.setFont(self._FALLBACK_FONT, font_size)

        # Grow the overlay to the right when the new text is wider than the
        # old one, otherwise it would be clipped.
        width = max(rect.width, can.stringWidth(new_text))
        height = rect.height
        can.setPageSize((width, height))
        can.setFillColorRGB(*self._to_rgb(props.get("color")))
        # Centre the glyph box vertically; roughly 20% of the size is descent.
        baseline = max((height - font_size) / 2 + 0.2 * font_size, 0)
        can.drawString(0, baseline, new_text)
        can.save()

        target = fitz.Rect(rect.x0, rect.y0, rect.x0 + width, rect.y1)
        with fitz.open(stream=packet.getvalue(), filetype="pdf") as overlay_doc:
            page.show_pdf_page(target, overlay_doc, 0)
def process_tables(page, replacements):
    """Rebuild every table detected on ``page`` with ``replacements`` applied.

    Each detected table is erased, rebuilt from the replacements and then
    inserted back into the page, one table at a time.
    """
    for found in detect_tables(page):
        erase_table(found)
        insert_table(page, rebuild_table(found, replacements))
├── ui/
│ └── from PyQt5.QtWidgets import QMainWindow, QFileDialog, QVBoxLayout, QWidget, QPushButton, QLabel, QTableWidget, QTableWidgetItem
import pandas as pd
class MainWindow(QMainWindow):
    """Main window: pick a PDF, pick an Excel replacement sheet, run the job.

    The spreadsheet must contain the columns ``old_text`` and ``new_text``;
    its rows are previewed in a two-column table.
    """

    # Columns the replacement spreadsheet has to provide.
    REQUIRED_COLUMNS = ("old_text", "new_text")

    def __init__(self):
        super().__init__()
        self.setWindowTitle("PDF文本替换工具")

        # State filled in by the upload slots.  Initialised here so that no
        # slot can hit a missing attribute when the buttons are pressed out
        # of order.
        self.pdf_path = None
        self.replacements = []

        # NOTE: the attribute name is kept for compatibility; on this
        # instance it shadows the QWidget.layout() method.
        self.layout = QVBoxLayout()

        self.btn_upload_pdf = QPushButton("上传PDF文件", self)
        self.btn_upload_pdf.clicked.connect(self.upload_pdf)
        self.layout.addWidget(self.btn_upload_pdf)

        self.btn_upload_excel = QPushButton("上传Excel替换表", self)
        self.btn_upload_excel.clicked.connect(self.upload_excel)
        self.layout.addWidget(self.btn_upload_excel)

        self.table = QTableWidget()
        self.layout.addWidget(self.table)

        self.btn_run = QPushButton("开始替换", self)
        self.btn_run.clicked.connect(self.run_replacement)
        self.layout.addWidget(self.btn_run)

        container = QWidget()
        container.setLayout(self.layout)
        self.setCentralWidget(container)

    def upload_pdf(self):
        """Let the user choose the PDF; a cancelled dialog changes nothing."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择PDF文件", "", "PDF Files (*.pdf)")
        if file_path:
            self.pdf_path = file_path

    def upload_excel(self):
        """Load the replacement sheet and show it in the preview table."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择Excel文件", "", "Excel Files (*.xlsx)")
        if not file_path:
            # Dialog cancelled: keep whatever was loaded before.
            return

        # Read every cell as text so numeric cells can be searched for and
        # displayed; empty cells become "" instead of NaN.
        frame = pd.read_excel(file_path, dtype=str)
        missing = [name for name in self.REQUIRED_COLUMNS if name not in frame.columns]
        if missing:
            from PyQt5.QtWidgets import QMessageBox

            QMessageBox.warning(self, "Excel格式错误", "缺少列: " + ", ".join(missing))
            return

        self.replacements = frame.fillna("").to_dict("records")
        self.load_table()

    def load_table(self):
        """Show ``self.replacements`` in the preview table, one row each."""
        self.table.setRowCount(len(self.replacements))
        self.table.setColumnCount(2)
        self.table.setHorizontalHeaderLabels(["原文本", "新文本"])
        for row, item in enumerate(self.replacements):
            # QTableWidgetItem has an int overload (item type), so the
            # values must be passed as strings.
            self.table.setItem(row, 0, QTableWidgetItem(str(item["old_text"])))
            self.table.setItem(row, 1, QTableWidgetItem(str(item["new_text"])))

    def run_replacement(self):
        """Apply the loaded replacements to the chosen PDF and save a copy.

        Does nothing until both a PDF and a non-empty replacement sheet have
        been loaded, or when the save dialog is cancelled.
        """
        if not self.pdf_path or not self.replacements:
            return
        output_path, _ = QFileDialog.getSaveFileName(self, "保存PDF文件", "", "PDF Files (*.pdf)")
        if not output_path:
            return

        # Hand over plain (old, new) pairs and skip rows without search text.
        pairs = [
            (str(item["old_text"]), str(item["new_text"]))
            for item in self.replacements
            if str(item["old_text"])
        ]
        # PDFProcessor is the class defined earlier in this file.
        processor = PDFProcessor(self.pdf_path)
        try:
            processor.erase_and_replace(output_path, pairs)
        finally:
            processor.doc.close()
            processor.pdfplumber_pdf.close()
└── requirements.txt