编辑代码

PDF-Replacer/
├── main.py          # 主程序入口
├── core/
│   ├── import fitz
import pdfplumber
from reportlab.pdfgen import canvas
from io import BytesIO

class PDFProcessor:
    """Replace text in a PDF while approximating the original formatting.

    The same file is opened twice: with PyMuPDF (``fitz``) for searching,
    erasing and writing, and with pdfplumber for reading per-character
    formatting attributes.

    An instance keeps both handles open; call :meth:`close` or use the
    object as a context manager to release them.
    """

    # Fallbacks used when the original formatting cannot be determined.
    _DEFAULT_COLOR = (0.0, 0.0, 0.0)
    _LATIN_FONT = "helv"
    _CJK_FONT = "china-s"
    # Share of the font size assumed to lie below the baseline; used to place
    # the new baseline inside the rectangle of the old text (approximation).
    _DESCENT_RATIO = 0.2
    # Standard PDF font names mapped to the PyMuPDF built-in abbreviations.
    _BASE14 = {
        "helvetica": "helv",
        "helvetica-bold": "hebo",
        "helvetica-oblique": "heit",
        "helvetica-boldoblique": "hebi",
        "times-roman": "tiro",
        "times-bold": "tibo",
        "times-italic": "tiit",
        "times-bolditalic": "tibi",
        "courier": "cour",
        "courier-bold": "cobo",
        "courier-oblique": "coit",
        "courier-boldoblique": "cobi",
    }

    def __init__(self, pdf_path):
        """Open *pdf_path* with both PyMuPDF and pdfplumber."""
        self.doc = fitz.open(pdf_path)
        try:
            self.pdfplumber_pdf = pdfplumber.open(pdf_path)
        except Exception:
            # Do not leak the first handle when the second open fails.
            self.doc.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def close(self):
        """Close both underlying document handles."""
        try:
            self.pdfplumber_pdf.close()
        finally:
            self.doc.close()

    @staticmethod
    def _describe_char(char):
        """Extract the formatting attributes of one pdfplumber character."""
        return {
            "font": char["fontname"],
            "size": char["size"],
            "color": char["non_stroking_color"],
            # NOTE: this is pdfplumber's boolean "upright" flag, not an
            # angle; the key name is kept for backward compatibility.
            "rotation": char["upright"],
        }

    def get_text_properties(self, page_num, text):
        """Return the format attributes (font, size, color, upright flag).

        The page's characters are scanned for the first run that spells
        *text*; whitespace is ignored on both sides. The attributes of the
        first character of that run are returned.

        Args:
            page_num: Zero-based page index.
            text: The text whose formatting is wanted.

        Returns:
            A dict with the keys ``font``, ``size``, ``color`` and
            ``rotation``, or ``None`` when *text* is empty or not found.
        """
        if not text:
            return None
        chars = self.pdfplumber_pdf.pages[page_num].chars

        needle = "".join(text.split())
        if not needle:
            # Whitespace-only text: compare character by character.
            for char in chars:
                if char["text"] == text:
                    return self._describe_char(char)
            return None

        glyphs = [char for char in chars if char["text"].strip()]
        span = len(needle)
        for start, char in enumerate(glyphs):
            if not needle.startswith(char["text"]):
                continue
            # Each glyph carries at least one character, so `span` glyphs
            # are always enough to cover the needle.
            run = "".join(g["text"] for g in glyphs[start:start + span])
            if run.startswith(needle):
                return self._describe_char(char)
        return None

    @staticmethod
    def _normalize_replacements(replacements):
        """Turn *replacements* into a list of ``(old_text, new_text)`` strings.

        Accepts 2-item sequences as well as dicts with the keys ``old_text``
        and ``new_text``. Entries whose old text is missing or empty are
        dropped; a missing new text becomes the empty string.
        """
        pairs = []
        for entry in replacements:
            if isinstance(entry, dict):
                old_text, new_text = entry["old_text"], entry["new_text"]
            else:
                old_text, new_text = entry
            if old_text is None:
                continue
            old_text = str(old_text)
            new_text = "" if new_text is None else str(new_text)
            if old_text:
                pairs.append((old_text, new_text))
        return pairs

    def erase_and_replace(self, output_path, replacements):
        """Replace every occurrence of each old text and save the result.

        Args:
            output_path: Where the modified document is written.
            replacements: Iterable of ``(old_text, new_text)`` pairs or of
                dicts with the keys ``old_text`` and ``new_text``.
        """
        # Materialise once so a one-shot iterator is applied to every page.
        pairs = self._normalize_replacements(replacements)
        for page_num, page in enumerate(self.doc):
            for old_text, new_text in pairs:
                for rect in page.search_for(old_text):
                    self._erase_text(page, rect)
                    self._insert_text(page, rect, new_text, page_num, old_text)
        self.doc.save(output_path)

    def _erase_text(self, page, rect):
        """Remove the content inside *rect* and fill the area."""
        # The background colour is not detected; white is assumed.
        fill_color = (1, 1, 1)
        # A rectangle drawn behind the page content would leave the old text
        # visible, so the area is redacted instead.
        page.add_redact_annot(rect, fill=fill_color)
        page.apply_redactions()

    @classmethod
    def _to_rgb(cls, color):
        """Convert a gray / RGB / CMYK colour value to an RGB tuple in 0..1."""
        if color is None:
            return cls._DEFAULT_COLOR
        components = (color,) if isinstance(color, (int, float)) else color
        try:
            values = tuple(min(1.0, max(0.0, float(v))) for v in components)
        except (TypeError, ValueError):
            return cls._DEFAULT_COLOR
        if len(values) == 1:
            return values * 3
        if len(values) == 3:
            return values
        if len(values) == 4:
            c, m, y, k = values
            return ((1 - c) * (1 - k), (1 - m) * (1 - k), (1 - y) * (1 - k))
        return cls._DEFAULT_COLOR

    @classmethod
    def _pick_font(cls, pdf_font, text):
        """Choose a PyMuPDF built-in font for *text*.

        Text that does not fit Latin-1 gets the built-in CJK font. Otherwise
        the original font name (subset prefix removed) is mapped to its
        built-in equivalent, falling back to Helvetica.
        """
        try:
            text.encode("latin-1")
        except UnicodeEncodeError:
            return cls._CJK_FONT
        base_name = (pdf_font or "").split("+")[-1].lower()
        return cls._BASE14.get(base_name, cls._LATIN_FONT)

    def _insert_text(self, page, rect, new_text, page_num, old_text):
        """Write *new_text* at the position of the erased *old_text*.

        Size and colour are taken from the original text when they can be
        determined; otherwise defaults derived from *rect* are used.
        """
        if not new_text:
            return
        props = self.get_text_properties(page_num, old_text) or {}
        # Without a known size, estimate it from the height of the match.
        size = props.get("size") or rect.height * 0.8
        color = self._to_rgb(props.get("color"))
        fontname = self._pick_font(props.get("font"), new_text)
        # insert_text expects the baseline start of the first character.
        baseline = fitz.Point(rect.x0, rect.y1 - size * self._DESCENT_RATIO)
        page.insert_text(
            baseline,
            new_text,
            fontsize=size,
            fontname=fontname,
            color=color,
        )
│   └── def process_tables(page, replacements):
    # 需实现表格检测与重建(此处为伪代码)
    tables = detect_tables(page)
    for table in tables:
        erase_table(table)
        new_table = rebuild_table(table, replacements)
        insert_table(page, new_table)
├── ui/
│   └── from PyQt5.QtWidgets import QMainWindow, QFileDialog, QVBoxLayout, QWidget, QPushButton, QLabel, QTableWidget, QTableWidgetItem
import pandas as pd

class MainWindow(QMainWindow):
    """Main window of the PDF text replacement tool.

    The user picks a PDF file and an Excel sheet with the columns
    ``old_text`` and ``new_text``; the rules are shown in a table and
    applied to the PDF when the run button is pressed.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("PDF文本替换工具")

        # State filled in by the upload handlers. Initialised here so that
        # the run handler can be triggered before anything was uploaded.
        self.pdf_path = ""
        self.replacements = []

        # NOTE(review): this attribute shadows QWidget.layout(); the name is
        # kept because outside code may already read it.
        self.layout = QVBoxLayout()

        # PDF upload button
        self.btn_upload_pdf = QPushButton("上传PDF文件", self)
        self.btn_upload_pdf.clicked.connect(self.upload_pdf)
        self.layout.addWidget(self.btn_upload_pdf)

        # Excel mapping sheet upload button
        self.btn_upload_excel = QPushButton("上传Excel替换表", self)
        self.btn_upload_excel.clicked.connect(self.upload_excel)
        self.layout.addWidget(self.btn_upload_excel)

        # Table showing the replacement rules
        self.table = QTableWidget()
        self.layout.addWidget(self.table)

        # Button that starts the replacement
        self.btn_run = QPushButton("开始替换", self)
        self.btn_run.clicked.connect(self.run_replacement)
        self.layout.addWidget(self.btn_run)

        # Status line for progress and error messages
        self.status_label = QLabel("", self)
        self.layout.addWidget(self.status_label)

        container = QWidget()
        container.setLayout(self.layout)
        self.setCentralWidget(container)

    def upload_pdf(self):
        """Ask for the PDF file and remember its path."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择PDF文件", "", "PDF Files (*.pdf)")
        # An empty path means the dialog was cancelled; keep the old choice.
        if file_path:
            self.pdf_path = file_path

    def upload_excel(self):
        """Ask for the Excel mapping sheet, load it and show its rules."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择Excel文件", "", "Excel Files (*.xlsx)")
        if not file_path:
            return
        try:
            # Read every cell as text so numeric cells can be displayed and
            # searched for like any other value.
            frame = pd.read_excel(file_path, dtype=str)
        except Exception as err:  # UI boundary: report instead of crashing
            self.status_label.setText(f"无法读取Excel文件: {err}")
            return
        missing = [name for name in ("old_text", "new_text") if name not in frame.columns]
        if missing:
            self.status_label.setText(f"Excel文件缺少列: {', '.join(missing)}")
            return
        # Empty cells become empty strings rather than NaN.
        self.replacements = frame.fillna("").to_dict("records")
        self.load_table()
        self.status_label.setText("")

    def load_table(self):
        """Fill the table widget with the loaded replacement rules."""
        self.table.setRowCount(len(self.replacements))
        self.table.setColumnCount(2)
        self.table.setHorizontalHeaderLabels(["原文本", "新文本"])
        for row, item in enumerate(self.replacements):
            # QTableWidgetItem only accepts text, so convert explicitly.
            self.table.setItem(row, 0, QTableWidgetItem(str(item["old_text"])))
            self.table.setItem(row, 1, QTableWidgetItem(str(item["new_text"])))

    def run_replacement(self):
        """Apply the loaded rules to the chosen PDF and save the result."""
        pairs = [
            (str(item["old_text"]), str(item["new_text"]))
            for item in self.replacements
            if str(item["old_text"])
        ]
        if not self.pdf_path or not pairs:
            self.status_label.setText("请先上传PDF文件和Excel替换表")
            return
        output_path, _ = QFileDialog.getSaveFileName(self, "保存替换后的PDF", "", "PDF Files (*.pdf)")
        if not output_path:
            return
        try:
            # PDFProcessor must be importable in this module; it is the
            # class from the core package of this project.
            PDFProcessor(self.pdf_path).erase_and_replace(output_path, pairs)
        except Exception as err:  # UI boundary: report instead of crashing
            self.status_label.setText(f"替换失败: {err}")
            return
        self.status_label.setText(f"替换完成: {output_path}")
└── requirements.txt