您的位置:首页 > 健康 > 美食 > 推广联系方式_政府平台公司是什么意思_百度云搜索引擎入口官方_天津做网站的网络公司

推广联系方式_政府平台公司是什么意思_百度云搜索引擎入口官方_天津做网站的网络公司

2025/1/1 21:44:47 来源:https://blog.csdn.net/Clay_K/article/details/144725470  浏览:    关键词:推广联系方式_政府平台公司是什么意思_百度云搜索引擎入口官方_天津做网站的网络公司
推广联系方式_政府平台公司是什么意思_百度云搜索引擎入口官方_天津做网站的网络公司

Python PDF转换工具箱(PDF转图片,word,拆分,删除,提取)

1.简介:

使用Python自写的PDF工具箱,功能包括PDF合并、页面拆分、页面删除、页面提取,以及PDF转Word、转图片,支持文件拖入。文章末尾已附源码以及打包好的exe文件,大家需要可自行下载学习,喜欢的话给博主点个小小的关注哦,主页还将会更新更多Python相关干货资源,关注不迷路哦!

功能介绍:
合并:添加顺序就是合并顺序,可多次添加。
拆分:将输入页码的范围拆分成每个独立的pdf,单次可输入多个范围。
删除:将输入页码的范围删除,单次可输入多个范围,保存删除后的文件。
提取:将输入页码的范围提取成独立的pdf,单次可输入多个范围。

2.运行效果:在这里插入图片描述

3.相关源码:

import os
import re
import sys

import fitz  # PyMuPDF, used to render PDF pages to images
from pdf2docx import Converter
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from PyQt5.QtCore import QThread, pyqtSignal
from PyQt5.QtWidgets import (
    QApplication,
    QFileDialog,
    QHBoxLayout,
    QLineEdit,
    QListWidget,
    QMainWindow,
    QMessageBox,
    QPushButton,
    QVBoxLayout,
    QWidget,
)


class CustomListWidget(QListWidget):
    """List widget that accepts ``.pdf`` files dropped onto it.

    Dropped files are forwarded to ``parent.addPDFFile(path)``, so the parent
    passed to the constructor must provide that method.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        self.parentWindow = parent

    @staticmethod
    def _is_pdf_url(url):
        """Return True when the URL text ends with ``.pdf`` (case-insensitive)."""
        return url.toString().lower().endswith('.pdf')

    def dragEnterEvent(self, event):
        """Accept the drag if at least one dragged URL is a PDF."""
        if any(self._is_pdf_url(url) for url in event.mimeData().urls()):
            event.acceptProposedAction()

    def dragMoveEvent(self, event):
        """Keep accepting the drag while it moves over the widget."""
        if any(self._is_pdf_url(url) for url in event.mimeData().urls()):
            event.acceptProposedAction()

    def dropEvent(self, event):
        """Add every dropped PDF to the parent window's file list."""
        pdf_files = [
            url.toLocalFile()
            for url in event.mimeData().urls()
            if self._is_pdf_url(url)
        ]
        for path in pdf_files:
            self.parentWindow.addPDFFile(path)
        if pdf_files:
            event.acceptProposedAction()


class Worker(QThread):
    """Background thread that runs one PDF operation.

    Args:
        pdf_files: Paths of the input PDF files.
        range_str: Page ranges such as ``"1,3-4,8-15"`` (1-based, inclusive);
            used by the split / delete / extract operations.
        save_path: Output file (merge, delete) or output directory
            (split, extract, jpg, word).
        operation: One of ``'merge'``, ``'split'``, ``'delete'``,
            ``'extract'``, ``'jpg'``, ``'word'``.

    Signals:
        finished(str): Emitted with a success message when the operation ends.
        error(str): Emitted with the exception text when the operation fails.
    """

    # NOTE: this intentionally re-declares ``finished`` with a ``str`` payload,
    # which hides the argument-less ``finished`` signal QThread itself defines.
    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        super().__init__()
        self.pdf_files = pdf_files
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Dispatch to the handler for ``self.operation``.

        Any exception raised by the handler is reported through ``error``
        instead of propagating out of the thread. An unknown operation is a
        no-op.
        """
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return
        try:
            handler()
        except Exception as e:  # thread boundary: report instead of crashing
            self.error.emit(str(e))

    def merge_pdfs(self):
        """Concatenate all input PDFs, in list order, into ``save_path``."""
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            # Release the file handles even when append/write fails.
            merger.close()
        self.finished.emit('PDF文件已成功合并。')

    def split_pdfs(self):
        """Write every page of the requested ranges as its own PDF file.

        Files are named ``split_page_<n>.pdf`` inside the ``save_path``
        directory, numbered in the order the pages are requested.
        """
        reader = PdfReader(self.pdf_files[0])
        ranges = self._checked_ranges(len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)
        file_index = 1  # running counter that keeps output names unique
        for start_page, end_page in ranges:
            for page_num in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(reader.pages[page_num])
                split_save_path = os.path.join(
                    self.save_path, f'split_page_{file_index}.pdf'
                )
                with open(split_save_path, 'wb') as f:
                    writer.write(f)
                file_index += 1
        self.finished.emit('PDF文件已成功拆分并保存。')

    def delete_pages(self):
        """Write a copy of the first input PDF without the requested pages."""
        reader = PdfReader(self.pdf_files[0])
        ranges = self._checked_ranges(len(reader.pages))
        pages_to_delete = {
            page for start, end in ranges for page in range(start, end + 1)
        }
        writer = PdfWriter()
        for index, page in enumerate(reader.pages):
            if index not in pages_to_delete:
                writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定页面已从PDF中删除。')

    def extract_pages(self):
        """Write each requested range as one PDF inside ``save_path``.

        ``save_path`` is a directory; range *i* (1-based) is written to
        ``extract_<i>.pdf``.
        """
        reader = PdfReader(self.pdf_files[0])
        ranges = self._checked_ranges(len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定页面已从PDF中提取。')

    def pdf_to_jpg(self):
        """Render every page of every input PDF to ``Page_<n>.jpg``.

        Each PDF gets its own sub-folder of ``save_path`` named after the
        file (without extension).
        """
        trans = fitz.Matrix(2, 2)  # 2x zoom on both axes
        for file in self.pdf_files:
            base_name = os.path.splitext(os.path.basename(file))[0]
            img_folder = os.path.join(self.save_path, base_name)
            os.makedirs(img_folder, exist_ok=True)
            pdf = fitz.open(file)
            try:
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'Page_{pg + 1}.jpg'))
            finally:
                pdf.close()
        self.finished.emit('PDF文件已成功转换为图片。')

    def pdf_to_word(self):
        """Convert every input PDF to a ``.docx`` file.

        The document is written into ``save_path`` when one was given;
        otherwise it is placed next to the source PDF.
        """
        for file in self.pdf_files:
            if self.save_path:
                base_name = os.path.splitext(os.path.basename(file))[0]
                docx_name = os.path.join(self.save_path, base_name + '.docx')
            else:
                docx_name = os.path.splitext(file)[0] + '.docx'
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                cv.close()
        self.finished.emit('PDF文件已成功转换为Word文档。')

    def parse_ranges(self, ranges_str):
        """Parse ``"1,3-4,8-15"`` into 0-based inclusive ``(start, end)`` pairs.

        Items may be separated by an ASCII comma or a full-width comma;
        surrounding whitespace and empty items are ignored.

        Raises:
            ValueError: If an item is not an integer or an ``a-b`` pair.
        """
        ranges = []
        for part in re.split(r'[,,]', ranges_str or ''):
            part = part.strip()
            if not part:
                continue
            if '-' in part:
                start_page, end_page = map(int, part.split('-'))
            else:
                start_page = end_page = int(part)
            ranges.append((start_page - 1, end_page - 1))
        return ranges

    def _checked_ranges(self, page_count):
        """Parse ``self.range_str`` and validate it against ``page_count``.

        Without this check, page ``0`` would become index ``-1`` and silently
        select the last page, and a too-large page would fail with a bare
        ``IndexError``.

        Raises:
            ValueError: If no range was given or a range lies outside
                ``1..page_count`` or is reversed.
        """
        ranges = self.parse_ranges(self.range_str)
        if not ranges:
            raise ValueError('未输入有效的页码范围。')
        for start, end in ranges:
            if not 0 <= start <= end < page_count:
                raise ValueError(
                    f'页码范围无效:{start + 1}-{end + 1}(文档共 {page_count} 页)'
                )
        return ranges


class PDFMergerApp(QMainWindow):
    """Main window of the PDF toolbox.

    Lets the user build a list of PDF files and merge, split, delete pages
    from, extract pages from, or convert them; the work runs in a ``Worker``
    thread.
    """

    def __init__(self):
        super().__init__()
        # State is set up before the UI so no slot can see it missing.
        self.pdf_files = []
        self._worker = None
        self.initUI()

    def initUI(self):
        """Build all widgets and connect their signals."""
        self.setWindowTitle('PDF 工具箱')
        self.setGeometry(100, 100, 800, 600)
        mainLayout = QVBoxLayout()

        self.addButton = QPushButton('添加 PDF', self)
        self.addButton.clicked.connect(self.addPDF)
        mainLayout.addWidget(self.addButton)

        self.listWidget = CustomListWidget(self)
        mainLayout.addWidget(self.listWidget)

        deleteLayout = QHBoxLayout()
        self.removeButton = QPushButton('删除选定', self)
        self.removeButton.clicked.connect(self.removeSelected)
        deleteLayout.addWidget(self.removeButton)
        self.removeAllButton = QPushButton('删除全部', self)
        self.removeAllButton.clicked.connect(self.removeAll)
        deleteLayout.addWidget(self.removeAllButton)
        mainLayout.addLayout(deleteLayout)

        convertLayout = QHBoxLayout()
        self.convertJPGButton = QPushButton('转换为图片', self)
        self.convertJPGButton.clicked.connect(self.convertToJPG)
        convertLayout.addWidget(self.convertJPGButton)
        self.convertWordButton = QPushButton('转换为Word', self)
        self.convertWordButton.clicked.connect(self.convertToWord)
        convertLayout.addWidget(self.convertWordButton)
        mainLayout.addLayout(convertLayout)

        self.mergeButton = QPushButton('合并 PDFs', self)
        self.mergeButton.clicked.connect(self.mergePDFs)
        mainLayout.addWidget(self.mergeButton)

        self.splitInput, self.splitButton = self._add_range_row(
            mainLayout,
            '输入拆分页码范围可输入多个范围,如1,3-4,8-15',
            '拆分页面',
            self.splitPDF,
        )
        self.deleteInput, self.deleteButton = self._add_range_row(
            mainLayout,
            '输入删除页码范围可输入多个范围,如1,3-4,8-15',
            '删除页面',
            self.deletePages,
        )
        self.extractInput, self.extractButton = self._add_range_row(
            mainLayout,
            '输入提取页码范围可输入多个范围,如1,3-4,8-15',
            '提取页面',
            self.extractPages,
        )

        container = QWidget()
        container.setLayout(mainLayout)
        self.setCentralWidget(container)

    def _add_range_row(self, layout, placeholder, button_text, slot):
        """Append a "page-range input + action button" row to ``layout``.

        Returns:
            The created ``(QLineEdit, QPushButton)`` pair.
        """
        row = QHBoxLayout()
        line_edit = QLineEdit(self)
        line_edit.setPlaceholderText(placeholder)
        row.addWidget(line_edit)
        button = QPushButton(button_text, self)
        # Wrap the slot so the button's ``checked`` argument is discarded.
        button.clicked.connect(lambda _checked=False: slot())
        row.addWidget(button)
        layout.addLayout(row)
        return line_edit, button

    def addPDF(self):
        """Ask the user for PDF files and add them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打开文件', '', 'PDF files (*.pdf)')
        for file_path in files:
            self.addPDFFile(file_path)

    def addPDFFile(self, file_path):
        """Add one path to the list unless it is empty or already present."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listWidget.addItem(file_path)

    def removeSelected(self):
        """Remove the selected entries from both the list widget and the model."""
        for item in self.listWidget.selectedItems():
            self.pdf_files.remove(item.text())
            self.listWidget.takeItem(self.listWidget.row(item))

    def removeAll(self):
        """Remove every entry from the file list."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def _start_worker(self, operation, save_path, range_str=None):
        """Create, wire up and start a ``Worker`` for ``operation``.

        Refuses to start while a previous worker is still running, because
        replacing the reference to a running QThread can destroy it mid-run.
        The worker receives a copy of the file list so later edits in the GUI
        cannot change what it is processing.
        """
        if self._worker is not None and self._worker.isRunning():
            QMessageBox.warning(self, "错误", "已有操作正在进行,请稍候。")
            return
        worker = Worker(
            list(self.pdf_files),
            range_str=range_str,
            save_path=save_path,
            operation=operation,
        )
        worker.finished.connect(self.onFinished)
        worker.error.connect(self.onError)
        self._worker = worker
        self.thread = worker  # attribute name kept for backward compatibility
        worker.start()

    def _require_files(self):
        """Warn and return False when no PDF has been added yet."""
        if not self.pdf_files:
            QMessageBox.warning(self, "错误", "请先添加PDF文件。")
            return False
        return True

    def _require_single_file(self, warning_text):
        """Warn with ``warning_text`` and return False unless exactly one PDF is listed."""
        if len(self.pdf_files) != 1:
            QMessageBox.warning(self, "错误", warning_text)
            return False
        return True

    def _require_range(self, line_edit):
        """Return the stripped text of ``line_edit``, warning when it is empty."""
        range_str = line_edit.text().strip()
        if not range_str:
            QMessageBox.warning(self, "错误", "请输入页码范围。")
        return range_str

    def mergePDFs(self):
        """Merge all listed PDFs into a file chosen by the user."""
        if not self._require_files():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')
        if save_path:
            self._start_worker('merge', save_path)

    def splitPDF(self):
        """Split the requested pages of the single listed PDF into a folder."""
        if not self._require_single_file("请只选择一个PDF文件进行拆分。"):
            return
        range_str = self._require_range(self.splitInput)
        if not range_str:
            return
        folder_path = self.getFolderName()
        if folder_path:
            self._start_worker('split', folder_path, range_str=range_str)

    def deletePages(self):
        """Save a copy of the single listed PDF without the requested pages."""
        if not self._require_single_file("请只选择一个PDF文件进行删除操作。"):
            return
        range_str = self._require_range(self.deleteInput)
        if not range_str:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')[0]
        if save_path:
            self._start_worker('delete', save_path, range_str=range_str)

    def extractPages(self):
        """Extract the requested ranges of the single listed PDF into a folder.

        A folder (not a file name) is requested because the worker writes one
        ``extract_<i>.pdf`` per range into the chosen location.
        """
        if not self._require_single_file("请只选择一个PDF文件进行提取操作。"):
            return
        range_str = self._require_range(self.extractInput)
        if not range_str:
            return
        folder_path = self.getFolderName("选择保存提取文件的位置")
        if folder_path:
            self._start_worker('extract', folder_path, range_str=range_str)

    def convertToJPG(self):
        """Render all listed PDFs to JPG images in a folder chosen by the user."""
        if not self._require_files():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存图片的位置")
        if save_path:
            self._start_worker('jpg', save_path)

    def convertToWord(self):
        """Convert all listed PDFs to Word documents in a folder chosen by the user."""
        if not self._require_files():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存Word的位置")
        if save_path:
            self._start_worker('word', save_path)

    def getFolderName(self, title="选择保存拆分文件的位置"):
        """Show a directory picker and return the chosen path ('' if cancelled)."""
        folder_path = QFileDialog.getExistingDirectory(self, title)
        return folder_path

    def onFinished(self, message):
        """Report success, then reset the file list and the range inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onError(self, error_message):
        """Show the worker's error text; the file list is left untouched."""
        QMessageBox.warning(self, "操作失败", error_message)

    def clear_pdf_list(self):
        """Empty both the internal file list and the list widget."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def clear_text_inputs(self):
        """Clear every page-range input field."""
        self.splitInput.clear()
        self.deleteInput.clear()
        self.extractInput.clear()


def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the event loop's return code.
    """
    app = QApplication(sys.argv)
    ex = PDFMergerApp()
    ex.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com