
"""Invoice PDF amount summarizer with a Tkinter front end.

The user picks a folder; every PDF below it is parsed for an amount, invoice
number, name, date and category. The results are listed in a table from which
files can be renamed, the total copied to the clipboard, or the rows exported
to an XLS workbook.
"""

import logging
import os
import re
import subprocess
import sys
import tkinter as tk
from tkinter import filedialog, messagebox, ttk, Toplevel, StringVar, Checkbutton, Button

import pdfplumber
import xlwt

# Folder that receives the XLS export; assigned by start_processing().
pdf_files_folder = None

# Logging configuration (the log file is truncated on every start).
logging.basicConfig(filename='app.log', filemode='w',
                    format='%(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Sort direction shared by successive column_sorter() calls.
reverse = False

# Patterns used to pull the individual fields out of the extracted PDF text.
_INVOICE_NUMBER_PATTERN = re.compile(r'(?:发票号码)\s*[::]*\s*([^\s]+)')
_NAME_PATTERN = re.compile(r'(?:名称|名\s{1}称|名\s{1}称\s{1}:)\s*[::]*\s*([^\s]+)')
_DATE_PATTERN = re.compile(r'\d\s*\d\s*\d\s*\d\s*年\s*\d\s*\d?\s*月\s*\d\s*\d?\s*日')
_CATEGORY_PATTERN = re.compile(r'\*([\u4e00-\u9fa5a-zA-Z]+)\*([\u4e00-\u9fa5a-zA-Z]+)')
_AMOUNT_PATTERN = re.compile(r'[¥¥](\d+(?:\.\d{1,2})?)')

# Characters that are rejected in file names on at least one supported platform.
_INVALID_FILE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def sorter(tree, column, data_type, reverse):
    """Reorder the top-level rows of *tree* by the values in *column*.

    Args:
        tree: A ``ttk.Treeview``.
        column: Identifier of the column to sort by.
        data_type: ``'num'`` to compare the cells as floats; any other value
            compares them as returned by the widget.
        reverse: ``True`` for descending order.
    """
    rows = [(tree.set(item, column), item) for item in tree.get_children('')]
    if data_type == 'num':
        try:
            rows = [(float(cell), item) for cell, item in rows]
        except ValueError:
            # Best effort: one non-numeric cell makes the whole column fall
            # back to the unconverted ordering instead of failing the sort.
            logging.debug('Column %s is not fully numeric; sorting it as text', column)
    rows.sort(reverse=reverse)
    for position, (_cell, item) in enumerate(rows):
        tree.move(item, '', position)


def column_sorter(tree, column, data_type='str'):
    """Sort *tree* by *column*, flipping the direction on every call.

    The direction is kept in the module-level ``reverse`` flag, so it is
    shared between all columns.
    """
    global reverse
    reverse = not reverse
    sorter(tree, column, data_type, reverse)


def _first_or_empty(matches):
    """Return the first element of *matches*, or ``""`` when it is empty."""
    return matches[0] if matches else ""


def read_pdf_content(file_path):
    """Extract the text of a PDF and the invoice fields found in it.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        A dict with the keys ``text`` (concatenated page text),
        ``invoice_number``, ``name`` and ``date`` (first match of the
        respective pattern, or ``""``) and ``category`` (list of the
        two-element tuples matched by the category pattern, possibly empty).
    """
    logging.info('开始函数 read_pdf_content')
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can yield None for a page without a text layer;
        # treat such a page as empty instead of breaking the join.
        full_text = "".join(page.extract_text() or "" for page in pdf.pages)

    content = {
        "text": full_text,
        "invoice_number": _first_or_empty(_INVOICE_NUMBER_PATTERN.findall(full_text)),
        "name": _first_or_empty(_NAME_PATTERN.findall(full_text)),
        "date": _first_or_empty(_DATE_PATTERN.findall(full_text)),
        "category": _CATEGORY_PATTERN.findall(full_text),
    }
    logging.info('结束函数 read_pdf_content')
    return content


def get_pdf_files(pdf_dir):
    """Return the normalized paths of all PDF files below *pdf_dir*.

    The directory is walked recursively and the ``.pdf`` suffix is matched
    case-insensitively.
    """
    logging.info('Starting function get_pdf_files')
    pdf_files = []
    for current_dir, _dirs, file_names in os.walk(pdf_dir):
        for file_name in file_names:
            if file_name.lower().endswith(".pdf"):
                # Normalize so the path has a consistent separator style.
                pdf_files.append(os.path.normpath(os.path.join(current_dir, file_name)))
    logging.info(f'Finished function get_pdf_files. Found {len(pdf_files)} pdf files.')
    return pdf_files


def rename_pdf_file(file_path, new_value):
    """Rename a PDF inside its own directory and return the new path.

    Characters that are not allowed in file names are replaced by ``_``. If a
    different file already carries the target name, ``_1``, ``_2``, ... is
    appended until a free name is found, so no existing file is overwritten.

    Args:
        file_path: Current path of the file.
        new_value: New base name (without the ``.pdf`` extension).

    Returns:
        The path the file was renamed to.

    Raises:
        OSError: If the file does not exist or cannot be renamed.
    """
    logging.info('Starting function rename_pdf_file')
    dir_path = os.path.dirname(file_path)
    safe_name = _INVALID_FILE_NAME_CHARS.sub("_", str(new_value)).strip()
    new_file_path = os.path.join(dir_path, f"{safe_name}.pdf")

    suffix = 1
    # samefile() lets a file keep (or only re-case) its own name.
    while os.path.exists(new_file_path) and not os.path.samefile(new_file_path, file_path):
        new_file_path = os.path.join(dir_path, f"{safe_name}_{suffix}.pdf")
        suffix += 1

    os.rename(file_path, new_file_path)
    logging.info(f'Finished function rename_pdf_file. File {file_path} renamed to {new_file_path}')
    return new_file_path


def sort_amount(tree, prev_sort=None):
    """Sort *tree* by its ``"Amount"`` column and renumber the first column.

    NOTE(review): nothing in this file calls this helper, and ``"Amount"`` is
    not a column identifier of the Treeview built in display_results() (that
    one uses ``"金额"``). Confirm the intended widget before wiring it up.

    Args:
        tree: A ``ttk.Treeview`` with an ``"Amount"`` column.
        prev_sort: State returned by the previous call, a dict with the keys
            ``column`` and ``reverse``; ``None`` starts a fresh state.

    Returns:
        The updated sort state (the same dict object when one was passed in).
    """
    if prev_sort is None:
        prev_sort = {"column": "Amount", "reverse": False}

    # Flip the direction when the previous sort was on this column as well.
    descending = not prev_sort["reverse"] if prev_sort["column"] == "Amount" else False

    ordered_items = sorted(tree.get_children(),
                           key=lambda item: float(tree.set(item, "Amount")),
                           reverse=descending)
    for position, item in enumerate(ordered_items):
        tree.move(item, '', position)
        # Keep the running number in the first column in display order.
        tree.item(item, values=(position + 1, *tree.item(item, "values")[1:]))

    prev_sort["column"] = "Amount"
    prev_sort["reverse"] = descending
    return prev_sort


def open_pdf(path):
    """Open *path* with the platform's default application."""
    if os.name == 'nt':
        os.startfile(path)
        return
    # os.name is 'posix' on both macOS and Linux, so the opener has to be
    # chosen from sys.platform: 'open' only exists on macOS.
    opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
    subprocess.call([opener, path])


def display_results(values, total_amount, input_root):
    """Replace the input window with the result table and run its event loop.

    Args:
        values: Iterable of row tuples ``(amount, invoice_number, name, date,
            category, file_path)``; a running number is prepended per row.
        total_amount: Sum of all amounts, shown below the table.
        input_root: The input window's root; it is destroyed first.
    """
    # Tear down the input window and build a fresh root for the results.
    input_root.destroy()
    root = tk.Tk()
    root.title("发票金额统计")
    center_window(root, width=950, height=300)

    main_frame = ttk.Frame(root, padding="10 10 10 10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
    main_frame.columnconfigure(0, weight=1)
    main_frame.rowconfigure(0, weight=1)

    # Column identifiers double as heading texts and as XLS header row.
    columns = ("序号", "金额", "发票号码", "公司", "开票日期", "类别", "文件路径")
    headers = columns
    column_widths = {"序号": 50, "金额": 100, "发票号码": 200, "公司": 180,
                     "开票日期": 100, "类别": 100, "文件路径": 50}

    def row_as_dict(item):
        """Return the row's cells as ``{column id: text}``."""
        return {column: str(cell) for column, cell in zip(columns, tree.item(item, 'values'))}

    def rename_selected_files():
        """Ask which fields form the new name and rename the selected files."""
        selected_items = tree.selection()
        if not selected_items:
            messagebox.showerror("错误", "请先选择一个或多个PDF文件进行重命名")
            return

        dialog = Toplevel(root)
        dialog.title("选择需要的字段")

        # (checkbox label, table column) in the order the parts are joined.
        field_choices = []
        for label, column in (('金额', "金额"), ('类别', "类别"),
                              ('发票号码', "发票号码"), ('公司名称', "公司")):
            flag = StringVar(value='no')
            Checkbutton(dialog, text=label, variable=flag, onvalue='yes',
                        offvalue='no', anchor='w').pack(fill='x')
            field_choices.append((flag, column))

        def on_ok():
            """Rename every selected file from the ticked fields."""
            chosen_columns = [column for flag, column in field_choices if flag.get() == 'yes']
            if not chosen_columns:
                # Nothing ticked: report once and keep the dialog open so the
                # user can correct the choice.
                messagebox.showerror("错误", "至少需要选择一个字段来重命名文件。", parent=dialog)
                return

            failed_paths = []
            for item in selected_items:
                logging.info('Processing item: %s', item)
                row = row_as_dict(item)
                current_file_path = row["文件路径"]
                name_parts = [row[column] for column in chosen_columns if row[column]]
                if not name_parts:
                    # All chosen fields are empty for this row; there is no
                    # usable name to rename to.
                    failed_paths.append(current_file_path)
                    continue
                try:
                    new_file_path = rename_pdf_file(current_file_path, "_".join(name_parts))
                except OSError:
                    logging.exception('Could not rename %s', current_file_path)
                    failed_paths.append(current_file_path)
                    continue
                # Keep the table pointing at the file's new location.
                tree.set(item, column="文件路径", value=new_file_path)

            if failed_paths:
                messagebox.showerror("错误", "以下文件重命名失败:\n" + "\n".join(failed_paths),
                                     parent=dialog)
            else:
                messagebox.showinfo("完成", "选中的文件已重命名。")
            dialog.destroy()

        Button(dialog, text='确定', command=on_ok).pack()

        # Center the dialog on the screen and make it modal.
        dialog.update_idletasks()
        width = dialog.winfo_reqwidth()
        height = dialog.winfo_reqheight()
        x = (root.winfo_screenwidth() // 2) - (width // 2)
        y = (root.winfo_screenheight() // 2) - (height // 2)
        dialog.geometry('+{}+{}'.format(x, y))
        dialog.transient(root)
        dialog.grab_set()
        dialog.wait_window()

    def export_to_xls():
        """Write the table (headers plus all rows) to an XLS workbook."""
        if pdf_files_folder is None:
            messagebox.showerror("错误", "PDF文件目录未设置。")
            return
        xls_file_path = os.path.join(pdf_files_folder, '发票数据.xls')

        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet('发票数据')
        # Widths are set as 256 units per character, one entry per column
        # in table order.
        for col_index, char_count in enumerate((5, 10, 30, 30, 20, 25, 10)):
            sheet.col(col_index).width = 256 * char_count
        style = xlwt.easyxf('align: vert centre, horiz centre')

        for col_index, header in enumerate(headers):
            sheet.write(0, col_index, header, style)
        # Data starts at row 1 because row 0 holds the headers; every cell is
        # written as text.
        for row_index, item in enumerate(tree.get_children(), start=1):
            for col_index, cell in enumerate(tree.item(item, 'values')):
                sheet.write(row_index, col_index, str(cell), style)

        try:
            workbook.save(xls_file_path)
        except OSError as error:
            # Typically the file is open in a spreadsheet program or the
            # folder is read-only.
            logging.exception('Could not save %s', xls_file_path)
            messagebox.showerror("错误", f"导出失败: {error}")
            return
        messagebox.showinfo("完成", f"数据成功导出至 '{xls_file_path}' 文件。")

    def copy_total_amount():
        """Put the total, formatted with two decimals, on the clipboard."""
        root.clipboard_clear()
        root.clipboard_append(f"{total_amount:.2f}")

    def open_clicked_pdf(event):
        """Open the PDF of the row under the pointer, if there is one."""
        item = tree.identify_row(event.y)
        if not item:
            # Double-click on the heading or on empty space.
            return
        open_pdf(row_as_dict(item)["文件路径"])

    # Result table.
    tree = ttk.Treeview(main_frame, columns=columns, show="headings")
    tree.grid(column=0, row=0, pady=5, padx=5, sticky=(tk.N, tk.S, tk.E, tk.W))
    tree.bind('<Double-1>', open_clicked_pdf)
    for col in columns:
        tree.column(col, width=column_widths[col], anchor='center')
        tree.heading(col, text=col)
    for index, value in enumerate(values, start=1):
        tree.insert("", "end", values=(index, *value))

    # Button column to the right of the table.
    button_frame = ttk.Frame(main_frame)
    button_frame.grid(column=1, row=0, padx=5, sticky=(tk.N, tk.S))
    button_specs = (
        ("重命名选中文件", rename_selected_files),
        ("复制总金额", copy_total_amount),
        ("导出到XLS", export_to_xls),
        ("退出", root.destroy),
    )
    for row_index, (text, command) in enumerate(button_specs):
        button_frame.rowconfigure(row_index, weight=1)
        ttk.Button(button_frame, text=text, command=command).grid(
            column=0, row=row_index, pady=5, sticky=(tk.N, tk.S))

    # Running total below the table.
    total_amount_label = ttk.Label(main_frame, text=f"所有发票的累计金额: {total_amount:.2f}")
    total_amount_label.grid(column=0, row=1, pady=5, sticky=tk.W)

    root.update()
    root.mainloop()
    logging.info('Exited main loop')


def copy_total_amount_to_clipboard():
    """Copy the module-level total to the clipboard of the module-level root.

    NOTE(review): nothing in this file calls this function, the module-level
    ``root`` is never assigned a window and ``total_amount`` is not defined at
    module scope, so a call fails as things stand. display_results() uses its
    own nested helper. Candidate for removal once external use is ruled out.
    """
    root.clipboard_clear()
    root.clipboard_append(f"{total_amount:.2f}")


# Module-level widget handles read by the legacy helpers above and below.
tree = None
root = None


def resize_treeview_columns(event):
    """Resize the columns of the module-level ``tree`` relative to ``event.width``.

    NOTE(review): this handler is not bound anywhere in this file, the
    module-level ``tree`` is never assigned, and the column identifiers below
    do not match the Treeview built in display_results(). Confirm the
    intended widget before binding it.
    """
    if tree is None:
        return
    new_width = event.width
    width_ratios = (
        ("Index", 0.05),
        ("Amount", 0.05),
        ("Invoice Number", 0.15),
        ("Name", 0.15),
        ("Date", 0.1),
        ("Category", 0.15),
        ("Renamed File", 0.15),
    )
    for column, ratio in width_ratios:
        tree.column(column, width=int(new_width * ratio), anchor=tk.CENTER)
    tree.update_idletasks()


def center_window(root, width=420, height=100):
    """Give *root* the requested size and center it on the screen."""
    x = (root.winfo_screenwidth() - width) // 2
    y = (root.winfo_screenheight() - height) // 2
    root.geometry(f"{width}x{height}+{x}+{y}")


def browse_folder(entry):
    """Let the user pick a folder and put the choice into *entry*.

    The entry is left untouched when the dialog is cancelled.
    """
    folder = filedialog.askdirectory(title="请选择发票PDF文件夹路径")
    if folder:
        entry.delete(0, tk.END)
        entry.insert(0, folder)


def _format_categories(categories):
    """Render the category tuples of read_pdf_content() as one display string.

    Each tuple is joined with ``-``; repeated entries are shown once, in
    first-seen order.
    """
    labels = ["-".join(pair) for pair in categories]
    return "、".join(dict.fromkeys(labels))


def start_processing(entry, input_root):
    """Parse every PDF below the chosen folder and show the result window.

    For each PDF the largest amount following a yen sign is taken as the
    invoice amount; PDFs without such an amount, or that cannot be read, are
    logged and skipped.

    Args:
        entry: Entry widget holding the folder path.
        input_root: Root of the input window, handed on to display_results().
    """
    global pdf_files_folder

    logging.info('Start processing')
    folder = entry.get().strip()
    if not folder:
        messagebox.showerror("错误", "请先选择或输入一个文件夹路径")
        return
    if not os.path.isdir(folder):
        messagebox.showerror("错误", "文件夹路径不存在,请重新选择")
        return

    pdf_files = get_pdf_files(folder)
    if pdf_files:
        pdf_files_folder = os.path.dirname(pdf_files[0])

    values = []
    for pdf_file in pdf_files:
        try:
            pdf_content = read_pdf_content(pdf_file)
        except Exception:
            # Batch boundary: one damaged or unreadable PDF must not abort
            # the remaining files. The traceback goes to the log.
            logging.exception('Could not read %s', pdf_file)
            continue

        amounts = _AMOUNT_PATTERN.findall(pdf_content["text"])
        if not amounts:
            logging.warning('No amount found in %s', pdf_file)
            continue

        # The original path is stored; renaming happens later on request.
        values.append((
            max(float(amount) for amount in amounts),
            pdf_content["invoice_number"],
            pdf_content["name"],
            pdf_content["date"],
            _format_categories(pdf_content["category"]),
            pdf_file,
        ))

    logging.info('Values: %s', values)
    total_amount = sum(row[0] for row in values)
    logging.info('Total amount: %s', total_amount)

    logging.info('Calling display_results function')
    display_results(values, total_amount, input_root)
    logging.info('Finished processing')


def main():
    """Build the folder-selection window and run its event loop."""
    logging.info('Application start')
    root = tk.Tk()
    root.title("发票金额统计")
    center_window(root)

    main_frame = ttk.Frame(root, padding="10 10 10 10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    folder_entry = ttk.Entry(main_frame, width=40)
    folder_entry.grid(column=0, row=0, padx=5, pady=5, sticky=tk.W)

    browse_button = ttk.Button(main_frame, text="打开",
                               command=lambda: browse_folder(folder_entry))
    browse_button.grid(column=1, row=0, padx=5, pady=5, sticky=tk.W)

    start_button = ttk.Button(main_frame, text="开始处理",
                              command=lambda: start_processing(folder_entry, root))
    start_button.grid(column=1, row=1, padx=5, pady=5, sticky=tk.W)

    root.columnconfigure(0, weight=1)
    root.rowconfigure(0, weight=1)
    root.mainloop()
    logging.info('Application finished')


if __name__ == "__main__":
    main()