"""Invoice amount statistics tool.

Scans a folder for PDF invoices, extracts the amount, invoice number, company
name, date and category from each file, and shows the results in a Tk window
from which files can be renamed, the total copied to the clipboard, or the
table exported to an .xls workbook.
"""

import logging
import os
import re
import subprocess
import sys
import tkinter as tk
from tkinter import filedialog, messagebox, ttk, Toplevel, StringVar, Checkbutton, Button

import pdfplumber
import xlwt

# Folder selected by the user; export_to_xls writes the workbook here.
pdf_files_folder = None

# Configure logging (the log file is rewritten on every start).
logging.basicConfig(filename='app.log', filemode='w',
                    format='%(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Sort direction toggled by column_sorter.
reverse = False

# Module-level widget references read by resize_treeview_columns; they stay
# None until a caller assigns them.
tree = None
root = None

# Column identifiers of the results Treeview, in display order. The same
# strings are used as heading texts and as the header row of the export.
_COLUMNS = ("序号", "金额", "发票号码", "公司", "开票日期", "类别", "文件路径")
_AMOUNT_COLUMN = "金额"
_PATH_COLUMN = "文件路径"
_COLUMN_WIDTHS = {"序号": 50, "金额": 100, "发票号码": 200, "公司": 180,
                  "开票日期": 100, "类别": 100, "文件路径": 50}
# Share of the available width given to each column on resize.
_COLUMN_WIDTH_RATIOS = {"序号": 0.05, "金额": 0.05, "发票号码": 0.15,
                        "公司": 0.15, "开票日期": 0.1, "类别": 0.15,
                        "文件路径": 0.15}
# Width of each exported column, in characters.
_XLS_COLUMN_CHARS = (5, 10, 30, 30, 20, 25, 10)

# Both the ASCII and the full-width form of ':' and of the yen sign are
# accepted, because extracted invoice text may contain either.
_INVOICE_NUMBER_RE = re.compile(r'发票号码\s*[:\uff1a]*\s*(\S+)')
_NAME_RE = re.compile(r'名\s?称\s*[:\uff1a]*\s*(\S+)')
_DATE_RE = re.compile(
    r'\d\s*\d\s*\d\s*\d\s*年\s*\d\s*\d?\s*月\s*\d\s*\d?\s*日')
_CATEGORY_RE = re.compile(
    r'\*([\u4e00-\u9fa5a-zA-Z]+)\*([\u4e00-\u9fa5a-zA-Z]+)')
_AMOUNT_RE = re.compile(r'[\u00a5\uffe5]\s*(\d+(?:\.\d{1,2})?)')
# Characters that cannot appear in a file name on Windows or POSIX.
_INVALID_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sorter(tree, column, data_type, reverse):
    """Reorder the rows of *tree* by the values in *column*.

    Args:
        tree: the ttk.Treeview to sort.
        column: identifier of the column supplying the sort key.
        data_type: 'num' to compare as floats, anything else to compare the
            raw cell values.
        reverse: True for descending order.
    """
    rows = [(tree.set(item, column), item) for item in tree.get_children('')]
    if data_type == 'num':
        try:
            rows = [(float(value), item) for value, item in rows]
        except ValueError:
            # Best effort: a non-numeric cell falls back to comparing the
            # raw cell values instead of aborting the sort.
            logging.warning('Column %s is not fully numeric; sorting as text',
                            column)
    rows.sort(reverse=reverse)
    for index, (_, item) in enumerate(rows):
        tree.move(item, '', index)


def column_sorter(tree, column, data_type='str'):
    """Sort *tree* by *column*, flipping the shared direction on each call."""
    global reverse
    reverse = not reverse
    sorter(tree, column, data_type, reverse)


def read_pdf_content(file_path):
    """Extract the text and the invoice fields from one PDF.

    Returns:
        A dict with keys "text" (all page text concatenated),
        "invoice_number", "name" and "date" (first match, or "" when absent)
        and "category" (list of 2-tuples matched by the category pattern,
        possibly empty).
    """
    logging.info('开始函数 read_pdf_content')
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may yield None for a page without a text layer.
        full_text = "".join(page.extract_text() or "" for page in pdf.pages)

    invoice_number = _INVOICE_NUMBER_RE.findall(full_text)
    name = _NAME_RE.findall(full_text)
    date = _DATE_RE.findall(full_text)
    category = _CATEGORY_RE.findall(full_text)
    logging.info('结束函数 read_pdf_content')
    return {
        "text": full_text,
        "invoice_number": invoice_number[0] if invoice_number else "",
        "name": name[0] if name else "",
        "date": date[0] if date else "",
        "category": category if category else [],
    }


def get_pdf_files(pdf_dir):
    """Return the normalised paths of all PDF files under *pdf_dir*.

    The directory is walked recursively and the extension is matched
    case-insensitively, so ``.PDF`` files are included.
    """
    logging.info('Starting function get_pdf_files')
    pdf_files = [
        os.path.normpath(os.path.join(folder, file_name))
        for folder, _dirs, file_names in os.walk(pdf_dir)
        for file_name in file_names
        if file_name.lower().endswith(".pdf")
    ]
    logging.info('Finished function get_pdf_files. Found %d pdf files.',
                 len(pdf_files))
    return pdf_files


def _is_taken(candidate, current_path):
    """Return True when *candidate* exists and is a file other than *current_path*."""
    return (os.path.exists(candidate)
            and not os.path.samefile(candidate, current_path))


def rename_pdf_file(file_path, new_value):
    """Rename *file_path* to ``<new_value>.pdf`` in the same directory.

    Characters that are not allowed in file names are replaced by ``_``. If
    another file already has the target name, `` (1)``, `` (2)``, ... is
    appended so that no existing file is overwritten.

    Returns:
        The path the file has after the rename.

    Raises:
        ValueError: if nothing usable is left of *new_value*.
        OSError: if the file cannot be renamed.
    """
    logging.info('Starting function rename_pdf_file')
    base_name = _INVALID_FILENAME_CHARS_RE.sub("_", str(new_value)).strip(" .")
    if not base_name:
        raise ValueError("new file name is empty")

    dir_path = os.path.dirname(file_path)
    new_file_path = os.path.join(dir_path, f"{base_name}.pdf")
    suffix = 1
    while _is_taken(new_file_path, file_path):
        new_file_path = os.path.join(dir_path, f"{base_name} ({suffix}).pdf")
        suffix += 1

    if new_file_path != file_path:
        os.rename(file_path, new_file_path)
    logging.info('Finished function rename_pdf_file. File %s renamed to %s',
                 file_path, new_file_path)
    return new_file_path


def sort_amount(tree, prev_sort=None):
    """Sort *tree* by the amount column and renumber the index column.

    Args:
        tree: the results ttk.Treeview.
        prev_sort: state dict returned by the previous call, or None for the
            first call. Passing it back toggles the direction.

    Returns:
        The updated state dict with keys "column" and "reverse".
    """
    if prev_sort is None:
        prev_sort = {"column": "Amount", "reverse": False}

    reverse = not prev_sort["reverse"] if prev_sort["column"] == "Amount" else False

    # Sort, then rewrite the first value of every row with its new position.
    sorted_items = sorted(
        tree.get_children(),
        key=lambda item: float(tree.set(item, _AMOUNT_COLUMN)),
        reverse=reverse,
    )
    for index, item in enumerate(sorted_items, start=1):
        tree.move(item, '', index - 1)
        tree.item(item, values=(index, *tree.item(item, "values")[1:]))

    prev_sort["column"] = "Amount"
    prev_sort["reverse"] = reverse
    return prev_sort


def open_pdf(path):
    """Open *path* with the default application of the platform."""
    try:
        if os.name == 'nt':  # Windows
            os.startfile(path)
        else:
            # os.name is 'posix' on both macOS and Linux, so sys.platform is
            # needed to tell them apart.
            opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
            subprocess.call([opener, path])
    except OSError:
        logging.exception('Could not open %s', path)
        messagebox.showerror("错误", f"无法打开文件: {path}")


def display_results(values, total_amount, input_root):
    """Replace the input window with the results window and run its loop.

    Args:
        values: one tuple per invoice: (amount, invoice number, company,
            date, category, file path).
        total_amount: sum of all amounts, shown below the table.
        input_root: the input window; it is destroyed first.
    """
    # Destroy the root window of the input screen.
    input_root.destroy()

    # Create the new root window.
    root = tk.Tk()
    root.title("发票金额统计")
    center_window(root, width=950, height=300)
    root.columnconfigure(0, weight=1)
    root.rowconfigure(0, weight=1)

    # Main frame.
    main_frame = ttk.Frame(root, padding="10 10 10 10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
    main_frame.columnconfigure(0, weight=1)
    main_frame.rowconfigure(0, weight=1)

    def rename_selected_files():
        """Ask which fields form the new name, then rename the selection."""
        selected_items = tree.selection()
        if not selected_items:
            messagebox.showerror("错误", "请先选择一个或多个PDF文件进行重命名")
            return

        dialog = Toplevel(root)
        dialog.title("选择需要的字段")

        # (checkbox label, Treeview column supplying that part of the name);
        # the order here is the order of the parts in the file name.
        field_choices = (
            ("金额", "金额"),
            ("类别", "类别"),
            ("发票号码", "发票号码"),
            ("公司名称", "公司"),
        )
        field_vars = []
        for label, column in field_choices:
            var = tk.BooleanVar(master=dialog, value=False)
            Checkbutton(dialog, text=label, variable=var,
                        anchor='w').pack(fill='x')
            field_vars.append((var, column))

        def on_ok():
            """Callback of the dialog's confirm button."""
            chosen_columns = [column for var, column in field_vars if var.get()]
            if not chosen_columns:
                # Keep the dialog open so the user can pick a field.
                messagebox.showerror("错误", "至少需要选择一个字段来重命名文件。")
                return

            failures = []
            for item in selected_items:
                logging.info('Processing item: %s', item)
                new_file_name = "_".join(
                    str(tree.set(item, column)) for column in chosen_columns)
                current_file_path = str(tree.set(item, _PATH_COLUMN))
                try:
                    new_file_path = rename_pdf_file(current_file_path,
                                                    new_file_name)
                except (OSError, ValueError) as exc:
                    logging.exception('Could not rename %s', current_file_path)
                    failures.append(f"{current_file_path}: {exc}")
                    continue
                tree.set(item, column=_PATH_COLUMN, value=new_file_path)

            if failures:
                messagebox.showerror(
                    "错误", "以下文件重命名失败:\n" + "\n".join(failures))
            else:
                messagebox.showinfo("完成", "选中的文件已重命名。")
            dialog.destroy()

        Button(dialog, text='确定', command=on_ok).pack()

        # Centre the dialog on the screen and make it modal.
        dialog.update_idletasks()
        width = dialog.winfo_reqwidth()
        height = dialog.winfo_reqheight()
        x = (root.winfo_screenwidth() // 2) - (width // 2)
        y = (root.winfo_screenheight() // 2) - (height // 2)
        dialog.geometry('+{}+{}'.format(x, y))
        dialog.transient(root)
        dialog.grab_set()
        dialog.wait_window()  # Wait until the dialog is closed.

    def export_to_xls():
        """Write the table to '发票数据.xls' in the selected folder."""
        if pdf_files_folder is None:
            messagebox.showerror("错误", "PDF文件目录未设置。")
            return

        xls_file_path = os.path.join(pdf_files_folder, '发票数据.xls')
        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet('发票数据')
        # xlwt measures column width in 1/256 of a character.
        for col_index, chars in enumerate(_XLS_COLUMN_CHARS):
            sheet.col(col_index).width = 256 * chars
        style = xlwt.easyxf('align: vert centre, horiz centre')

        # Row 0 holds the column titles; data rows start at 1.
        for col_index, header in enumerate(headers):
            sheet.write(0, col_index, header, style)
        for row_index, item in enumerate(tree.get_children(), start=1):
            row_values = tree.item(item, 'values')
            for col_index, value in enumerate(row_values):
                # Every cell is written as text.
                sheet.write(row_index, col_index, str(value), style)

        try:
            workbook.save(xls_file_path)
        except OSError as exc:
            # Typically the workbook is still open in another program.
            logging.exception('Could not save %s', xls_file_path)
            messagebox.showerror("错误", f"导出失败: {exc}")
            return
        messagebox.showinfo("完成", f"数据成功导出至 '{xls_file_path}' 文件。")

    def copy_total_amount_to_clipboard(total_amount):
        """Put the total, formatted with two decimals, on the clipboard."""
        root.clipboard_clear()
        root.clipboard_append(f"{total_amount:.2f}")

    def on_double_click(event):
        """Open the PDF of the row under the pointer, if there is one."""
        item = tree.identify_row(event.y)
        if item:
            open_pdf(str(tree.set(item, _PATH_COLUMN)))

    # Tree view.
    headers = _COLUMNS
    tree = ttk.Treeview(main_frame, columns=_COLUMNS, show="headings")
    tree.grid(column=0, row=0, pady=5, padx=5,
              sticky=(tk.N, tk.S, tk.E, tk.W))
    tree.bind('<Double-1>', on_double_click)
    for col in tree['columns']:
        tree.column(col, width=_COLUMN_WIDTHS[col], anchor='center')
        tree.heading(col, text=col)

    # Insert the data, prefixing each row with its 1-based index.
    for index, value in enumerate(values, start=1):
        tree.insert("", "end", values=(index, *value))

    # Button column on the right.
    button_frame = ttk.Frame(main_frame)
    button_frame.grid(column=1, row=0, padx=5, sticky=(tk.N, tk.S))
    buttons = (
        ("重命名选中文件", rename_selected_files),
        ("复制总金额", lambda: copy_total_amount_to_clipboard(total_amount)),
        ("导出到XLS", export_to_xls),
        ("退出", root.destroy),
    )
    for row, (text, command) in enumerate(buttons):
        button_frame.rowconfigure(row, weight=1)
        ttk.Button(button_frame, text=text, command=command).grid(
            column=0, row=row, pady=5, sticky=(tk.N, tk.S))

    # Total amount label.
    total_amount_label = ttk.Label(
        main_frame, text=f"所有发票的累计金额: {total_amount:.2f}")
    total_amount_label.grid(column=0, row=1, pady=5, sticky=tk.W)

    # Refresh the window, then start the event loop.
    root.update()
    root.mainloop()
    logging.info('Exited main loop')


def resize_treeview_columns(event):
    """Resize the columns of the module-level tree to fit ``event.width``.

    Does nothing while the module-level ``tree`` is still None.
    """
    if tree is None:
        return
    new_width = event.width
    for column, ratio in _COLUMN_WIDTH_RATIOS.items():
        tree.column(column, width=int(new_width * ratio), anchor=tk.CENTER)
    tree.update_idletasks()


def center_window(root, width=420, height=100):
    """Give *root* the requested size and centre it on the screen."""
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    x = (screen_width - width) // 2
    y = (screen_height - height) // 2
    root.geometry(f"{width}x{height}+{x}+{y}")


def browse_folder(entry):
    """Let the user pick a folder and put its path into *entry*."""
    folder = filedialog.askdirectory(title="请选择发票PDF文件夹路径")
    if folder:
        entry.delete(0, tk.END)
        entry.insert(0, folder)


def _format_category(pairs):
    """Render the category 2-tuples as one string, without duplicates."""
    labels = dict.fromkeys(f"{first}-{second}" for first, second in pairs)
    return ",".join(labels)


def start_processing(entry, input_root):
    """Read every PDF in the folder named by *entry* and show the results.

    For each PDF the largest amount following a yen sign is taken as the
    invoice amount; files without such an amount are left out.
    """
    logging.info('Start processing')
    folder = entry.get().strip()
    if not folder:
        messagebox.showerror("错误", "请先选择或输入一个文件夹路径")
        return

    pdf_files = get_pdf_files(folder)
    if not pdf_files:
        messagebox.showinfo("提示", "所选文件夹中未找到PDF文件。")
        return

    # Export into the folder the user chose, not into whichever subfolder
    # happened to contain the first file found.
    global pdf_files_folder
    pdf_files_folder = os.path.normpath(folder)

    values = []
    for pdf_file in pdf_files:
        try:
            pdf_content = read_pdf_content(pdf_file)
        except Exception:
            # One unreadable file must not abort the whole batch.
            logging.exception('Could not read %s', pdf_file)
            continue
        amounts_found = _AMOUNT_RE.findall(pdf_content["text"])
        if not amounts_found:
            continue
        max_value = max(float(amount) for amount in amounts_found)
        values.append((
            max_value,
            pdf_content["invoice_number"],
            pdf_content["name"],
            pdf_content["date"],
            _format_category(pdf_content["category"]),
            pdf_file,  # Keep the original path of the PDF.
        ))

    logging.info('Values: %s', values)
    total_amount = sum(value[0] for value in values)
    logging.info('Total amount: %s', total_amount)
    logging.info('Calling display_results function')
    display_results(values, total_amount, input_root)
    logging.info('Finished processing')


def main():
    """Build the folder-selection window and run the application."""
    logging.info('Application start')
    root = tk.Tk()
    root.title("发票金额统计")
    center_window(root)

    main_frame = ttk.Frame(root, padding="10 10 10 10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    folder_entry = ttk.Entry(main_frame, width=40)
    folder_entry.grid(column=0, row=0, padx=5, pady=5, sticky=tk.W)

    browse_button = ttk.Button(main_frame, text="打开",
                               command=lambda: browse_folder(folder_entry))
    browse_button.grid(column=1, row=0, padx=5, pady=5, sticky=tk.W)

    start_button = ttk.Button(
        main_frame, text="开始处理",
        command=lambda: start_processing(folder_entry, root))
    start_button.grid(column=1, row=1, padx=5, pady=5, sticky=tk.W)

    root.columnconfigure(0, weight=1)
    root.rowconfigure(0, weight=1)
    root.mainloop()
    logging.info('Application finished')


if __name__ == "__main__":
    main()