工作内容:
1.读取pdf文档内容
2.分页显示
3.每个数据后设置下拉框供手动打标签
4.数据录入txt文档
代码:
import math
import os
import tkinter as tk
from tkinter import *
import tkinter.ttk as ttk
from tkinter.messagebox import *
import time
import pdfplumber as pp  # requires: pip install pdfplumber

# Folder scanned for resume PDFs; this is the location the original script
# hard-coded in two places.
DEFAULT_RESUME_DIR = r'D:\hk\微信资料\OA测试简历\OA测试简历'


class GUI(object):
    """Tkinter tool for hand-labelling the text lines of a resume PDF.

    The window offers a drop-down of the PDF files found in ``resume_dir``.
    Pressing the extract button reads every text line of the chosen PDF and
    lays the lines out on notebook tabs, ``lines_per_page`` lines per tab,
    each line paired with a read-only combobox holding the label choices in
    ``resume_kinds``. Pressing the write-in button appends one
    ``"<label> <line text without spaces>"`` row per line to ``output_path``.

    Constructing the object builds the window and then blocks inside
    ``mainloop()`` until the window is closed.

    Args:
        resume_dir: Directory that is listed for ``*.pdf`` files.
        lines_per_page: Number of text lines shown on each notebook tab.
        output_path: Text file the labelled rows are appended to (UTF-8).

    Raises:
        ValueError: If ``lines_per_page`` is smaller than 1.
        OSError: If ``resume_dir`` cannot be listed.
    """

    def __init__(self, resume_dir=DEFAULT_RESUME_DIR, lines_per_page=21,
                 output_path='data.txt'):
        if lines_per_page < 1:
            raise ValueError('lines_per_page must be at least 1')
        print('begin time:', self._timestamp())
        self.resume_kinds = ('基础资料', '教育经历', '校园经历', '项目经历', '工作经历',
                             '专业技能', '自我评价', '求职意向', '其他')
        self.resume_dir = resume_dir
        self.lines_per_page = lines_per_page
        self.output_path = output_path
        self.file_list = [name for name in os.listdir(resume_dir)
                          if name.endswith('.pdf')]
        # content: raw text lines of the current PDF; page_num: number of tabs.
        self.content, self.page_num = [], 0
        # lb_text[i] is the space-stripped text labelled by combobox cmb[i].
        self.lb_text, self.cmb = [], []

        # Main window.
        self.root = tk.Tk()
        self.root.title('简历打标签')
        self.root.geometry("680x620")

        # Empty notebook shown until a PDF has been extracted.
        self.tab_main = self._new_notebook(rely=0.1, relheight=0.8)
        self._build_controls()

        # NOTE(review): the source text had lost its indentation; this print is
        # assumed to belong to __init__, right before the event loop starts.
        print('finish time:', self._timestamp())
        self.root.mainloop()

    @staticmethod
    def _timestamp():
        """Return the current local time as ``YYYY-mm-dd HH:MM:SS``."""
        return time.strftime('%Y-%m-%d %H:%M:%S')

    @staticmethod
    def _read_pdf_lines(path):
        """Return all text lines of the PDF at ``path``, in page order."""
        lines = []
        # The context manager closes the PDF's file handle even if a page
        # fails to parse.
        with pp.open(path) as pdf:
            for page in pdf.pages:
                # A page without extractable text may yield None rather than
                # a string; treat that as an empty page.
                text = page.extract_text() or ''
                lines.extend(text.splitlines())
        return lines

    def _new_notebook(self, rely, relheight):
        """Create the tab container, place it in the window and return it."""
        notebook = ttk.Notebook(self.root)
        notebook.place(relx=0.05, rely=rely, relwidth=0.9, relheight=relheight)
        return notebook

    def _build_controls(self):
        """Create the file drop-down and the extract / write-in buttons."""
        # File drop-down.
        self.cmb_files = ttk.Combobox(self.root, state='readonly')
        self.cmb_files['value'] = self.file_list
        if self.file_list:
            # current(0) raises TclError on an empty value list, so only
            # preselect when there is something to select.
            self.cmb_files.current(0)
        self.cmb_files.place(relx=0.3, rely=0, relwidth=0.3, relheight=0.05)

        # Button that extracts the selected file.
        self.extract_butt = tk.Button(
            self.root, text='提取',
            command=lambda: self._extract_file(self.cmb_files.get()))
        self.extract_butt.place(relx=0.61, rely=0, relwidth=0.05,
                                relheight=0.05)

        # Button that appends the chosen labels to the output file.
        self.writein_butt = tk.Button(self.root, text='录入',
                                      command=self._write_in)
        self.writein_butt.place(relx=0.5, rely=0.91, relwidth=0.05,
                                relheight=0.05)

    def _extract_file(self, file):
        """Load ``file`` from ``resume_dir`` and rebuild the labelling tabs.

        Does nothing when ``file`` does not end in ``.pdf`` (for example the
        empty selection of an empty drop-down).
        """
        if not file.endswith('.pdf'):
            return

        path = os.path.join(self.resume_dir, file)
        print('path:', path)
        # Read first and swap state afterwards, so a failed read leaves the
        # previously displayed document and its widgets intact.
        lines = self._read_pdf_lines(path)

        self.content = lines
        self.lb_text, self.cmb = [], []
        print(len(self.content))
        self.page_num = math.ceil(len(self.content) / self.lines_per_page)
        print('page_num:', self.page_num)

        # Destroying the old notebook also destroys every tab/widget in it.
        self.tab_main.destroy()
        self.tab_main = self._new_notebook(rely=0.05, relheight=0.85)

        for page_index in range(self.page_num):
            tab = tk.Frame(self.tab_main)
            labels_area = tk.Frame(tab)
            cmbs_area = tk.Frame(tab)
            labels_area.place(relx=0, rely=0, relwidth=0.9, relheight=1)
            cmbs_area.place(relx=0.9, rely=0, relwidth=0.1, relheight=1)
            self.tab_main.add(tab, text='%i' % (page_index + 1))

            start = page_index * self.lines_per_page
            for line in self.content[start:start + self.lines_per_page]:
                tk.Label(labels_area, text=line).pack(anchor=tk.E)
                self.lb_text.append(line.replace(' ', ''))
                box = ttk.Combobox(cmbs_area, state='readonly')
                box['value'] = self.resume_kinds
                box.current(0)
                box.pack()
                self.cmb.append(box)

    def _write_in(self):
        """Ask for confirmation, then append every label/line pair to disk."""
        if not askyesno('提示框', '是否录入数据?(此操作会影响文本录入信息)'):
            return
        with open(self.output_path, 'a', encoding='utf-8') as f:
            f.writelines(box.get() + ' ' + text + '\n'
                         for box, text in zip(self.cmb, self.lb_text))
        print('录入成功')


if __name__ == '__main__':
    gui = GUI()
界面展示: