Spaces:

DzmitryXXL
/

CV_Parser

Runtime error

App Files Community

ddovidovich committed on Aug 30, 2023

Commit

9b70b81

•

1 Parent(s): de0702c

version 0.2

Browse files

Added:
1) tula logo
2) cv samples

Files changed (4) hide show

app.py +103 -46
cv_melanie.jpg +0 -0
cv_patrik.jpg +0 -0
tulaco.png +0 -0

app.py CHANGED Viewed

@@ -19,17 +19,66 @@ from datetime import datetime
 from tempfile import NamedTemporaryFile
 import pypdfium2 as pdfium
-st.subheader("Upload CV in PDF or image format")
-uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
-nltk.download('punkt')
-nltk.download('averaged_perceptron_tagger')
-if uploaded_file:
     file_name, file_extension = os.path.splitext(uploaded_file.name)
     if file_extension != '.pdf':
         uploaded_image = Image.open(uploaded_file)
-        st.image(uploaded_image,width=700)
         img = uploaded_image.convert('RGB')
         loader = UnstructuredPDFLoader(img)
         img.save(file_name+'.pdf')
@@ -38,40 +87,42 @@ if uploaded_file:
         with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
             f.write(uploaded_file.getbuffer())
             PDFFileName = f.name
-            pdf = pdfium.PdfDocument(PDFFileName)
-            n_pages = len(pdf)
-            for page_number in range(n_pages):
-                page = pdf.get_page(page_number)
-                pil_image = page.render(scale=4).to_pil()
-                st.image(pil_image,width=700)
-    st.write("Document parsing in progress ...")
-    loader = UnstructuredPDFLoader(PDFFileName)
-    pages = loader.load_and_split()
-    embeddings = OpenAIEmbeddings()
-    docsearch = Chroma.from_documents(pages, embeddings).as_retriever()
-    current_date = datetime.now()
-    query = "Output informatio, (all in English), from the document in JSON format: full name, contacts, age, languages, education, school, places of work, skills.If some fields cannot be filled from the document, then create this field and fill it with N/A. If the date of birth is not indicated, then please calculate the approximate age of the candidate based on the information provided in the document, for calculations, take into account that graduation from the university is usually at 22 years old. Current date = "+ current_date.date().strftime('%Y-%m-%d')
-    docs = docsearch.get_relevant_documents(query)
-    chain = load_qa_chain(ChatOpenAI(temperature=0), chain_type="stuff")
-    output = chain.run(input_documents=docs, question=query)
-    st.subheader("Parsing result in JSON format")
-    valid_json = ast.literal_eval(output)
-    st.json(valid_json)
-    json_data = json.loads(json.dumps(valid_json))
-    names = [json_data.get("full_name", "N/A")]
-    contacts = [json_data.get("contacts", "N/A")]
-    ages = [json_data.get("age", "N/A")]
-    languages = [json_data.get("languages", "N/A")]
-    education = [json_data.get("education", "N/A")]
-    school = [json_data.get("school", "N/A")]
-    works = [json_data.get("places_of_work", "N/A")]
-    skills = [json_data.get("skills", "N/A")]
-    df = pd.DataFrame({
         "name": names,
         "contacts": contacts,
         "age": ages,
@@ -80,9 +131,15 @@ if uploaded_file:
         "school": school,
         "work": works,
         "skill": skills
-    })
-    st.subheader("Parsing result as a table")
-    st.table(df)
-    csv = df.to_csv(index=False).encode('utf-8')
-    download1 = st.download_button(label="Download result as CSV",data=csv,file_name='result_df.csv',mime='text/csv')
-    st.write("Done...")

 from tempfile import NamedTemporaryFile
 import pypdfium2 as pdfium
+examples=["CV.png","cv_patrik.jpg","cv_melanie.jpg"]
+examples_pdf=["CV.pdf","CV_Patrik.pdf","CV_Melanie.pdf"]
+def load_image(image_file):
+	img = Image.open(image_file)
+	return img
+def main():
+  head1, head2 = st.columns(2)
+  with head1:
+    tula_logo=load_image('tulaco.png')
+    st.image(tula_logo,width=200)
+  with head2:
+    st.write('mail@tula.co')
+    st.write('www.tula.co')
+  st.title("CV parsing with Chat GPT")
+  PDFFileName = ''
+  if not "initialized" in st.session_state:
+    st.session_state.isbutton = False
+    st.session_state.initialized = True
+  uploaded_file = st.file_uploader("Upload CV in PDF or image format", type=["pdf","png","jpg","jpeg"])
+  nltk.download('punkt')
+  nltk.download('averaged_perceptron_tagger')
+  st.subheader("CV examples")
+  col1, col2, col3 = st.columns(3)
+  with col1:
+    ex=load_image(examples[0])
+    st.image(ex,width=100)
+    if st.button('Example 1'):
+        ex=load_image(examples[0])
+        img = ex.convert('RGB')
+        loader = UnstructuredPDFLoader(img)
+        img.save('CV.pdf')
+        st.session_state.isbutton=True
+        PDFFileName=examples_pdf[0]
+  with col2:
+    ex1=load_image(examples[1])
+    st.image(ex1,width=100)
+    if st.button('Example 2'):
+        st.session_state.isbutton=True
+        PDFFileName = examples_pdf[1]
+  with col3:
+    ex2=load_image(examples[2])
+    st.image(ex2,width=100)
+    if st.button('Example 3'):
+        st.session_state.isbutton=True
+        PDFFileName = examples_pdf[2]
+  if (uploaded_file is not None) and (st.session_state.isbutton==False):
     file_name, file_extension = os.path.splitext(uploaded_file.name)
     if file_extension != '.pdf':
         uploaded_image = Image.open(uploaded_file)
         img = uploaded_image.convert('RGB')
         loader = UnstructuredPDFLoader(img)
         img.save(file_name+'.pdf')
         with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
             f.write(uploaded_file.getbuffer())
             PDFFileName = f.name
+  if PDFFileName != '':
+    pdf = pdfium.PdfDocument(PDFFileName)
+    n_pages = len(pdf)
+    for page_number in range(n_pages):
+      page = pdf.get_page(page_number)
+      pil_image = page.render(scale=4).to_pil()
+      st.image(pil_image,width=700)
+    with st.spinner('Document parsing in progress ...'):
+      loader = UnstructuredPDFLoader(PDFFileName)
+      pages = loader.load_and_split()
+      embeddings = OpenAIEmbeddings()
+      docsearch = Chroma.from_documents(pages, embeddings).as_retriever()
+      current_date = datetime.now()
+      query = "Output informatio, (all in English), from the document in JSON format: full name, contacts, age, languages, education, school, work experience, skills. If some fields cannot be filled from the document, then create this field and fill it with N/A. If the date of birth is not indicated, then please calculate the approximate age of the candidate based on the information provided in the document, for calculations, take into account that graduation from the university is usually at 22 years old. Current date = "+ current_date.date().strftime('%Y-%m-%d')
+      docs = docsearch.get_relevant_documents(query)
+      chain = load_qa_chain(ChatOpenAI(temperature=0), chain_type="stuff")
+      output = chain.run(input_documents=docs, question=query)
+      st.subheader("Parsing result in JSON format")
+      valid_json = ast.literal_eval(output)
+      st.json(valid_json)
+      json_data = json.loads(json.dumps(valid_json))
+      names = [json_data.get("full_name", "N/A")]
+      contacts = [json_data.get("contacts", "N/A")]
+      ages = [json_data.get("age", "N/A")]
+      languages = [json_data.get("languages", "N/A")]
+      education = [json_data.get("education", "N/A")]
+      school = [json_data.get("school", "N/A")]
+      works = [json_data.get("work_experience", "N/A")]
+      skills = [json_data.get("skills", "N/A")]
+      df = pd.DataFrame({
         "name": names,
         "contacts": contacts,
         "age": ages,
         "school": school,
         "work": works,
         "skill": skills
+      })
+      st.subheader("Parsing result as a table")
+      st.table(df)
+      csv = df.to_csv(index=False).encode('utf-8')
+      download1 = st.download_button(label="Download result as CSV",data=csv,file_name='result_df.csv',mime='text/csv')
+      PDFFileName = ''
+      uploaded_file = None
+      st.success("Ready!")
+if __name__ == "__main__":
+    main()

cv_melanie.jpg ADDED Viewed

cv_patrik.jpg ADDED Viewed

tulaco.png ADDED Viewed