Spaces:

cm0805
/

ResuMate_NVIDIA

Sleeping

App Files Files Community

cm0805 committed on May 31, 2023

Commit

0a510f9

•

1 Parent(s): 54f4f78

Create utils.py

Browse files

Files changed (1) hide show

utils.py +135 -0

utils.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import PyPDF2
+from docx import Document
+from pptx import Presentation
+from nlp import get_average_similarity_scores
+import numpy as np
+import plotly.graph_objects as go
+import os
+import tempfile
+import shutil
+# Langchain document loaders
+from langchain.document_loaders import PyPDFLoader #for pdf files
+from langchain.document_loaders import TextLoader  #for text files
+from langchain.document_loaders import Docx2txtLoader  #for docx files
+from langchain.document_loaders import UnstructuredPowerPointLoader  #for pptx files
+from constants import StreamlitException
+from PyPDF2.errors import PdfReadError
+from zipfile import BadZipFile
def load_file(st, uploaded_file):
    """Save an uploaded resume to disk and extract its raw text.

    The upload is written under ``downloaded_files/`` so that the LangChain
    loader, which is constructed from a filesystem path, can read it. The raw
    text is extracted from the in-memory upload with the extractor that
    matches its MIME type.

    Args:
        st: Not used by this function; kept so existing callers keep working.
        uploaded_file: The output of ``st.sidebar.file_uploader``. Must expose
            ``type``, ``name``, ``read()`` and ``seek()``.

    Returns:
        A ``(resume_text_raw, lang_loader)`` tuple on success. When the MIME
        type is unsupported, or the content cannot be parsed
        (``PdfReadError`` / ``BadZipFile``), a ``StreamlitException`` instance
        is *returned* (not raised).
    """
    # MIME type -> (text extractor, LangChain loader class)
    handlers = {
        "application/pdf": (extract_pdf_text, PyPDFLoader),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (extract_word_text, Docx2txtLoader),
        "application/vnd.ms-powerpoint": (extract_ppt_text, UnstructuredPowerPointLoader),
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": (extract_ppt_text, UnstructuredPowerPointLoader),
    }
    handler = handlers.get(uploaded_file.type)
    if handler is None:
        # Reject before touching the disk so unsupported uploads are not kept.
        return StreamlitException("**Error**: Invalid file format. Please upload a Word, PDF, or PowerPoint file.")
    extract_text, loader_cls = handler

    os.makedirs('downloaded_files', exist_ok=True)
    # The file name comes from the client: keep only its final component so it
    # cannot point outside the download directory.
    download_path = os.path.join('downloaded_files', os.path.basename(uploaded_file.name))

    # Write straight to the destination; going through a NamedTemporaryFile
    # with delete=False would leave an orphaned temp file behind per upload.
    uploaded_file.seek(0)  # in case the stream was already consumed
    with open(download_path, 'wb') as saved_file:
        saved_file.write(uploaded_file.read())
    uploaded_file.seek(0)  # rewind so the extractor parses from the start

    try:
        resume_text_raw = extract_text(uploaded_file)
        lang_loader = loader_cls(download_path)
    except (PdfReadError, BadZipFile):
        return StreamlitException("**Error**: Invalid file content. Please upload a valid Word, PDF, or PowerPoint file.")
    return resume_text_raw, lang_loader
+# Function to extract text from a PDF file
def extract_pdf_text(file):
    """Return the text of every page of a PDF as period-terminated lines.

    Each page's extracted text is split on newlines; every resulting line is
    stripped of surrounding whitespace and followed by a period and a newline.
    Lines that are empty after stripping still contribute a lone period line.

    Args:
        file: Path or file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        The concatenated text of all pages, in page order.
    """
    reader = PyPDF2.PdfReader(file)
    return "".join(
        f"{raw_line.strip()}.\n"
        for pdf_page in reader.pages
        for raw_line in pdf_page.extract_text().split('\n')
    )
+# Function to extract text from a Word file
def extract_word_text(file):
    """Return the text of a Word document's paragraphs and tables.

    Paragraphs and tables are interleaved one-for-one: the first paragraph,
    then the first table, then the second paragraph, and so on. Note that this
    is not the order in which they appear in the document. Each paragraph is
    followed by a period and a newline; each table row becomes one line of
    tab-terminated cell texts.

    Tables left over once the paragraphs run out are appended at the end
    instead of being dropped.

    Args:
        file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The extracted text as a single string.
    """
    doc = Document(file)
    remaining_tables = iter(doc.tables)
    chunks = []
    for paragraph in doc.paragraphs:
        chunks.append(paragraph.text + '.\n')
        table = next(remaining_tables, None)
        if table is not None:
            chunks.append(_table_to_text(table))
    # More tables than paragraphs: emit the rest rather than losing them.
    chunks.extend(_table_to_text(table) for table in remaining_tables)
    return ''.join(chunks)


def _table_to_text(table):
    """Render a table as one line per row, each cell followed by a tab."""
    return ''.join(
        ''.join(cell.text + '\t' for cell in row.cells) + '\n'
        for row in table.rows
    )
+# Function to extract text from a PowerPoint file
def extract_ppt_text(file):
    """Return the text of every text-bearing shape in a presentation.

    Slides and shapes are visited in order; shapes without a text frame are
    skipped. Each paragraph is followed by a period and a newline, the same
    terminator the Word extractor in this module uses. Without a separator the
    text of adjacent shapes would be fused together (e.g. a title running
    straight into the body text).

    Args:
        file: Path or file-like object accepted by ``pptx.Presentation``.

    Returns:
        The extracted text as a single string, one line per paragraph.
    """
    prs = Presentation(file)
    return "".join(
        paragraph.text + ".\n"
        for slide in prs.slides
        for shape in slide.shapes
        if shape.has_text_frame
        for paragraph in shape.text_frame.paragraphs
    )
+# Function to plot the average similarity score for each job description phrase
def plot_similarity_scores(job_description_phrases, resume_phrases):
    """Build a horizontal bar chart of the top-scoring job-description phrases.

    The scores come from ``get_average_similarity_scores`` (presumably one
    score per job-description phrase; confirm against the ``nlp`` module).
    The ten highest scores are kept and drawn highest-first from the top, with
    the matching phrase as the tick label on the right-hand side.

    Args:
        job_description_phrases: Sequence of phrases; indexed by score position.
        resume_phrases: Passed through to ``get_average_similarity_scores``.

    Returns:
        The configured ``plotly.graph_objects.Figure``.
    """
    def _tick_label(phrase):
        # Long phrases are cut to 100 characters plus an ellipsis; shorter
        # ones are padded to a width of 75.
        if len(phrase) > 100:
            return phrase[:100] + '...'
        return phrase.ljust(75)

    scores = get_average_similarity_scores(job_description_phrases, resume_phrases)
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    top_ten = ranked[:10]
    indices = [position for position, _ in top_ten]
    top_scores = [score for _, score in top_ten]
    bar_positions = list(range(len(indices)))

    top_phrases = np.array(job_description_phrases)[indices]
    y_axis = dict(
        tickmode="array",
        tickvals=bar_positions,
        ticktext=[_tick_label(phrase) for phrase in top_phrases],
        tickfont=dict(size=14),
        autorange="reversed",  # best score at the top
        side="right",
        automargin=True
    )
    x_axis = dict(
        tickmode="array",
        tickvals=np.round(np.arange(0, 1.2, 0.2), 2),
        ticktext=np.round(np.arange(0, 1.2, 0.2), 2),
        tickfont=dict(size=14),
        range=[0, 1.05]
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(y=bar_positions, x=top_scores, orientation='h'))
    fig.update_layout(
        yaxis=y_axis,
        xaxis=x_axis,
        showlegend=False,
        margin=dict(t=0)
    )
    fig.update_xaxes(title="Average Similarity Score", title_font=dict(size=14))
    return fig