|
from dotenv import load_dotenv |
|
import io |
|
import streamlit as st |
|
import streamlit.components.v1 as components |
|
import base64 |
|
|
|
from langchain.prompts import PromptTemplate |
|
from langchain_core.output_parsers import PydanticOutputParser |
|
from langchain_anthropic import ChatAnthropic |
|
from langchain_openai import ChatOpenAI |
|
from langchain_groq import ChatGroq |
|
from langchain_google_genai import ChatGoogleGenerativeAI |
|
from langchain_core.exceptions import OutputParserException |
|
from pydantic import ValidationError |
|
from langchain_core.pydantic_v1 import BaseModel, Field |
|
from resume_template import Resume |
|
from json import JSONDecodeError |
|
import PyPDF2 |
|
import json |
|
import time |
|
import os |
|
|
|
|
|
|
|
# Enable LangSmith tracing for every LangChain call made by this app.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

# Group all traces under a dedicated LangSmith project.
os.environ['LANGCHAIN_PROJECT'] = 'Resume_Project'

# Load provider API keys (OpenAI, Anthropic, Groq, Google, LangSmith) from a
# local .env file; load_dotenv() does not override variables already set in
# the environment.
load_dotenv()
|
# Registry of selectable chat models, keyed by the display name the UI offers.
# extract_resume_fields() looks models up here and falls back to ChatOpenAI
# for unknown names.
# NOTE(review): every client is constructed eagerly at import time, so a
# missing/invalid API key for ANY provider may fail at startup — confirm
# that is intended.
llm_dict = {
    "GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125"),
    "GPT 4o": ChatOpenAI(temperature=0, model_name="gpt-4o"),
    "Anthropic 3.5 Sonnet": ChatAnthropic(model="claude-3-5-sonnet-20240620"),
    "Llama 3 8b": ChatGroq(model_name="llama3-8b-8192"),
    "Llama 3 70b": ChatGroq(model_name="llama3-70b-8192"),
    "Gemma 7b": ChatGroq(model_name="gemma-7b-it"),
    "Mixtral 8x7b": ChatGroq(model_name="mixtral-8x7b-32768"),
    "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model="gemini-1.5-pro"),
    "Gemini 1.5 Flash": ChatGoogleGenerativeAI(model="gemini-1.5-flash"),
}
|
def pdf_to_string(file):
    """
    Convert a PDF file to a string.

    Parameters:
        file (io.BytesIO): A file-like object representing the PDF file.

    Returns:
        str: The extracted text from the PDF, pages concatenated in order.

    Note:
        The file object is closed before returning.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # extract_text() can return None for pages with no extractable text;
    # coalesce to "" so a single empty page doesn't crash the conversion.
    # "".join over all pages also avoids the quadratic += string build.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    file.close()
    return text
|
|
|
class CustomOutputParserException(Exception):
    """Raised when resume extraction produces output that cannot be parsed.

    Caught (alongside the LangChain/Pydantic parsing errors) by the retry
    loop in extract_resume_fields().
    """
|
|
|
def extract_resume_fields(full_text, model):
    """
    Analyze a resume text and extract structured information using a specified language model.

    Parameters:
        full_text (str): The text content of the resume.
        model (str): Display name of the language model (a key of ``llm_dict``);
            unknown names are treated as an OpenAI model id.

    Returns:
        Resume: Structured resume data parsed from the model output.

    Raises:
        OutputParserException, JSONDecodeError, ValidationError,
        CustomOutputParserException: if parsing still fails on the final retry.
    """
    with open("prompts/resume_extraction.prompt", "r") as f:
        template = f.read()

    parser = PydanticOutputParser(pydantic_object=Resume)

    prompt_template = PromptTemplate(
        template=template,
        input_variables=["resume"],
        partial_variables={"response_template": parser.get_format_instructions()},
    )
    # Fall back to interpreting the name as an OpenAI model id when it is not
    # a known llm_dict key.
    llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))

    chain = prompt_template | llm | parser

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            output = chain.invoke(full_text)
            print(output)
            return output
        # BUG FIX: OutputParserException and JSONDecodeError are the errors
        # the parser actually raises on malformed model output; the original
        # except tuple omitted them, so those failures were never retried.
        except (CustomOutputParserException, OutputParserException,
                JSONDecodeError, ValidationError) as e:
            if attempt == max_attempts:
                raise e
            print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")

    return None
|
|
|
def display_extracted_fields(obj, section_title=None, indent=0):
    """
    Render a parsed resume object in Streamlit, recursing into nested models.

    Parameters:
        obj: a pydantic model; iterating it yields (field_name, value) pairs.
        section_title: optional subheader written before the fields.
        indent: current nesting depth, used to prefix output lines.
    """
    section_fields = {
        "personal_details", "education", "work_experience", "projects",
        "skills", "certifications", "publications", "awards",
        "additional_sections",
    }

    if section_title:
        st.subheader(section_title)

    pad = " " * indent
    child_pad = " " * (indent + 1)

    for name, value in obj:
        label = name.replace('_', ' ').title()

        # Plain (leaf) fields are written inline as "Label: value".
        if name not in section_fields:
            st.write(pad + f"{label}: " + str(value))
            continue

        # Known top-level sections get a bold heading, then their contents.
        st.write(pad + f"**{label}**:")
        if isinstance(value, BaseModel):
            display_extracted_fields(value, None, indent + 1)
        elif isinstance(value, list):
            for entry in value:
                if isinstance(entry, BaseModel):
                    display_extracted_fields(entry, None, indent + 1)
                else:
                    st.write(child_pad + "- " + str(entry))
        else:
            st.write(child_pad + str(value))
|
|
|
def get_json_download_link(json_str, download_name):
    """
    Build an HTML anchor that downloads *json_str* as a pretty-printed file.

    Parameters:
        json_str (str): JSON document to offer for download.
        download_name (str): file name (without extension) for the download.

    Returns:
        str: an ``<a>`` tag embedding the data as a base64 data URI.
    """
    # Round-trip through loads/dumps so the downloaded file is indented
    # regardless of how the input string was formatted.
    formatted = json.dumps(json.loads(json_str), indent=4)
    encoded = base64.b64encode(formatted.encode()).decode()
    return (
        f'<a href="data:file/json;base64,{encoded}" '
        f'download="{download_name}.json">Click here to download the JSON file</a>'
    )