"""Streamlit app that screens a PDF resume against a job description.

The user supplies a job description, a newline-separated list of screening
criteria and a PDF resume. The resume text is extracted with PyPDF2 and
passed to a llama_index ``TreeSummarize`` synthesizer that is asked to
return a structured ``ResumeScreenerDecision``.
"""

import io
import os
from typing import Any, List, Optional

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from llama_index import ServiceContext
from llama_index.llama_pack.base import BaseLlamaPack
from llama_index.llms import OpenAI
from llama_index.llms.base import LLM
from llama_index.response_synthesizers import TreeSummarize
from llama_index.schema import NodeWithScore, TextNode
from pydantic import BaseModel

# Load environment variables from .env file
load_dotenv()

# Get OpenAI API key from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")

QUERY_TEMPLATE = """
You are an expert resume reviewer.
Your job is to decide if the candidate passes the resume screen given the job description and a list of criteria:

### Job Description
{job_description}

### Screening Criteria
{criteria_str}
"""


class CriteriaDecision(BaseModel):
    """The decision made based on a single criterion"""

    decision: bool
    reasoning: str


class ResumeScreenerDecision(BaseModel):
    """The decision made by the resume screener"""

    criteria_decisions: List[CriteriaDecision]
    overall_reasoning: str
    overall_decision: bool


def _format_criteria_str(criteria: List[str]) -> str:
    """Render each criterion as a ``- item`` bullet line.

    Args:
        criteria: The screening criteria, one entry per bullet.

    Returns:
        One newline-terminated bullet per criterion, or an empty string
        when ``criteria`` is empty.
    """
    return "".join(f"- {criterion}\n" for criterion in criteria)


class ResumeScreenerPack(BaseLlamaPack):
    """Pack that asks an LLM for a structured resume-screening decision."""

    def __init__(
        self,
        job_description: str = "",
        criteria: Optional[List[str]] = None,
        llm: Optional[LLM] = None,
    ) -> None:
        """Build the screening query and the synthesizer that answers it.

        Args:
            job_description: Text substituted into the query template.
            criteria: Screening criteria; ``None`` is treated as an empty
                list (avoids sharing one mutable default between calls).
            llm: LLM to use. Defaults to OpenAI ``gpt-4`` configured with
                the ``OPENAI_API_KEY`` value read at import time.
        """
        llm = llm or OpenAI(model="gpt-4", api_key=openai_api_key)
        service_context = ServiceContext.from_defaults(llm=llm)
        criteria_str = _format_criteria_str(criteria or [])
        self.query = QUERY_TEMPLATE.format(
            job_description=job_description, criteria_str=criteria_str
        )
        self.synthesizer = TreeSummarize(
            output_cls=ResumeScreenerDecision, service_context=service_context
        )

    def get_modules(self) -> dict:
        """Get modules."""
        return {"synthesizer": self.synthesizer}

    def run(self, resume_text: str) -> Any:
        """Run pack.

        Args:
            resume_text: Plain text extracted from the resume.

        Returns:
            The ``response`` attribute of the synthesizer output.
        """
        # NodeWithScore needs a real node object rather than a plain dict,
        # so the extracted text is wrapped in a TextNode.
        resume_node = TextNode(text=resume_text)
        output = self.synthesizer.synthesize(
            query=self.query,
            nodes=[NodeWithScore(node=resume_node, score=1.0)],
        )
        return output.response


def _to_displayable(result: Any) -> Any:
    """Convert a pydantic model to a plain dict; pass anything else through."""
    if isinstance(result, BaseModel):
        # `.dict()` is used because it is available on pydantic v1 models
        # and still present (deprecated) on v2 models.
        return result.dict()
    for method_name in ("model_dump", "dict"):
        # Covers models built on a different pydantic base class than the
        # one imported here — presumably possible via a compatibility
        # layer; harmless otherwise.
        dump = getattr(result, method_name, None)
        if callable(dump):
            return dump()
    return result


def main():
    """Render the Streamlit UI and run the screener on submit."""
    st.title("Resume Screener App")

    # User input widgets
    job_description = st.text_area("Job Description")
    criteria = st.text_area(
        "Screening Criteria (separate each criterion by a new line)"
    )
    uploaded_file = st.file_uploader("Upload Resume (PDF)", type=["pdf"])

    if not st.button("Submit"):
        return

    # Drop blank lines so they do not turn into empty "- " bullets.
    criteria_list = [
        line.strip() for line in criteria.splitlines() if line.strip()
    ]

    if not (job_description and criteria_list and uploaded_file):
        st.warning(
            "Please provide a job description, at least one screening "
            "criterion, and a resume PDF."
        )
        return

    resume_text = extract_text_from_pdf(uploaded_file)
    if not resume_text.strip():
        # Extraction failed or the PDF has no text layer; skip the LLM call.
        st.warning("No text could be extracted from the uploaded PDF.")
        return

    screener_pack = ResumeScreenerPack(
        job_description=job_description, criteria=criteria_list
    )

    with st.spinner("Analyzing the resume..."):
        result = screener_pack.run(resume_text)

    st.subheader("Screening Results")
    st.json(_to_displayable(result))


def extract_text_from_pdf(uploaded_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        uploaded_file: File-like object accepted by ``PyPDF2.PdfReader``,
            or ``None``.

    Returns:
        The concatenated page text. An empty string is returned, after
        showing a Streamlit error, when no file was given or extraction
        raised.
    """
    if uploaded_file is None:
        st.error("Please upload a PDF file.")
        return ""

    try:
        # Read PDF content using PyPDF2's PdfReader
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # `or ""` guards against a page yielding no text (None).
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary and any parse
        # failure should be reported to the user rather than crash the app.
        st.error(f"Error extracting text from PDF: {str(e)}")
        return ""


if __name__ == "__main__":
    main()