Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import os | |
| import logging | |
| import re | |
| from chromadb import PersistentClient | |
| from sentence_transformers import SentenceTransformer | |
| from langchain_groq import ChatGroq | |
| from rag_utils_updated import extract_text, preprocess_text, get_embeddings, is_image_pdf, assess_cv, extract_job_requirements | |
| import plotly.graph_objects as go | |
| from dotenv import load_dotenv | |
| #from huggingface_hub import get_secret, SecretNotFoundError | |
# Logging setup: INFO-level records with timestamp and level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Load environment variables (python-dotenv) before anything reads os.environ.
load_dotenv()

# Show an error in the UI when LLM_PROMPT is absent from the environment.
if "LLM_PROMPT" not in os.environ:
    st.error("LLM_PROMPT is missing. Check your .env file!")

# Session-state defaults. A key is written only when it is not in the session
# yet, so values stored by an earlier run of the script are left untouched.
_session_defaults = {
    "job_description": "",
    "continue_to_detailed_assessment": False,
    "requirements": None,
    "detailed_assessments": {},
    "chromadb_initialized": False,
    "cvs": {},
    "job_description_embedding": None,
    "assessment_completed": False,
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Persistent Storage for Embeddings
PERMANENT_DB_PATH = "./cv_db"


def _open_cv_collection():
    """Create a ChromaDB client at PERMANENT_DB_PATH and return its "cv_embeddings" collection."""
    db_client = PersistentClient(path=PERMANENT_DB_PATH)
    return db_client.get_or_create_collection("cv_embeddings")


# Objects that are built once and then kept in the session: the vector
# collection, the sentence-embedding model and the Groq chat client.
_resource_factories = {
    "collection": _open_cv_collection,
    "embedding_model": lambda: SentenceTransformer('all-mpnet-base-v2'),
    "groq_client": lambda: ChatGroq(api_key=os.environ.get("GROQ_API_KEY")),
}
for _resource_name, _build_resource in _resource_factories.items():
    if _resource_name not in st.session_state:
        st.session_state[_resource_name] = _build_resource()
st.title("CV Assessment and Ranking App")

# 1. Input Job Description
# The radio button selects where the job description comes from; the text
# stays empty until the chosen source actually provides something.
st.subheader("Enter Job Description")
requirements_source = st.radio("Source:", ("File Upload", "Web Page Link", "Text Input"))
job_description_text = ""

if requirements_source == "Text Input":
    job_description_text = st.text_area("Enter Job Requirements", height=200)
elif requirements_source == "Web Page Link":
    # Fetching the description from a URL is not implemented; only a notice
    # is shown for this source.
    st.warning("This function is not available in MVP yet.")
elif requirements_source == "File Upload":
    uploaded_file = st.file_uploader("Upload Job Requirements (PDF/DOCX)", type=["pdf", "docx"])
    if uploaded_file:
        job_description_text = extract_text(uploaded_file)

# Store the current text on every run, including the empty string when the
# selected source has not produced a description.
st.session_state.job_description = job_description_text
if job_description_text:
    st.success("Job description uploaded successfully!")
# 2. Upload CVs (Folder Upload)
# Every uploaded file is checked, converted to text, preprocessed and embedded;
# the result is kept in st.session_state.cvs and added to the ChromaDB
# collection under the file name.
# NOTE(review): the original indentation was lost in this copy of the file; the
# nesting below is reconstructed from the control flow and should be confirmed.
st.subheader("Upload CVs (Folder)")
uploaded_files = st.file_uploader("Choose a folder containing CV files", accept_multiple_files=True)

if uploaded_files and not st.session_state.assessment_completed:
    st.write(f"{len(uploaded_files)} CV(s) uploaded.")
    st.session_state.cvs = {}
    cv_embeddings_created = 0

    # Empty the collection the first time uploads are processed in this
    # session; the flag prevents clearing it again on later runs.
    if not st.session_state.chromadb_initialized:
        try:
            ids_in_collection = st.session_state.collection.get()['ids']
            if ids_in_collection:
                st.session_state.collection.delete(ids=ids_in_collection)
                logger.info("ChromaDB collection cleared.")
            else:
                logger.info("ChromaDB collection is already empty. Skipping deletion.")
        except Exception as e:
            st.error(f"Error clearing ChromaDB collection: {e}")
            st.stop()
        st.session_state.chromadb_initialized = True

    for uploaded_file in uploaded_files:
        filename = uploaded_file.name
        if filename in st.session_state.cvs:
            # A file with the same name was already handled in this batch.
            continue

        # Up to two attempts per file.
        for attempt in range(2):
            try:
                # Rewind before every read: the upload is a file-like stream,
                # so the image check, the text extraction and a retry must
                # each start at the beginning instead of wherever the
                # previous reader stopped.
                uploaded_file.seek(0)
                if is_image_pdf(uploaded_file):
                    st.warning(f"{filename} appears to be an image-based PDF and cannot be processed.")
                    break

                uploaded_file.seek(0)
                text = extract_text(uploaded_file)
                if not text.strip():
                    raise ValueError("No text extracted.")

                preprocessed_text = preprocess_text(text)
                embedding = get_embeddings(preprocessed_text, st.session_state.embedding_model)
                st.session_state.cvs[filename] = {
                    "text": preprocessed_text,
                    "embedding": embedding,
                }
                cv_embeddings_created += 1

                # A storage failure is not retried: report it and stop the run.
                try:
                    st.session_state.collection.add(
                        embeddings=[embedding],
                        documents=[preprocessed_text],
                        ids=[filename],
                        metadatas=[{"filename": filename}]
                    )
                    logger.info(f"Embedding for {filename} added to ChromaDB.")
                except Exception as e:
                    st.error(f"Error adding embedding to ChromaDB for {filename}: {e}")
                    st.stop()
                break
            except Exception as e:
                logger.error(f"Text extraction failed for {filename} on attempt {attempt + 1}: {e}")
                if attempt == 1:
                    st.error(f"Failed to process {filename} after multiple attempts.")

    if cv_embeddings_created > 0:
        st.success(f"{cv_embeddings_created} CV embeddings created successfully!")
    # Everything that did not yield an embedding is counted here, including
    # files skipped as image-based PDFs or as duplicate names.
    num_errors = len(uploaded_files) - cv_embeddings_created
    if num_errors > 0:
        st.error(f"Error in CV embeddings creation for {num_errors} CV(s).")

    if st.button("Continue Assessment"):
        st.session_state.continue_to_detailed_assessment = True
elif uploaded_files and st.session_state.assessment_completed:
    st.warning("This is an MVP. Please refresh the page before uploading and assessing new files.")
# Detailed assessment pass, started by the "Continue Assessment" button.
# NOTE(review): the original indentation was lost in this copy of the file. The
# nesting below (requirements display and embedding step as siblings of the
# extraction step, final subheader at module level) is an assumption to confirm.
if st.session_state.continue_to_detailed_assessment:
    st.session_state.continue_to_detailed_assessment = False  # reset value
    st.write("Performing detailed assessments...")

    has_job_description = bool(st.session_state.job_description)

    # Extract Job Requirements (only when none are stored in the session yet)
    if has_job_description and st.session_state.requirements is None:
        st.session_state.requirements = extract_job_requirements(
            st.session_state.job_description,
            st.session_state.groq_client,
        )

    if not st.session_state.requirements:
        st.warning("Could not extract job requirements.")
    else:
        with st.expander("Extracted Job Requirements:"):
            for req in st.session_state.requirements:
                st.write(f"- {req}")

    # Generate job description embedding if not already done
    if has_job_description and st.session_state.job_description_embedding is None:
        try:
            st.session_state.job_description_embedding = get_embeddings(
                st.session_state.job_description,
                st.session_state.embedding_model,
            )
        except Exception as e:
            st.error(f"Error creating job description embedding: {e}")
            st.stop()

    # Detailed CV Assessments: one assess_cv call per stored CV. A failure for
    # one CV is reported and the loop moves on to the next one.
    selected_cvs = list(st.session_state.cvs)
    st.session_state.detailed_assessments = st.session_state.detailed_assessments or {}
    with st.spinner("Performing detailed assessments..."):
        for filename in selected_cvs:
            cv_entry = st.session_state.cvs.get(filename)
            if cv_entry is None:
                continue
            cv_text = cv_entry["text"]
            try:
                st.session_state.detailed_assessments[filename] = assess_cv(
                    cv_text,
                    st.session_state.requirements,
                    filename,
                    st.session_state.groq_client,
                )
            except Exception as e:
                st.error(f"Error during detailed assessment of {filename}: {e}")

    # Mark the run as finished; the upload section checks this flag.
    st.session_state.assessment_completed = True
    st.success("Detailed assessments complete!")

st.subheader("Candidates Assessment and Ranking")
# Headers that open a section of the LLM response. A section runs from its
# header up to the next header of any kind, or to the end of the text.
_SECTION_HEADERS = (
    "Technical Lead Assessment:",
    "HR Specialist Assessment:",
    "Project Manager Assessment:",
    "Final Assessment:",
    "Recommendation:",
)
_SECTION_BOUNDARY = re.compile(
    "|".join(re.escape(header) for header in _SECTION_HEADERS),
    re.IGNORECASE,
)

# Unlabelled score phrasings accepted after an expert header, e.g.
# "scores 80/100" or "score of 65 out of 100".
_EXPERT_SCORE_TAIL = r".*?score(?:s)?\s*(?:of)?\s*(\d+)\s*(?:out\s*of|\/)\s*100"
# Unlabelled phrasings accepted after the final header, e.g.
# "Consensus Score: 70 out of 100".
_FINAL_SCORE_TAIL = r".*?(?:Consensus Score|total of|final score).*?(\d+)\s*(?:out of)?\s*100"

# (result key, section header, labelled-score prefix, unlabelled-score tail)
_SCORED_SECTIONS = (
    ("technical_lead", "Technical Lead Assessment:", "Technical Lead Score:", _EXPERT_SCORE_TAIL),
    ("hr_specialist", "HR Specialist Assessment:", "HR Specialist Score:", _EXPERT_SCORE_TAIL),
    ("project_manager", "Project Manager Assessment:", "Project Manager Score:", _EXPERT_SCORE_TAIL),
    ("final_assessment", "Final Assessment:", "Final Assessment Score:", _FINAL_SCORE_TAIL),
)


def _fallback_score(raw_response, header, score_tail):
    """Return the first unlabelled score inside a *header* section, or None.

    The search is limited to the text between an occurrence of *header* and
    the next section header, so a section without a score cannot pick up the
    score written in a later section.
    """
    scored = re.compile(re.escape(header) + score_tail, re.IGNORECASE | re.DOTALL)
    for opening in re.finditer(re.escape(header), raw_response, re.IGNORECASE):
        closing = _SECTION_BOUNDARY.search(raw_response, opening.end())
        section_end = closing.start() if closing else len(raw_response)
        found = scored.match(raw_response, opening.start(), section_end)
        if found:
            return found.group(1)
    return None


def parse_assessment(raw_response, requirements):
    """Parse the LLM's assessment text into a flat dict of strings.

    For each of the Technical Lead, HR Specialist, Project Manager and Final
    sections, the labelled form ("<Name> Assessment: ... <Name> Score: N") is
    tried first and yields both the text and the score. When it is absent, an
    unlabelled score such as "score of 80 out of 100" is looked for inside
    that section only; the section text then stays "Not Found". The
    recommendation is everything after the first "Recommendation:".

    Args:
        raw_response: Text returned by the LLM. Anything that is not a str is
            logged and produces the all-"Not Found" result.
        requirements: Not used by the parser; kept so existing callers that
            pass it keep working.

    Returns:
        dict with the keys technical_lead, hr_specialist, project_manager,
        final_assessment, recommendation and the four matching "*_score"
        keys. Scores are digit strings; every value defaults to "Not Found".
    """
    matches = {
        "technical_lead": "Not Found",
        "hr_specialist": "Not Found",
        "project_manager": "Not Found",
        "final_assessment": "Not Found",
        "recommendation": "Not Found",
        "technical_lead_score": "Not Found",
        "hr_specialist_score": "Not Found",
        "project_manager_score": "Not Found",
        "final_assessment_score": "Not Found",
    }

    if not isinstance(raw_response, str):
        logging.getLogger(__name__).error(
            "Error parsing assessment: expected str, got %s",
            type(raw_response).__name__,
        )
        return matches

    flags = re.IGNORECASE | re.DOTALL
    for key, header, score_label, score_tail in _SCORED_SECTIONS:
        # Labelled form: assessment text followed by an explicit score line.
        labelled = re.search(
            re.escape(header) + r"\s*(.*?)\s*" + re.escape(score_label) + r"\s*(\d+)",
            raw_response,
            flags,
        )
        if labelled:
            matches[key] = labelled.group(1).strip()
            matches[f"{key}_score"] = labelled.group(2)
            continue

        # Fallback: a free-form score mentioned inside this section.
        score = _fallback_score(raw_response, header, score_tail)
        if score is not None:
            matches[f"{key}_score"] = score

    recommendation_match = re.search(r"Recommendation:\s*(.*?)$", raw_response, flags)
    if recommendation_match:
        matches["recommendation"] = recommendation_match.group(1).strip()

    return matches
# Data frame logic
# Builds one table row per parsed assessment, ranks the rows by final score and
# renders a score chart plus expandable detail panels for every candidate.
# NOTE(review): the original indentation was lost in this copy of the file;
# this section is written at module level, which should be confirmed.

# Column order of the results table.
_RESULT_COLUMNS = [
    "filename",
    "final_assessment_score", "final_assessment",
    "technical_lead_score", "technical_lead",
    "hr_specialist_score", "hr_specialist",
    "project_manager_score", "project_manager",
    "recommendation",
]

# (panel/bar label, text column, score column) for each scored section.
_SCORED_PANELS = (
    ("Technical Lead", "technical_lead", "technical_lead_score"),
    ("HR Specialist", "hr_specialist", "hr_specialist_score"),
    ("Project Manager", "project_manager", "project_manager_score"),
    ("Final Assessment", "final_assessment", "final_assessment_score"),
)


def _score_or_none(value):
    """Return *value* as an int, or None when it is missing or not numeric.

    Scores arrive as digit strings, as the "Not Found" placeholder, or as a
    float that may be NaN after numeric coercion; a bare int() call raises on
    the last two.
    """
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


if st.session_state.detailed_assessments:
    # Collect all rows first and build the frame once, instead of growing it
    # with a concat per assessment.
    parsed_rows = []
    for filename, assessment in st.session_state.detailed_assessments.items():
        if "error" in assessment:
            st.error(assessment["error"])
        elif "raw_response" in assessment:
            parsed_data = parse_assessment(assessment["raw_response"], st.session_state.requirements)
            parsed_rows.append({**parsed_data, "filename": filename})
    assessments_df = pd.DataFrame(parsed_rows, columns=_RESULT_COLUMNS)

    # Sort the DataFrame by 'final_assessment_score' in descending order.
    # errors='coerce' turns non-numeric values into NaN, which sort last.
    assessments_df['final_assessment_score'] = pd.to_numeric(assessments_df['final_assessment_score'], errors='coerce')
    assessments_df = assessments_df.sort_values(by='final_assessment_score', ascending=False)
    st.dataframe(assessments_df)

    st.subheader("Detailed Assessment Results")
    for _, row in assessments_df.iterrows():
        st.write(f"**Filename:** {row['filename']}")

        # A missing score becomes None, which leaves that bar out of the
        # chart instead of raising while the page is rendered.
        chart_scores = {
            label: _score_or_none(row[score_column])
            for label, _, score_column in _SCORED_PANELS
        }
        fig = go.Figure(data=[go.Bar(
            x=list(chart_scores),
            y=list(chart_scores.values()),
            text=["N/A" if score is None else str(score) for score in chart_scores.values()],
            textposition='auto',
        )])
        fig.update_layout(yaxis_range=[0, 100])

        # Narrow column for the chart, wide column for the text panels.
        col1, col2 = st.columns([1, 3])
        with col1:
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            for label, text_column, score_column in _SCORED_PANELS:
                # Only the three expert panels carry the "Assessment" suffix.
                panel_title = label if label == "Final Assessment" else f"{label} Assessment"
                with st.expander(panel_title):
                    st.write(f"{row[text_column]}")
                    st.write(f"**{label} Score:** {row[score_column]}")
            with st.expander("Recommendation"):
                st.write(f"{row['recommendation']}")
        st.write("---")
else:
    st.write("No detailed assessments were performed.")