Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import PyPDF2 | |
| import docx | |
| import requests | |
| import json | |
| import re | |
| from typing import Union, Dict, Any | |
| # Text extraction functions | |
def extract_text_from_pdf(file) -> str:
    """Extract the text of every page of a PDF, joined by single spaces.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; it is passed through
            unchanged.

    Returns:
        The text of all pages joined with ``" "``. A page whose
        ``extract_text()`` result is falsy contributes an empty string.

    Raises:
        ValueError: If opening the document or extracting text fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Callers only deal with ValueError; chain the cause so the original
        # traceback is still available when debugging.
        raise ValueError(f"PDF extraction failed: {e}") from e
def extract_text_from_docx(file) -> str:
    """Extract the non-empty paragraphs of a Word document, one per line.

    Args:
        file: Whatever ``docx.Document`` accepts; it is passed through
            unchanged.

    Returns:
        The text of every paragraph whose ``text`` is non-empty, joined with
        newlines.

    Raises:
        ValueError: If opening the document or reading its paragraphs fails
            for any reason. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        doc = docx.Document(file)
        return "\n".join(para.text for para in doc.paragraphs if para.text)
    except Exception as e:
        # Callers only deal with ValueError; chain the cause so the original
        # traceback is still available when debugging.
        raise ValueError(f"DOCX extraction failed: {e}") from e
def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path (``str``, ``bytes`` or ``os.PathLike``)
            or an object exposing a ``name`` attribute holding the filename.
            The value is forwarded unchanged to the format-specific extractor.

    Returns:
        The extracted text of the document.

    Raises:
        ValueError: If nothing was uploaded, the filename is missing or empty,
            the extension is neither ``.pdf`` nor ``.docx``, or the
            format-specific extractor fails.
    """
    if file is None:
        # Happens when the callback fires before any file has been chosen.
        raise ValueError("No file uploaded")

    if isinstance(file, (str, bytes, os.PathLike)):
        # Plain path input: the path itself is the filename.
        name = os.fsdecode(file)
    else:
        name = getattr(file, "name", None)

    if not name:
        raise ValueError("No filename provided")

    filename = str(name).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Return the first JSON object found in ``text``, or ``None``.

    The whole text is tried first. If it is not a JSON object, every ``{`` is
    tried in order as the start of an embedded object and the first one that
    decodes is returned. Brace groups that are not valid JSON (for example a
    schema template containing ``0-100``) are skipped rather than making the
    whole extraction fail.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The decoded object as a ``dict``, or ``None`` when ``text`` is not a
        string, is blank, or contains no decodable JSON object. Top-level JSON
        values that are not objects (lists, strings, numbers) yield ``None``.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    # Fast path: the entire text is one JSON document.
    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return whole

    # Slow path: probe each opening brace. raw_decode stops at the end of the
    # first complete value, so trailing text after the object is harmless.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            # A value that starts with "{" always decodes to a dict.
            return candidate
        start = text.find("{", start + 1)
    return None
_HF_MODEL_URL = (
    "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
)
_MAX_RESUME_CHARS = 3000
_MAX_JD_CHARS = 1000

# Fixed part of the prompt. It contains literal JSON braces, so it must never
# be passed through str.format(); the variable parts are appended instead.
_PROMPT_PREAMBLE = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
    "analysis": {
        "keywords": {"score": 0-100, "recommendations": []},
        "formatting": {"score": 0-100, "recommendations": []},
        "skills": {"score": 0-100, "recommendations": []},
        "experience": {"score": 0-100, "recommendations": []},
        "education": {"score": 0-100, "recommendations": []}
    },
    "overall_score": 0-100,
    "summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
"""


def _build_prompt(resume_text: str, job_description: str) -> str:
    """Join the fixed preamble with the truncated resume and job description."""
    return (
        _PROMPT_PREAMBLE
        + "Resume: " + resume_text[:_MAX_RESUME_CHARS] + "\n"
        + "Job Description: " + job_description[:_MAX_JD_CHARS] + "\n"
        + "[/INST]"
    )


def _api_error_message(response) -> str:
    """Pull the ``error`` field out of a failed response, if the body is JSON."""
    try:
        payload = response.json()
    except ValueError:
        # Error bodies are not guaranteed to be JSON.
        return "Unknown API error"
    if isinstance(payload, dict):
        return payload.get("error", "Unknown API error")
    return "Unknown API error"


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Score a resume against a job description via the Hugging Face API.

    The resume text is extracted with ``process_uploaded_file``, embedded in an
    instruction prompt, and sent to the hosted Mistral-7B-Instruct endpoint.
    The generated text is then searched for a JSON object with
    ``extract_json_from_text``.

    Args:
        file: The uploaded resume, forwarded to ``process_uploaded_file``.
        job_description: Job description text; ``None`` is treated as empty.
            Only the first 1000 characters are sent (3000 for the resume).

    Returns:
        A dict that never raises to the caller:

        * success: ``{"structured_result": <dict>, "raw_response": <str>}``
          where the dict contains at least ``analysis`` and ``overall_score``;
        * failure: ``{"error": <str>}``, plus ``api_response`` (raw body) when
          the API call itself failed or returned an unexpected shape, or
          ``raw_response`` when text was generated but held no usable JSON.
    """
    try:
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        hf_api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not hf_api_key:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_prompt(resume_text, job_description or "")

        response = requests.post(
            _HF_MODEL_URL,
            headers={"Authorization": f"Bearer {hf_api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,
                    "do_sample": False,
                    # Keep the echoed prompt (which contains a non-JSON
                    # template) out of the text we parse.
                    "return_full_text": False,
                },
            },
            timeout=45,
        )

        if response.status_code != 200:
            return {
                "error": f"API request failed: {_api_error_message(response)}",
                "api_response": response.text,
            }

        try:
            raw_output = response.json()[0]["generated_text"]
        except (ValueError, LookupError, TypeError):
            # Body was not JSON, or not the expected [{"generated_text": ...}].
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }

        result = extract_json_from_text(raw_output)
        required_keys = {"analysis", "overall_score"}
        if isinstance(result, dict) and required_keys <= result.keys():
            return {
                "structured_result": result,
                "raw_response": raw_output,
            }

        # Text came back but no object with the required keys was found.
        return {
            "raw_response": raw_output,
            "error": "Could not extract valid JSON",
        }

    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {e}"}
    except Exception as e:
        # Boundary handler: the UI callback expects an error dict, not a raise.
        return {"error": f"Analysis failed: {e}"}
| # Enhanced Gradio Interface | |
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ATS Resume Analyzer")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath",
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        # Right column: tabbed results plus a one-line status field.
        with gr.Column():
            with gr.Tabs() as output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(label="Summary", interactive=False)
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result dict onto the output components.

        Returns a dict keyed by component. Every output component is always
        present; components with nothing to show are set to ``None``.
        """
        analysis = analyze_with_huggingface(file, job_description)

        # Failure: surface the message and whatever raw text is available.
        if "error" in analysis:
            return {
                json_output: None,
                summary_output: None,
                raw_output: (
                    analysis.get("api_response")
                    or analysis.get("raw_response")
                    or ""
                ),
                status_output: str(analysis),
                status: f"Error: {analysis['error']}",
            }

        # Success: structured JSON was parsed from the model output.
        if "structured_result" in analysis:
            parsed = analysis["structured_result"]
            return {
                json_output: parsed["analysis"],
                summary_output: parsed.get("summary", ""),
                raw_output: analysis["raw_response"],
                status_output: "Successfully parsed JSON response",
                status: "Analysis complete!",
            }

        # Fallback: neither an error nor a structured result.
        return {
            json_output: None,
            summary_output: None,
            raw_output: analysis.get("raw_response", "No response received"),
            status_output: "The API returned a response but it couldn't be parsed as JSON",
            status: "Received non-JSON response",
        }

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, summary_output, raw_output, status_output, status],
    )
if __name__ == "__main__":
    # Enable the request queue first, then start the server.
    queued_demo = demo.queue()
    # NOTE(review): "*" is passed as a literal entry in allowed_paths —
    # confirm against the Gradio docs that it has the intended effect.
    queued_demo.launch(allowed_paths=["*"])