Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Community

DefendModel / app.py

Mangesh223

Update app.py

6ff26ad verified 11 months ago

raw

history blame contribute delete

7.68 kB

	import os
	import gradio as gr
	import PyPDF2
	import docx
	import requests
	import json
	import re
	from typing import Union, Dict, Any

	# Text extraction functions
	def extract_text_from_pdf(file) -> str:
	    """Return the text of every page of a PDF, joined by single spaces.

	    Args:
	        file: The PDF source; it is handed unchanged to ``PyPDF2.PdfReader``.

	    Returns:
	        The page texts joined with ``" "``. A page whose ``extract_text()``
	        result is falsy contributes an empty string.

	    Raises:
	        ValueError: If opening the document or extracting text fails for any
	            reason. The underlying exception is attached as ``__cause__``.
	    """
	    try:
	        reader = PyPDF2.PdfReader(file)
	        return " ".join(page.extract_text() or "" for page in reader.pages)
	    except Exception as e:
	        # Chain explicitly so the traceback keeps the real failure visible.
	        raise ValueError(f"PDF extraction failed: {e}") from e

	def extract_text_from_docx(file) -> str:
	    """Return the non-empty paragraphs of a Word document, one per line.

	    Args:
	        file: The document source; it is handed unchanged to
	            ``docx.Document``.

	    Returns:
	        The text of each paragraph whose ``text`` is non-empty, joined with
	        newlines.

	    Raises:
	        ValueError: If opening or reading the document fails for any reason.
	            The underlying exception is attached as ``__cause__``.
	    """
	    try:
	        document = docx.Document(file)
	        return "\n".join(para.text for para in document.paragraphs if para.text)
	    except Exception as e:
	        # Chain explicitly so the traceback keeps the real failure visible.
	        raise ValueError(f"DOCX extraction failed: {e}") from e

	def process_uploaded_file(file) -> str:
	    """Extract text from an uploaded resume, dispatching on its extension.

	    Args:
	        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
	            object exposing the path through a ``name`` attribute.

	    Returns:
	        The text produced by ``extract_text_from_pdf`` for ``.pdf`` files or
	        ``extract_text_from_docx`` for ``.docx`` files.

	    Raises:
	        ValueError: If nothing was uploaded, no filename can be determined,
	            the extension is not supported, or extraction itself fails.
	    """
	    if file is None:
	        raise ValueError("No file uploaded")

	    # The UI declares gr.File(type="filepath"), which passes a plain path
	    # string rather than an object with ``.name``; accept both forms.
	    if isinstance(file, (str, os.PathLike)):
	        path = os.fspath(file)
	        source = path
	    else:
	        path = getattr(file, "name", None)
	        source = file
	    if not path:
	        raise ValueError("No filename provided")

	    # Extension matching is case-insensitive.
	    filename = str(path).lower()
	    if filename.endswith(".pdf"):
	        return extract_text_from_pdf(source)
	    if filename.endswith(".docx"):
	        return extract_text_from_docx(source)
	    raise ValueError("Unsupported file format. Please upload PDF or Word document.")

	def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
	    """Return the first JSON object found in ``text``, or ``None``.

	    The whole string is tried first. If that does not yield a JSON object,
	    every ``{`` is tried in turn as the start of an object, and the first one
	    that decodes to a ``dict`` wins. Text before or after the object,
	    including stray braces, does not prevent extraction.

	    Args:
	        text: Text that may consist of, or contain, a JSON object.

	    Returns:
	        The decoded ``dict``, or ``None`` when ``text`` is not a string or
	        contains no decodable JSON object. Valid JSON of another type (list,
	        number, string) is not returned.
	    """
	    if not isinstance(text, str):
	        return None

	    try:
	        whole = json.loads(text)
	    except json.JSONDecodeError:
	        whole = None
	    if isinstance(whole, dict):
	        return whole

	    # raw_decode stops at the end of the first complete value, so trailing
	    # prose or unmatched braces after the object are ignored.
	    decoder = json.JSONDecoder()
	    start = text.find("{")
	    while start != -1:
	        try:
	            candidate, _ = decoder.raw_decode(text, start)
	        except json.JSONDecodeError:
	            candidate = None
	        if isinstance(candidate, dict):
	            return candidate
	        start = text.find("{", start + 1)
	    return None

	def _build_resume_prompt(resume_text: str, job_description: str) -> str:
	    """Assemble the instruction prompt sent to the model.

	    The JSON template contains literal braces, so the prompt is built by
	    joining lines rather than with ``str.format``, which would try to
	    interpret those braces as replacement fields and raise.
	    """
	    instructions = "\n".join([
	        "<s>[INST] <<SYS>>",
	        "You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:",
	        "{",
	        '"analysis": {',
	        '"keywords": {"score": 0-100, "recommendations": []},',
	        '"formatting": {"score": 0-100, "recommendations": []},',
	        '"skills": {"score": 0-100, "recommendations": []},',
	        '"experience": {"score": 0-100, "recommendations": []},',
	        '"education": {"score": 0-100, "recommendations": []}',
	        "},",
	        '"overall_score": 0-100,',
	        '"summary": "Brief overall assessment"',
	        "}",
	        "Important:",
	        "1. Generate actual scores based on content",
	        "2. Provide specific recommendations",
	        "3. Return ONLY the JSON object",
	        "4. No additional text or explanations",
	        "<</SYS>>",
	        "",
	    ])
	    return (
	        f"{instructions}\n"
	        f"Resume: {resume_text}\n"
	        f"Job Description: {job_description}\n"
	        "[/INST]"
	    )


	def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
	    """Score a resume against a job description via the Hugging Face API.

	    Args:
	        file: The uploaded resume, passed to ``process_uploaded_file``.
	        job_description: Free-text job description; ``None`` is treated as
	            an empty string. Only the first 1000 characters are sent, along
	            with the first 3000 characters of the resume text.

	    Returns:
	        A dict in one of these shapes (this function never raises):

	        * ``{"structured_result": dict, "raw_response": str}`` when the model
	          output contains a JSON object with both ``"analysis"`` and
	          ``"overall_score"``.
	        * ``{"raw_response": str, "error": str}`` when no such object could
	          be extracted from the model output.
	        * ``{"error": str, "api_response": str}`` when the API answered with
	          a non-200 status or a body of unexpected shape.
	        * ``{"error": str}`` for empty resume text, a missing
	          ``HUGGINGFACE_API_KEY``, network errors and any other failure.
	    """
	    try:
	        # Process file and validate inputs
	        resume_text = process_uploaded_file(file)
	        if not resume_text.strip():
	            return {"error": "Extracted resume text is empty"}

	        api_key = os.getenv("HUGGINGFACE_API_KEY")
	        if not api_key:
	            return {"error": "Hugging Face API key not configured"}

	        prompt = _build_resume_prompt(
	            resume_text[:3000],
	            (job_description or "")[:1000],
	        )

	        # Make API request
	        response = requests.post(
	            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
	            headers={"Authorization": f"Bearer {api_key}"},
	            json={
	                "inputs": prompt,
	                "parameters": {
	                    "max_new_tokens": 800,
	                    "temperature": 0.3,
	                    "do_sample": False,
	                    # Ask for the completion only; otherwise the echoed
	                    # prompt (with its JSON template) precedes the answer.
	                    "return_full_text": False,
	                },
	            },
	            timeout=45,
	        )

	        # Handle API errors
	        if response.status_code != 200:
	            # Error bodies are not guaranteed to be JSON, nor a JSON object.
	            try:
	                payload = response.json()
	            except ValueError:
	                payload = None
	            if isinstance(payload, dict):
	                error_msg = payload.get("error", "Unknown API error")
	            else:
	                error_msg = "Unknown API error"
	            return {
	                "error": f"API request failed: {error_msg}",
	                "api_response": response.text,
	            }

	        # Process response
	        try:
	            raw_output = response.json()[0]["generated_text"]
	        except (ValueError, LookupError, TypeError):
	            return {
	                "error": "Unexpected API response format",
	                "api_response": response.text,
	            }

	        # Try to extract JSON and validate its structure
	        result = extract_json_from_text(raw_output)
	        required_keys = {"analysis", "overall_score"}
	        if isinstance(result, dict) and required_keys.issubset(result):
	            return {
	                "structured_result": result,
	                "raw_response": raw_output,
	            }

	        # If JSON extraction failed, return raw output
	        return {
	            "raw_response": raw_output,
	            "error": "Could not extract valid JSON",
	        }

	    except requests.exceptions.RequestException as e:
	        return {"error": f"Network error: {e}"}
	    except Exception as e:
	        # Boundary handler: the UI callback expects a dict, never an exception.
	        return {"error": f"Analysis failed: {e}"}

	# Enhanced Gradio Interface
	# Builds the Blocks app `demo`: a resume upload, a job-description box and
	# an "Analyze" button as inputs, plus output widgets grouped into three tabs.
	with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
	gr.Markdown("""# ATS Resume Analyzer""")

	with gr.Row():
	with gr.Column():
	# Upload restricted to .pdf/.docx; type="filepath" is requested here.
	file_input = gr.File(
	label="Upload Resume (PDF or DOCX)",
	file_types=[".pdf", ".docx"],
	type="filepath"
	)
	jd_input = gr.Textbox(
	label="Job Description",
	lines=8,
	placeholder="Paste the job description here..."
	)
	analyze_btn = gr.Button("Analyze", variant="primary")

	with gr.Column():
	output_tabs = gr.Tabs()
	with output_tabs:
	# Parsed analysis and its summary text.
	with gr.Tab("Analysis Results"):
	json_output = gr.JSON(label="Structured Analysis")
	summary_output = gr.Textbox(label="Summary", interactive=False)
	# Unparsed text received from the API.
	with gr.Tab("API Response"):
	raw_output = gr.Textbox(label="Raw API Response", lines=10)
	# Multi-line diagnostic text; distinct from the `status` box below.
	with gr.Tab("Debug Info"):
	status_output = gr.Textbox(label="Status Info", lines=5)
	# Read-only status box.
	status = gr.Textbox(label="Status", interactive=False)

	def display_results(file, job_description):
	    """Run the analysis and translate its outcome into component updates.

	    Returns a dict keyed by the five output components. Every component is
	    cleared first; the branch matching the analysis result then fills in the
	    ones it has values for.
	    """
	    analysis = analyze_with_huggingface(file, job_description)

	    # Baseline: all displays cleared, status line blank.
	    updates = dict.fromkeys(
	        (json_output, summary_output, raw_output, status_output), None
	    )
	    updates[status] = ""

	    # Failure reported by the analysis step.
	    if "error" in analysis:
	        updates[status] = f"Error: {analysis['error']}"
	        updates[status_output] = str(analysis)
	        updates[raw_output] = (
	            analysis.get("api_response") or analysis.get("raw_response") or ""
	        )
	        return updates

	    # Parsed JSON is available.
	    if "structured_result" in analysis:
	        parsed = analysis["structured_result"]
	        updates[json_output] = parsed["analysis"]
	        updates[summary_output] = parsed.get("summary", "")
	        updates[raw_output] = analysis["raw_response"]
	        updates[status] = "Analysis complete!"
	        updates[status_output] = "Successfully parsed JSON response"
	        return updates

	    # Neither an error nor a structured result: show whatever text came back.
	    updates[raw_output] = analysis.get("raw_response", "No response received")
	    updates[status] = "Received non-JSON response"
	    updates[status_output] = (
	        "The API returned a response but it couldn't be parsed as JSON"
	    )
	    return updates

	# Clicking "Analyze" calls display_results with the uploaded file and the
	# job description; the five listed components are declared as its outputs.
	analyze_btn.click(
	fn=display_results,
	inputs=[file_input, jd_input],
	outputs=[json_output, summary_output, raw_output, status_output, status]
	)

	if __name__ == "__main__":
	    # Start the app with request queuing enabled when run as a script.
	    # The previous allowed_paths=["*"] is dropped: the option takes literal
	    # file or directory paths and "*" is not a wildcard, so it granted
	    # nothing while suggesting that every path was being exposed.
	    demo.queue().launch()