import gradio as gr
import PyPDF2
import spaces
import torch
from docx import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
class ResumeRAG:
    """Retrieval-augmented Q&A over a single uploaded resume.

    Loads Mistral-7B-Instruct in 4-bit (bitsandbytes NF4) for answer
    generation and a MiniLM sentence-transformer for embeddings, and keeps
    a FAISS index of the most recently processed resume in memory.
    """

    def __init__(self):
        # A CUDA GPU is required: the 7B model is loaded with bitsandbytes
        # 4-bit quantization, which is GPU-only.
        self.has_cuda = torch.cuda.is_available()
        self.device = "cuda" if self.has_cuda else "cpu"
        print(f"Using device: {self.device}")

        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": self.device},
        )

        # ~500-char chunks with 50-char overlap keep resume sections mostly
        # intact while staying small enough for k=3 retrieval into the prompt.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )

        # FAISS index of the current resume; None until process_resume() runs.
        self.vector_store = None

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"

        if not self.has_cuda:
            raise RuntimeError(
                "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
            )

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        print("Loading model...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
        )

        # Mistral's tokenizer ships without a pad token; reuse EOS so
        # generate() can pad without warnings.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Return the concatenated text of all PDF pages, or an
        'Error reading PDF: ...' sentinel string on failure."""
        try:
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may return None for image-only pages.
                return "".join([(p.extract_text() or "") for p in reader.pages])
        except Exception as e:
            return f"Error reading PDF: {e}"

    def extract_text_from_docx(self, file_path: str) -> str:
        """Return the newline-joined paragraph text of a DOCX file, or an
        'Error reading DOCX: ...' sentinel string on failure."""
        try:
            doc = Document(file_path)
            return "\n".join([p.text for p in doc.paragraphs])
        except Exception as e:
            return f"Error reading DOCX: {e}"

    def process_resume(self, file) -> str:
        """Extract text from an uploaded PDF/DOCX, chunk it, and (re)build
        the FAISS vector store. Returns a human-readable status message.

        `file` is a Gradio file object exposing `.name` (the temp path),
        or None when nothing was uploaded.
        """
        if file is None:
            return "Please upload a resume file."

        file_path = file.name
        if file_path.lower().endswith(".pdf"):
            text = self.extract_text_from_pdf(file_path)
        elif file_path.lower().endswith(".docx"):
            text = self.extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload PDF or DOCX."

        # The extractors report failures as "Error ..." strings rather than
        # raising, so propagate that message to the UI as-is.
        if text.startswith("Error"):
            return text

        if not text.strip():
            return "No text could be extracted from the resume."

        chunks = self.text_splitter.split_text(text)
        if not chunks:
            return "No text chunks could be created from the resume."

        self.vector_store = FAISS.from_texts(chunks, self.embeddings)
        # NOTE: the original success string was corrupted by a mis-encoded
        # emoji that split the f-string literal; reconstructed here.
        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks."

    def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer with the LLM, grounded in `context`.

        Builds a Mistral [INST] prompt, samples up to 1024 new tokens, and
        returns only the text after the final [/INST] marker.
        """
        prompt = f"""[INST] You are a helpful assistant analyzing a resume.

Context:
{context}

Question: {question}

Answer only from the context. If the answer is not in the context, say it is not in the resume. [/INST]"""

        inputs = self.tokenizer(prompt, return_tensors="pt")

        # With device_map="auto" the model may be sharded; send inputs to
        # wherever the embedding layer lives.
        target_device = self.model.get_input_embeddings().weight.device
        inputs = {k: v.to(target_device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # The decoded text includes the prompt; keep only the answer part.
        if "[/INST]" in text:
            return text.split("[/INST]")[-1].strip()
        return text.strip()

    def query(self, question: str):
        """Answer `question` against the indexed resume.

        Returns (answer, retrieved_context); both are user-facing strings.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""

        if not question.strip():
            return "Please enter a question.", ""

        # Top-3 chunks by embedding similarity form the grounding context.
        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join([d.page_content for d in docs])

        answer = self.generate_answer(question, context)

        # Free transient generation buffers between requests.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return answer, context
| |
|
| |
|
# Build the RAG system once at module import so the slow model/tokenizer
# load happens before the UI is served.
print("Initializing Resume RAG System...")
rag_system = ResumeRAG()
| |
|
# Gradio UI: left column uploads/processes the resume, right column asks
# questions; retrieved context is shown in a collapsible accordion.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search

        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(
                label="Upload PDF or DOCX",
                file_types=[".pdf", ".docx"]
            )
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")

            answer_output = gr.Textbox(
                label="Answer",
                lines=8,
                interactive=False
            )

            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(
                    label="Relevant Resume Sections",
                    lines=6,
                    interactive=False
                )

    @spaces.GPU
    def query_gpu(q):
        """Run retrieval + generation inside a ZeroGPU GPU context."""
        return rag_system.query(q)

    @spaces.GPU
    def process_resume_gpu(f):
        """Process an upload inside a GPU context.

        Fix: embedding the resume chunks (FAISS.from_texts with CUDA
        HuggingFaceEmbeddings) is GPU work too; on ZeroGPU it must run
        under @spaces.GPU, not in the bare main process.
        """
        return rag_system.process_resume(f)

    upload_btn.click(
        fn=process_resume_gpu,
        inputs=[file_input],
        outputs=[upload_status]
    )

    submit_btn.click(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output]
    )

    # Pressing Enter in the question box behaves like clicking "Get Answer".
    question_input.submit(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output]
    )
| |
|
if __name__ == "__main__":
    # share=True requests a public gradio.live tunnel URL in addition to
    # the local server (a no-op on hosted Spaces, which provide their own URL).
    demo.launch(share=True)
| |
|