# Hugging Face Space — runs on ZeroGPU ("Running on Zero").
import json
import re

import torch
import gradio as gr
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import spaces  # provided automatically on HF Spaces
# -----------------------------
# 1. Constants
# -----------------------------
PEFT_MODEL_ID = "LlamaFactoryAI/cv-job-description-matching"
BASE_MODEL_NAME = "akjindal53244/Llama-3.1-Storm-8B"
SYSTEM_PROMPT = (
    "You analyze how well a CV matches a job description for No Skill Jobs. "
    "Education is not very relevant unless the job description specifies it. "
    "Your ONLY output must be a single JSON object with EXACTLY these keys: "
    "matching_analysis, description, score, recommendation, name, email address, phone number.\n\n"
    "Constraints:\n"
    "- matching_analysis: at most 3 short bullet-like points, max 20 words each.\n"
    "- description: at most 2 sentences, max 35 words total.\n"
    "- score: integer from 0 to 100.\n"
    "- recommendation: at most 2 sentences, max 35 words total.\n\n"
    "Very important:\n"
    "- Do NOT include the full CV or job description text.\n"
    "- Do NOT wrap the JSON in backticks or any extra text.\n"
    "- Output ONLY raw JSON, nothing before or after."
)
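# Illustrative shape of the JSON the prompt asks for (hypothetical values,
# not model output — just a sketch of the expected keys):
#
#   {
#     "matching_analysis": ["Relevant warehouse experience", "Shift flexibility stated"],
#     "description": "Strong overlap on core duties.",
#     "score": 78,
#     "recommendation": "Proceed to a phone screen.",
#     "name": "Jane Doe",
#     "email address": "jane@example.com",
#     "phone number": "+1 555 0100"
#   }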
# -----------------------------
# 2. Download & patch adapter (CPU only, safe in main process)
# -----------------------------
print("Downloading adapter...")
adapter_path = snapshot_download(PEFT_MODEL_ID)

# Force task_type to CAUSAL_LM so PeftModel loads the adapter for causal generation.
config_path = adapter_path + "/adapter_config.json"
with open(config_path, "r") as f:
    cfg = json.load(f)

cfg["task_type"] = "CAUSAL_LM"

with open(config_path, "w") as f:
    json.dump(cfg, f, indent=2)

print("Patched adapter_config.json → task_type = CAUSAL_LM")
print("Adapter path:", adapter_path)
# -----------------------------
# 3. Globals for lazy GPU init
# -----------------------------
tokenizer = None
model = None
def build_messages(cv: str, job_description: str):
    return [
        {
            "role": "system",
            "content": SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": f"<CV> {cv} </CV>\n<job_description> {job_description} </job_description>",
        },
    ]
def extract_json_from_text(text: str):
    """
    Try to pull a JSON object out of the model's output.
    If it fails, wrap the raw text in a fallback JSON structure.
    """
    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
    candidate = match.group(0) if match else text
    try:
        return json.loads(candidate)
    except Exception:
        return {
            "matching_analysis": [
                "Model output could not be parsed as JSON.",
            ],
            "description": text[:200],
            "score": 0,
            "recommendation": "Please try again; the model returned non-JSON output.",
        }
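# Illustrative behaviour (hypothetical inputs, not from the original app):
#   extract_json_from_text('noise {"score": 88} noise')  -> {"score": 88}
#   extract_json_from_text("no json here")               -> fallback dict above
# Note the greedy regex spans from the first "{" to the last "}", so output
# containing two separate JSON objects fails to parse and hits the fallback.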
# -----------------------------
# 4. Main inference function (GPU)
# -----------------------------
@spaces.GPU  # required for Stateless GPU (ZeroGPU) Spaces
def match_cv_job(cv: str, job_description: str):
    global tokenizer, model

    if not cv.strip() or not job_description.strip():
        return {
            "matching_analysis": ["Please provide both a CV and a job description."],
            "description": "",
            "score": 0,
            "recommendation": "Fill both text boxes and run again.",
        }
    # Lazy GPU initialization: all CUDA-related work happens ONLY here
    if tokenizer is None or model is None:
        print("Initializing tokenizer + model on GPU...")

        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_NAME,
            quantization_config=bnb_config,
            device_map="auto",
        )
        base_model.config.pad_token_id = tokenizer.pad_token_id

        model_ = PeftModel.from_pretrained(
            base_model,
            adapter_path,
            device_map="auto",
        )
        model_.eval()
        torch.set_grad_enabled(False)
        model = model_
        print("Model + LoRA adapter loaded successfully on GPU.")
    messages = build_messages(cv, job_description)
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )

    encoded = tokenizer(prompt, return_tensors="pt")
    # Move tensors to the same device as the model
    encoded = {k: v.to(model.device) for k, v in encoded.items()}

    with torch.inference_mode():
        outputs = model.generate(
            **encoded,
            max_new_tokens=256,
            pad_token_id=tokenizer.pad_token_id,
        )

    input_len = encoded["input_ids"].shape[1]
    generated_tokens = outputs[0][input_len:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    result = extract_json_from_text(generated_text)
    return result
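# Quick smoke test (a sketch with hypothetical CV/JD strings; on ZeroGPU the
# GPU is only attached inside a @spaces.GPU call, so run this via the UI/API):
#   print(match_cv_job("Warehouse picker, 3 years, forklift licence",
#                      "Night-shift warehouse operative, forklift preferred"))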
# -----------------------------
# 5. Gradio interface
# -----------------------------
cv_input = gr.Textbox(
    label="CV",
    placeholder="Paste the candidate's CV here...",
    lines=18,
)
jd_input = gr.Textbox(
    label="Job Description",
    placeholder="Paste the job description here...",
    lines=8,
)
output_json = gr.JSON(label="Matching result (JSON)")

demo = gr.Interface(
    fn=match_cv_job,
    inputs=[cv_input, jd_input],
    outputs=output_json,
    title="CV–Job Description Matching API",
    description=(
        "Paste a CV and a job description. The model returns a JSON object with "
        "`matching_analysis`, `description`, `score`, and `recommendation`."
    ),
)
if __name__ == "__main__":
    demo.launch()
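# Calling the deployed Space programmatically (a sketch assuming a hypothetical
# Space id "your-username/cv-jd-matcher"; "/predict" is gr.Interface's default
# endpoint name):
#
#   from gradio_client import Client
#   client = Client("your-username/cv-jd-matcher")
#   result = client.predict("CV text...", "Job description text...", api_name="/predict")
#   print(result)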