#!/usr/bin/env python
# coding: utf-8
# In[1]:
from sentence_transformers import SentenceTransformer
# In[2]:
import faiss
import pickle
import os
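# Hide any GPUs so PyTorch and FAISS run on CPU (e.g., on a CPU-only Space)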
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# In[3]:
resume_dict = {
"CONTACT": {
"NAME": "Juan Medina",
"CITY": "Toronto, Canada",
"EMAIL": "juanandresmedina125@gmail.com"
},
"RELEVANT WORK EXPERIENCE": {
"Vector Institute": {
"CITY": "Toronto, ON",
"ROLE": "Faculty Affiliate Researcher",
"DATE": "Sep 2023 - Feb 2025",
"DESCRIPTION": "Developed and implemented a comprehensive Natural Language Processing and Machine Learning pipeline utilizing large language models (LLM) in few shot, pre-training and fine-tuning settings, resulting in valuable policy insights on post-COVID conditions (PCC). Implemented a novel named entity recognition (NER) tool for identifying social determinants of PCC, facilitating the analysis of unexplored relationships between PCC and sociodemographic dimensions. Designed and executed a language entailment pipeline to automate a granular analysis of annotated data, providing actionable insights for over 26 SDOH entity dimensions in over 7,000 texts."
},
"Rubik": {
"CITY": "London, UK (Remote)",
"ROLE": "AI Product Strategy [Consulting Project]",
"DATE": "Sep 2024 - Dec 2024",
"DESCRIPTION": "Developed a comprehensive business intelligence AI implementation strategy tailored to the waste management sector, including scalability in cloud systems, interoperability, and product differentiation."
},
"J. Roy Gillis Lab, University of Toronto": {
"CITY": "Toronto, ON",
"ROLE": "Data Science, Quantitative Analysis Specialist",
"DATE": "Jun 2024 - Dec 2024",
"DESCRIPTION": "Designed an end-to-end sentiment analysis pipeline to analyze discourse around vaccination hesitancy in Canada, entailing the extraction, cleaning, annotation, modeling and visualization of over 100,000 data points from the Reddit API. Led an interdisciplinary team of 8 researchers, including engineers, social scientists and designers. Created an interactive visual story showcasing key trends and contextual patterns related to vaccination hesitancy, enhancing understanding and decision-making for stakeholders."
},
"i4Health Research lab, York University": {
"CITY": "Toronto, ON",
"ROLE": "Machine Learning Research Assistant",
"DATE": "Oct 2023 - Sep 2024",
"DESCRIPTION": "Explored and developed ML-driven disparity analysis pipelines with Natural Language Processing and Causal Inference to assess discriminatory relationships in health. Collaborated in the development of a question-answering model for medical images."
}
},
"EDUCATION": {
"University of Toronto": {
"CITY": "Toronto, ON, CA",
"DEGREE": "Master of Science (M.Sc.): Health Systems Artificial Intelligence emphasis",
"START DATE": "Sep 2023",
"GRADUATION DATE": "Mar 2025"
},
"University of California, San Diego": {
"CITY": "Remote",
"DEGREE": "Coursework in Object-Oriented Programming, Natural Language Processing, Probability and Statistics for Deep Learning, and Discrete Mathematics",
"START DATE": "Jun 2022",
"GRADUATION DATE": "Nov 2022"
},
"Wesleyan University": {
"CITY": "Middletown, CT, USA",
"DEGREE": "Bachelor of Arts Double Major: Economics, Science in Society Program (Mathematics/Neuroscience & Sociology emphases)",
"START DATE": "Aug 2018",
"GRADUATION DATE": "May 2022"
}
},
"SKILLS": {
"Technical": "Python, R, SQL, Tableau, SAS, SLURM (HPC), AWS, Spark, PowerBI, Stata",
"Relevant Courses": "Machine Learning, Deep Learning, Statistical Learning, Data Visualization, Causal Inference, AI Implementation, Biostatistics, Innovation Management, Health Policy"
},
"ASPIRATIONS" : "I aspire to work in a multidisciplinary environment where I can utilize my skills in data science, economics, public health, and policy."
}
# In[4]:
# Flatten each top-level resume section into a single text chunk for retrieval
resume_chunks = []
for key, val in resume_dict.items():
    resume_chunks.append(f"{key}: {val}")
# In[5]:
# Load pre-trained embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")
# Generate embeddings
resume_embeddings = model.encode(resume_chunks)
print("Embedding shape:", resume_embeddings.shape) # Should be (num_chunks, 384)
# In[6]:
# Initialize FAISS index
dimension = resume_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(resume_embeddings)
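# IndexFlatL2 does exact (brute-force) L2 search, which is fine for a handful of chunks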
# Save index and text chunks for future use. Pickling a raw FAISS index can fail
# on some versions (SWIG objects), so serialize it to a byte array first.
with open("resume_faiss.pkl", "wb") as f:
    pickle.dump((faiss.serialize_index(index), resume_chunks), f)
print("FAISS index built and saved.")
# In[7]:
# Load the saved FAISS index and text chunks
with open("resume_faiss.pkl", "rb") as f:
    index_bytes, resume_chunks = pickle.load(f)
index = faiss.deserialize_index(index_bytes)
# In[8]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# In[9]:
embedder = model  # reuse the MiniLM encoder loaded above instead of loading it a second time
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = AutoModelForCausalLM.from_pretrained(model_name)
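# Qwen2.5-0.5B is a small base (non-chat) model, kept deliberately tiny so generation stays responsive on CPU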
# In[10]:
def retrieve_and_generate(query, top_k=2):
    # Embed the query with the same encoder used for the resume chunks
    query_embedding = embedder.encode([query])
    # Search FAISS for the top matching chunks (FAISS returns -1 for empty slots)
    distances, indices = index.search(query_embedding, top_k)
    retrieved_texts = [resume_chunks[i] for i in indices[0] if 0 <= i < len(resume_chunks)]
    if not retrieved_texts:
        return "Sorry, I couldn't find relevant information in the resume."
    # Combine the retrieved chunks into a single context block
    context = "\n".join(retrieved_texts)
    # Manually build the prompt for Qwen2.5
    prompt = (
        "You are Juan, a recent master's graduate. Based on your resume information below (in python dictionary format), "
        "answer the user's question truthfully and concisely in first person, checking for the right key in the dictionary. Let's think step by step.\n\n"
        f"Resume:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    # Tokenize and generate; passing the attention mask along avoids padding warnings
    inputs = tokenizer(prompt, return_tensors="pt")
    output = llm.generate(**inputs, max_new_tokens=200)
    # Only decode the newly generated part, not the echoed prompt
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
    return response
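# Hypothetical smoke test for the RAG path (uncomment to try locally):
# print(retrieve_and_generate("Where did Juan go to school?"))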
# In[12]:
qa_dict = {"Work experience": "My experience spans both academia and industry, including my role as a Faculty Affiliate Researcher at the Vector Institute for Artificial Intelligence, where I focused on Natural Language Processing in healthcare, and as an AI strategy consultant for Rubik, a London-based tech startup.",
"Education": "I recently earned an M.Sc. with an emphasis in Health Systems Artificial Intelligence from the University of Toronto and hold a B.A. in Economics and Science & Technology Studies from Wesleyan University.",
"Skillset": "With a multidisciplinary background in AI, statistics, economics, and health systems, I bring a unique perspective to solving complex problems.",
"Other": "Please reach out directly at juanandresmedina125[at]gmail[dot]com, and I would be delighted to discuss my background and experiences in greater detail."
}
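# The canned answers above feed the dropdown; free-text questions go through the RAG pipeline instead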
# In[14]:
import gradio as gr
# In[15]:
futuristic_theme = gr.themes.Soft(
primary_hue="purple",
secondary_hue="slate",
font=["Montserrat", "sans-serif"]
).set(
body_background_fill="#0f1117", # Deep navy
    body_text_color="#74EE2D",             # Neon green text
block_background_fill="#1f2937", # Slightly lighter panel
block_border_color="#6b7280", # Cool gray border
input_background_fill="#000000", # Input fields
input_border_color="#9CA3AF",
button_primary_background_fill="#74EE2D",
button_primary_text_color="#000000",
button_primary_background_fill_hover="#7C3AED"
)
# Return the canned answer for the selected dropdown topic
def handle_predefined_question(topic):
    return qa_dict.get(topic, "Please select a valid topic.")
# 🎨 Gradio UI with enhanced look
with gr.Blocks(title="Ask My Resume", theme=futuristic_theme, css="""
/* Style the dropdown button */
#custom-dropdown .gr-button {
background-color: #000000 !important;
color: #74EE2D !important;
border: 1px solid #9CA3AF !important;
}
/* Global override for dropdown list */
ul[role="listbox"] {
background-color: #000000 !important;
color: #74EE2D !important;
border: 1px solid #9CA3AF !important;
}
/* Style individual options */
ul[role="listbox"] > li {
background-color: #000000 !important;
color: #74EE2D !important;
}
/* Hover effect */
ul[role="listbox"] > li:hover {
background-color: #1f2937 !important;
}
""") as demo:
gr.Markdown(
"## ✨ Ask My Resume\n"
"Welcome! Ask me anything about my experience below.",
elem_id="title"
)
with gr.Row():
with gr.Column():
gr.Markdown("## Topics")
dropdown = gr.Dropdown(
choices=["Select..."] + list(qa_dict.keys()),
label="Choose a topic",
elem_id="custom-dropdown"
)
predefined_output = gr.Textbox(
label="Answer", lines=4, interactive=False, show_copy_button=True
)
dropdown.change(fn=handle_predefined_question, inputs=dropdown, outputs=predefined_output)
with gr.Column():
gr.Markdown("## Ask Your Own Question")
user_input = gr.Textbox(label="Enter your question")
custom_output = gr.Textbox(
label="Answer (AI-Powered)", lines=5, interactive=False, show_copy_button=True
)
ask_btn = gr.Button("Ask", variant="primary")
ask_btn.click(fn=retrieve_and_generate, inputs=user_input, outputs=custom_output)
gr.Markdown("*Disclaimer: LLM-based answers can be prone to errors.*")
demo.launch()