# Spaces: Running
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import pandas as pd | |
import numpy as np | |
import fitz | |
import io | |
import re | |
from sentence_transformers import SentenceTransformer | |
# In[2]: | |
import faiss | |
import pickle | |
import os | |
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" | |
# In[3]: | |
# Structured resume data. Each top-level key is one section; the sections are
# later flattened into text chunks for retrieval.
resume_dict = {
    "CONTACT": {
        "NAME": "Juan Medina",
        "CITY": "Toronto, Canada",
        "EMAIL": "juanandresmedina125@gmail.com",
    },
    "RELEVANT WORK EXPERIENCE": {
        "Vector Institute": {
            "CITY": "Toronto, ON",
            "ROLE": "Faculty Affiliate Researcher",
            "DATE": "Sep 2023 - Feb 2025",
            "DESCRIPTION": "Developed and implemented a comprehensive Natural Language Processing and Machine Learning pipeline utilizing large language models (LLM) in few shot, pre-training and fine-tuning settings, resulting in valuable policy insights on post-COVID conditions (PCC). Implemented a novel named entity recognition (NER) tool for identifying social determinants of PCC, facilitating the analysis of unexplored relationships between PCC and sociodemographic dimensions. Designed and executed a language entailment pipeline to automate a granular analysis of annotated data, providing actionable insights for over 26 SDOH entity dimensions in over 7,000 texts.",
        },
        "Rubik": {
            "CITY": "London, UK (Remote)",
            "ROLE": "AI Product Strategy [Consulting Project]",
            "DATE": "Sep 2024 - Dec 2024",
            "DESCRIPTION": "Developed a comprehensive business intelligence AI implementation strategy tailored to the waste management sector, including scalability in cloud systems, interoperability, and product differentiation.",
        },
        "J. Roy Gillis Lab, University of Toronto": {
            "CITY": "Toronto, ON",
            "ROLE": "Data Science, Quantitative Analysis Specialist",
            "DATE": "Jun 2024 - Dec 2024",
            "DESCRIPTION": "Designed an end-to-end sentiment analysis pipeline to analyze discourse around vaccination hesitancy in Canada, entailing the extraction, cleaning, annotation, modeling and visualization of over 100,000 data points from the Reddit API. Led an interdisciplinary team of 8 researchers, including engineers, social scientists and designers. Created an interactive visual story showcasing key trends and contextual patterns related to vaccination hesitancy, enhancing understanding and decision-making for stakeholders.",
        },
        "i4Health Research lab, York University": {
            "CITY": "Toronto, ON",
            "ROLE": "Machine Learning Research Assistant",
            "DATE": "Oct 2023 - Sep 2024",
            "DESCRIPTION": "Explored and developed ML-driven disparity analysis pipelines with Natural Language Processing and Causal Inference to assess discriminatory relationships in health. Collaborated in the development of a question-answering model for medical images.",
        },
    },
    "EDUCATION": {
        "University of Toronto": {
            "CITY": "Toronto, ON, CA",
            "DEGREE": "Master of Science (M.Sc.): Health Systems Artificial Intelligence emphasis",
            "START DATE": "Sep 2023",
            "GRADUATION DATE": "Mar 2025",
        },
        "University of California, San Diego": {
            "CITY": "Remote",
            "DEGREE": "Coursework in Object-Oriented Programming, Natural Language Processing, Probability and Statistics for Deep Learning, and Discrete Mathematics",
            "START DATE": "Jun 2022",
            "GRADUATION DATE": "Nov 2022",
        },
        "Wesleyan University": {
            "CITY": "Middletown, CT, USA",
            "DEGREE": "Bachelor of Arts Double Major: Economics, Science in Society Program (Mathematics/Neuroscience & Sociology emphases)",
            "START DATE": "Aug 2018",
            "GRADUATION DATE": "May 2022",
        },
    },
    "SKILLS": {
        "Technical": "Python, R, SQL, Tableau, SAS, SLURM (HPC), AWS, Spark, PowerBI, Stata",
        "Relevant Courses": "Machine Learning, Deep Learning, Statistical Learning, Data Visualization, Causal Inference, AI Implementation, Biostatistics, Innovation Management, Health Policy",
    },
    "ASPIRATIONS": "I aspire to work in a multidisciplinary environment where I can utilize my skills in data science, economics, public health, and policy.",
}
# In[4]: | |
# One text chunk per top-level resume section, in the form
# "<SECTION>: <str() of the section value> }}". These chunks are the units
# that get embedded and retrieved.
resume_chunks = [
    ": ".join((str(section), str(content))) + " }}"
    for section, content in resume_dict.items()
]
# In[5]: | |
# Sentence-embedding model used to vectorise the resume chunks.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode all chunks in a single call, then report the resulting array shape.
resume_embeddings = model.encode(resume_chunks)
print(f"Embedding shape: {resume_embeddings.shape}")  # Should be (num_chunks, 384)
# In[6]: | |
# Create a flat L2 FAISS index whose width matches the embedding vectors,
# then load every chunk embedding into it.
dimension = resume_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(resume_embeddings)

# Store the index and the chunk texts together in one pickle file so that
# row i of the index can be mapped back to resume_chunks[i] after reloading.
index_and_chunks = (index, resume_chunks)
with open("resume_faiss.pkl", "wb") as out_file:
    pickle.dump(index_and_chunks, out_file)

print("FAISS index built and saved.")
# In[7]: | |
# Read the (index, chunks) pair back from disk.
# NOTE: pickle.load can execute arbitrary code; only use it on a file this
# script wrote itself, never on an untrusted "resume_faiss.pkl".
with open("resume_faiss.pkl", "rb") as in_file:
    index, resume_chunks = pickle.load(in_file)
# In[8]: | |
import transformers | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
# In[9]: | |
# Query embedder. Reuse the SentenceTransformer instance (`model`) that
# produced the indexed chunk embeddings instead of loading a second copy of
# "all-MiniLM-L6-v2": this halves the embedding-model memory/start-up cost and
# guarantees queries live in the same vector space as the index.
embedder = model

# Causal language model (and its tokenizer) that writes the final answer.
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = AutoModelForCausalLM.from_pretrained(model_name)
# In[10]: | |
def retrieve_and_generate(query, top_k=2):
    """Answer a question about the resume via retrieval-augmented generation.

    The query is embedded with ``embedder``, the nearest resume chunks are
    looked up in the FAISS ``index``, and those chunks are inserted into a
    prompt that ``llm`` completes.

    Args:
        query: The user's question as free text.
        top_k: Number of resume chunks to retrieve as context.

    Returns:
        The generated answer text, or a short fallback message when the
        query is blank or no chunk could be retrieved.
    """
    # A blank question (e.g. "Ask" pressed on an empty textbox) has nothing
    # to retrieve or generate; skip the expensive model calls.
    if not query or not query.strip():
        return "Please enter a question."

    # Embed the query
    query_embedding = embedder.encode([query])

    # Search FAISS for top matching chunks
    _distances, indices = index.search(query_embedding, k=top_k)

    # FAISS fills missing neighbours with label -1 when the index holds fewer
    # than top_k vectors. Checking only the upper bound would let -1 through
    # and silently select the *last* chunk, so check both bounds.
    retrieved_texts = [
        resume_chunks[i] for i in indices[0] if 0 <= i < len(resume_chunks)
    ]
    if not retrieved_texts:
        return "Sorry, I couldn't find relevant information in the resume."

    # Combine context
    context = "\n".join(retrieved_texts)

    # Manually build the prompt for Qwen2.5
    prompt = (
        "You are Juan, a recent master's graduate. Based on your resume information below (in python dictionary format), "
        "answer the user's question truthfully and concisely in first person, checking for the right key in the dictionary. Let's think step by step.\n\n"
        f"Resume:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )

    # Tokenize on CPU (CUDA is disabled at the top of the file) and pass the
    # full encoding so generate() receives the attention mask explicitly
    # instead of having to guess it.
    encoded = tokenizer(prompt, return_tensors="pt").to("cpu")
    input_ids = encoded["input_ids"]
    output = llm.generate(**encoded, max_new_tokens=200)

    # Only decode the newly generated part (everything after the prompt)
    generated_tokens = output[0][input_ids.shape[-1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
    return response
# In[11]: | |
# In[12]: | |
# Canned answers for the topic dropdown: topic label -> fixed response text.
qa_dict = {
    "Work experience": "My experience spans both academia and industry, including my role as a Faculty Affiliate Researcher at the Vector Institute for Artificial Intelligence, where I focused on Natural Language Processing in healthcare, and as an AI strategy consultant for Rubik, a London-based tech startup.",
    "Education": "I recently earned an M.Sc. with an emphasis in Health Systems Artificial Intelligence from the University of Toronto and hold a B.A. in Economics and Science & Technology Studies from Wesleyan University.",
    "Skillset": "With a multidisciplinary background in AI, statistics, economics, and health systems, I bring a unique perspective to solving complex problems.",
    "Other": "Please reach out directly at juanandresmedina125[at]gmail[dot]com, and I would be delighted to discuss my background and experiences in greater detail.",
}
# In[14]: | |
import gradio as gr | |
import requests | |
# In[15]: | |
# Dark UI theme: start from Gradio's Soft theme, then override individual
# colour variables.
_soft_base = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="slate",
    font=["Montserrat", "sans-serif"],
)
futuristic_theme = _soft_base.set(
    body_background_fill="#0f1117",  # page background: very dark navy
    body_text_color="#74EE2D",  # body text: bright green
    block_background_fill="#1f2937",  # panels: slightly lighter than the page
    block_border_color="#6b7280",  # panel borders: cool gray
    input_background_fill="#000000",  # input fields: black
    input_border_color="#9CA3AF",
    button_primary_background_fill="#74EE2D",
    button_primary_text_color="#000000",
    button_primary_background_fill_hover="#7C3AED",
)
# Callback for the topic dropdown
def handle_predefined_question(topic):
    """Return the canned answer for ``topic``.

    Falls back to a prompt asking for a valid topic when ``topic`` is not a
    key of ``qa_dict``.
    """
    if topic in qa_dict:
        return qa_dict[topic]
    return "Please select a valid topic."
# Custom CSS for the topic dropdown and its option list (black background,
# green text, gray border).
_DROPDOWN_CSS = """
/* Style the dropdown button */
#custom-dropdown .gr-button {
background-color: #000000 !important;
color: #74EE2D !important;
border: 1px solid #9CA3AF !important;
}
/* Global override for dropdown list */
ul[role="listbox"] {
background-color: #000000 !important;
color: #74EE2D !important;
border: 1px solid #9CA3AF !important;
}
/* Style individual options */
ul[role="listbox"] > li {
background-color: #000000 !important;
color: #74EE2D !important;
}
/* Hover effect */
ul[role="listbox"] > li:hover {
background-color: #1f2937 !important;
}
"""

# Gradio UI: a two-column page with canned topics on the left and free-form,
# LLM-answered questions on the right.
# NOTE(review): the nesting below was reconstructed from statement order;
# confirm the disclaimer's placement against the deployed layout.
with gr.Blocks(title="Ask My Resume", theme=futuristic_theme, css=_DROPDOWN_CSS) as demo:
    # Page header
    gr.Markdown(
        "## ✨ Ask My Resume\n"
        "Welcome! Ask me anything about my experience below.",
        elem_id="title",
    )

    with gr.Row():
        # Left column: predefined topics answered from qa_dict
        with gr.Column():
            gr.Markdown("## Topics")
            dropdown = gr.Dropdown(
                choices=["Select...", *qa_dict],
                label="Choose a topic",
                elem_id="custom-dropdown",
            )
            predefined_output = gr.Textbox(
                label="Answer",
                lines=4,
                interactive=False,
                show_copy_button=True,
            )
            # Refresh the answer box whenever the selected topic changes
            dropdown.change(
                fn=handle_predefined_question,
                inputs=dropdown,
                outputs=predefined_output,
            )

        # Right column: free-text question answered by retrieve_and_generate
        with gr.Column():
            gr.Markdown("## Ask Your Own Question")
            user_input = gr.Textbox(label="Enter your question")
            custom_output = gr.Textbox(
                label="Answer (AI-Powered)",
                lines=5,
                interactive=False,
                show_copy_button=True,
            )
            ask_btn = gr.Button("Ask", variant="primary")
            ask_btn.click(
                fn=retrieve_and_generate,
                inputs=user_input,
                outputs=custom_output,
            )

    gr.Markdown("*Disclaimer: LLM-based answers can be prone to errors.*")

# Start the web server for the app
demo.launch()
# In[ ]: | |