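"""Potential Talent Ranker: a Gradio app that ranks candidate job titles
against a query job description.

Scores come either from cosine similarity of locally computed embeddings
(all-mpnet-base-v2, Llama 3.2 1B, Gemma 3 1B) or from Groq-hosted LLMs
(Llama 3.3 70B, Qwen QwQ-32B) prompted to rate the match directly.
"""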
import os

import gradio as gr
import pandas as pd
import torch
from groq import Groq
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModel

# Log in to the Hugging Face Hub; required for gated models such as Llama and Gemma.
login(token=os.getenv("HF_API_Key"))
class Engine:
    def __init__(self):
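        """Load embedding models, tokenizers, the candidate spreadsheet, and the Groq client."""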
        # Sample data (currently unused; candidates are loaded from the Excel file below).
        self.job_titles = [
            "Software Engineer",
            "Data Scientist",
            "Product Manager",
            "DevOps Engineer",
            "ML Engineer",
        ]
        self.locations = ["New York", "San Francisco", "Remote", "Seattle", "Austin"]
        self.connections = ["1st", "2nd", "3rd", "2nd", "1st"]
        self.st_model = AutoModel.from_pretrained(
            "sentence-transformers/all-mpnet-base-v2"
        )
        self.llama_model = AutoModel.from_pretrained("meta-llama/Llama-3.2-1B")
        self.gemma_model = AutoModel.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_model = AutoModel.from_pretrained("deepseek-ai/DeepSeek-R1")
        self.st_tokenizer = AutoTokenizer.from_pretrained(
            "sentence-transformers/all-mpnet-base-v2"
        )
        self.llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
        # The Llama tokenizer ships without a pad token; reuse EOS so padding works.
        self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
        self.gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")
        self.potential_talents = pd.read_excel("potential-talents.xlsx")
        self.groq = Groq(api_key=os.getenv("Groq_API_Key"))
    def sentence_transformer(self, query):
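        """Rank candidates by cosine similarity of all-mpnet-base-v2 embeddings."""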
        # Score each job title by cosine similarity to the query embedding.
        query_embedding = self.st_encode_text(query)
        self.potential_talents["Similarity_all-mpnet-base-v2"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.st_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        # Copy the display columns so renaming does not touch the original data.
        result_df = self.potential_talents[
            ["job_title", "location", "connection", "Similarity_all-mpnet-base-v2"]
        ].copy()
        result_df.columns = ["Job Title", "Location", "Connection", "Score"]
        return result_df.sort_values(by="Score", ascending=False)
    def st_encode_text(self, text):
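        """Embed text as the mean of all-mpnet-base-v2's last hidden states."""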
        inputs = self.st_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            outputs = self.st_model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
        return embeddings.numpy()
    def llama_3_2_1b(self, query):
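        """Rank candidates by cosine similarity of Llama 3.2 1B embeddings."""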
        # Score each job title by cosine similarity to the query embedding.
        query_embedding = self.llama_encode_text(query)
        self.potential_talents["Similarity_Llama-3.2-1B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.llama_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        result_df = self.potential_talents[
            ["job_title", "location", "connection", "Similarity_Llama-3.2-1B"]
        ].copy()
        result_df.columns = ["Job Title", "Location", "Connection", "Score"]
        return result_df.sort_values(by="Score", ascending=False)
    def llama_encode_text(self, text):
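        """Embed text as the mean of Llama 3.2 1B's last hidden states."""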
        # The pad token is configured once in __init__.
        inputs = self.llama_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():
            outputs = self.llama_model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
        return embeddings.numpy()
    def gemma_3_1b(self, query):
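        """Rank candidates by cosine similarity of Gemma 3 1B embeddings."""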
        # Score each job title by cosine similarity to the query embedding.
        query_embedding = self.gemma_encode_text(query)
        self.potential_talents["Similarity_Gemma-3-1B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.gemma_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        result_df = self.potential_talents[
            ["job_title", "location", "connection", "Similarity_Gemma-3-1B"]
        ].copy()
        result_df.columns = ["Job Title", "Location", "Connection", "Score"]
        return result_df.sort_values(by="Score", ascending=False)
    def gemma_encode_text(self, text):
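        """Embed text as the mean of Gemma 3 1B's last hidden states."""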
        inputs = self.gemma_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():
            outputs = self.gemma_model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
        return embeddings.numpy()
    def llama_3_3_70b(self, query):
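        """Rank candidates with match scores from Llama 3.3 70B on Groq."""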
self.potential_talents["groq_3_3_70b_llama"] = self.potential_talents[ | |
"job_title" | |
].apply(lambda job_title: self.groq_llama_encode_text(job_title, query)) | |
result_df = self.potential_talents[ | |
["job_title", "location", "connection", "groq_3_3_70b_llama"] | |
] | |
result_df.columns = ["Job Title", "Location", "Connection", "Score"] | |
return result_df.sort_values(by="Score", ascending=False) | |
    def groq_llama_similarity(self, text, query):
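        """Ask Llama 3.3 70B (via Groq) for a 0-1 similarity between a job title and the query."""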
        response = self.groq.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "user",
                    "content": (
                        "Return the similarity between '{job_title}' and '{query}' "
                        "as a single real number between 0 and 1. Return only the "
                        "number, with no explanation."
                    ).format(job_title=text, query=query),
                }
            ],
        )
        try:
            # Parse the reply as a number so sorting works; the model may still
            # return extra text, in which case fall back to 0.0.
            return float(response.choices[0].message.content.strip())
        except ValueError:
            return 0.0
    def qwen_qwq_32b(self, query):
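        """Rank candidates with match scores from Qwen QwQ-32B on Groq."""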
self.potential_talents["Similarity_DeepSeek-RI"] = self.potential_talents[ | |
"job_title" | |
].apply(lambda job_title: self.groq_deepseek_encode_text(job_title, query)) | |
result_df = self.potential_talents[ | |
["job_title", "location", "connection", "Similarity_DeepSeek-RI"] | |
] | |
result_df.columns = ["Job Title", "Location", "Connection", "Score"] | |
return result_df.sort_values(by="Score", ascending=False) | |
    def groq_qwen_similarity(self, text, query):
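        """Ask Qwen QwQ-32B (via Groq) for a 0-1 similarity between a job title and the query."""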
        response = self.groq.chat.completions.create(
            model="qwen-qwq-32b",
            messages=[
                {
                    "role": "user",
                    "content": (
                        "Return the similarity between '{job_title}' and '{query}' "
                        "as a single real number between 0 and 1. Return only the "
                        "number, with no explanation."
                    ).format(job_title=text, query=query),
                }
            ],
        )
        try:
            # Reasoning models often add commentary; fall back to 0.0 if parsing fails.
            return float(response.choices[0].message.content.strip())
        except ValueError:
            return 0.0
# Load the models and data once at startup instead of on every request.
engine = Engine()

def process_query(query, selection):
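    """Dispatch the query to the ranking engine selected in the dropdown."""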
    # Map each dropdown choice to the corresponding ranking method.
    engine_map = {
        "Sentence Transformer (all-mpnet-base-v2)": engine.sentence_transformer,
        "Llama 3.2 - 1B": engine.llama_3_2_1b,
        "Llama 3.3 - 70B": engine.llama_3_3_70b,
        "Gemma 3 1B": engine.gemma_3_1b,
        "qwen-qwq-32b": engine.qwen_qwq_32b,
    }
    # Call the selected engine; return an empty DataFrame for an invalid selection.
    selected_engine = engine_map.get(selection)
    if selected_engine:
        return selected_engine(query)
    return pd.DataFrame()
# Create the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Potential Talent Ranker")
    with gr.Row():
        query_input = gr.Textbox(
            label="Enter the job description",
            placeholder="e.g. Aspiring Human Resources",
            lines=2,
        )
        selection_dropdown = gr.Dropdown(
            choices=[
                "Sentence Transformer (all-mpnet-base-v2)",
                "Llama 3.2 - 1B",
                "Gemma 3 1B",
                "Llama 3.3 - 70B",
                "qwen-qwq-32b",
            ],
            label="Select the ranking engine",
            value="Sentence Transformer (all-mpnet-base-v2)",
        )
        submit_btn = gr.Button("Submit")
    with gr.Row():
        results_table = gr.DataFrame(
            label="Results",
            headers=["Job Title", "Location", "Connection", "Score"],
            row_count=20,
            col_count=4,
        )
    submit_btn.click(
        fn=process_query,
        inputs=[query_input, selection_dropdown],
        outputs=results_table,
    )
if __name__ == "__main__":
    demo.launch()