import os
import re

import gradio as gr
import pandas as pd
import torch
from groq import Groq
from huggingface_hub import login
from transformers import AutoModel, AutoTokenizer

# Log in to the Hugging Face Hub so the gated Llama and Gemma checkpoints can be downloaded.
login(token=os.getenv("HF_API_Key"))


class Engine:
    def __init__(self):
        # Fallback sample data (the app itself reads candidates from potential-talents.xlsx).
        self.job_titles = [
            "Software Engineer",
            "Data Scientist",
            "Product Manager",
            "DevOps Engineer",
            "ML Engineer",
        ]
        self.locations = ["New York", "San Francisco", "Remote", "Seattle", "Austin"]
        self.connections = ["1st", "2nd", "3rd", "2nd", "1st"]

        self.st_model = AutoModel.from_pretrained(
            "sentence-transformers/all-mpnet-base-v2"
        )
        self.llama_model = AutoModel.from_pretrained("meta-llama/Llama-3.2-1B")
        self.gemma_model = AutoModel.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_r1_model = AutoModel.from_pretrained("deepseek-ai/DeepSeek-R1")

        self.st_tokenizer = AutoTokenizer.from_pretrained(
            "sentence-transformers/all-mpnet-base-v2"
        )
        self.llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
        self.gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_r1_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")

        self.potential_talents = pd.read_excel("potential-talents.xlsx")
        self.groq = Groq(api_key=os.getenv("Groq_API_Key"))

    def _result_table(self, score_column):
        # Copy the display columns so renaming does not mutate the source DataFrame.
        result_df = self.potential_talents[
            ["job_title", "location", "connection", score_column]
        ].copy()
        result_df.columns = ["Job Title", "Location", "Connection", "Score"]
        return result_df.sort_values(by="Score", ascending=False)

    def sentence_transformer(self, query):
        # Score every job title by cosine similarity to the query embedding.
        query_embedding = self.st_encode_text(query)
        self.potential_talents["Similarity_all-mpnet-base-v2"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.st_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        return self._result_table("Similarity_all-mpnet-base-v2")

    def st_encode_text(self, text):
        inputs = self.st_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():  # inference only; no gradients needed
            outputs = self.st_model(**inputs)
        # Mean-pool the token embeddings into one vector per input.
        return outputs.last_hidden_state.mean(dim=1).numpy()

    def llama_3_2_1b(self, query):
        query_embedding = self.llama_encode_text(query)
        self.potential_talents["Similarity_Llama-3.2-1B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.llama_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        return self._result_table("Similarity_Llama-3.2-1B")

    def llama_encode_text(self, text):
        # Llama has no pad token by default, so reuse the EOS token for padding.
        self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
        inputs = self.llama_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():
            outputs = self.llama_model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).numpy()

    def gemma_3_1b(self, query):
        query_embedding = self.gemma_encode_text(query)
        self.potential_talents["Similarity_Gemma-3-1B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: torch.nn.functional.cosine_similarity(
                torch.from_numpy(self.gemma_encode_text(job_title)),
                torch.from_numpy(query_embedding),
            ).item()
        )
        return self._result_table("Similarity_Gemma-3-1B")

    def gemma_encode_text(self, text):
        # Gemma's tokenizer already defines a pad token, so no EOS fallback is needed.
        inputs = self.gemma_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        with torch.no_grad():
            outputs = self.gemma_model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).numpy()

    def llama_3_3_70b(self, query):
        # Groq serves Llama 3.3 70B; the model judges each pair's similarity directly.
        self.potential_talents["Similarity_Llama-3.3-70B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: self.groq_similarity(
                "llama-3.3-70b-versatile", job_title, query
            )
        )
        return self._result_table("Similarity_Llama-3.3-70B")

    def qwen_qwq_32b(self, query):
        self.potential_talents["Similarity_Qwen-QwQ-32B"] = self.potential_talents[
            "job_title"
        ].apply(
            lambda job_title: self.groq_similarity("qwen-qwq-32b", job_title, query)
        )
        return self._result_table("Similarity_Qwen-QwQ-32B")

    def groq_similarity(self, model, job_title, query):
        # Ask the hosted model for a bare similarity score between 0 and 1.
        response = self.groq.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": (
                        "Return the similarity between '{job_title}' and '{query}' "
                        "as a single real number between 0 and 1. Return only the "
                        "number, with no explanation."
                    ).format(job_title=job_title, query=query),
                }
            ],
        )
        return self._parse_score(response.choices[0].message.content)

    def _parse_score(self, content):
        # The prompt asks for a bare number, but chat (and especially reasoning)
        # models sometimes wrap it in text; keep the last number found so the
        # Score column stays numeric and sorts correctly, or 0.0 if none is found.
        matches = re.findall(r"\d*\.?\d+", content)
        return float(matches[-1]) if matches else 0.0


# Build the engine once at startup; reloading three transformer checkpoints
# on every click would make each query take minutes.
engine = Engine()


def process_query(query, selection):
    # Map each dropdown label to the corresponding ranking method.
    engine_map = {
        "Sentence Transformer (all-mpnet-base-v2)": engine.sentence_transformer,
        "Llama 3.2 - 1B": engine.llama_3_2_1b,
        "Llama 3.3 - 70B": engine.llama_3_3_70b,
        "Gemma 3 - 1B": engine.gemma_3_1b,
        "qwen-qwq-32b": engine.qwen_qwq_32b,
    }
    # Get the selected engine function and call it.
    selected_engine = engine_map.get(selection)
    if selected_engine:
        return selected_engine(query)
    return pd.DataFrame()  # Empty table when the selection is unrecognized.


# Create the interface.
with gr.Blocks() as demo:
    gr.Markdown("# Potential Talent Ranker")
    with gr.Row():
        query_input = gr.Textbox(
            label="Enter the job description",
            placeholder="E.g. Aspiring Human Resource",
            lines=2,
        )
        selection_dropdown = gr.Dropdown(
            choices=[
                "Sentence Transformer (all-mpnet-base-v2)",
                "Llama 3.2 - 1B",
                "Gemma 3 - 1B",
                "Llama 3.3 - 70B",
                "qwen-qwq-32b",
            ],
            label="Select the ranking engine",
            value="Sentence Transformer (all-mpnet-base-v2)",
        )
    submit_btn = gr.Button("Submit")
    with gr.Row():
        results_table = gr.DataFrame(
            label="Results",
            headers=["Job Title", "Location", "Connection", "Score"],
            row_count=20,
            col_count=4,
        )
    submit_btn.click(
        fn=process_query,
        inputs=[query_input, selection_dropdown],
        outputs=results_table,
    )

if __name__ == "__main__":
    demo.launch()