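"""Gradio app that ranks candidate job titles against a query job description.

Scoring backends: cosine similarity over local transformer embeddings
(all-mpnet-base-v2, Llama-3.2-1B, Gemma-3-1B) and LLM-as-judge scoring via
Groq-hosted models (llama-3.3-70b-versatile, qwen-qwq-32b).
"""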
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import os
import re

from groq import Groq
from huggingface_hub import login

# Authenticate with the Hugging Face Hub to access the gated Llama and Gemma models.
login(token=os.getenv("HF_API_Key"))
class Engine:
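    """Loads the candidate spreadsheet and the scoring models, and exposes
    one ranking method per backend."""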
def __init__(self):
        # Sample values (currently unused; rankings are computed from
        # potential-talents.xlsx instead)
self.job_titles = [
"Software Engineer",
"Data Scientist",
"Product Manager",
"DevOps Engineer",
"ML Engineer",
]
self.locations = ["New York", "San Francisco", "Remote", "Seattle", "Austin"]
self.connections = ["1st", "2nd", "3rd", "2nd", "1st"]
self.st_model = AutoModel.from_pretrained(
"sentence-transformers/all-mpnet-base-v2"
)
self.llama_model = AutoModel.from_pretrained("meta-llama/Llama-3.2-1B")
self.gemma_model = AutoModel.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_r1_model = AutoModel.from_pretrained("deepseek-ai/DeepSeek-R1")
self.st_tokenizer = AutoTokenizer.from_pretrained(
"sentence-transformers/all-mpnet-base-v2"
)
self.llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
self.gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
        # self.deepseek_r1_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1")
self.potential_talents = pd.read_excel("potential-talents.xlsx")
self.groq = Groq(api_key=os.getenv("Groq_API_Key"))
def sentence_transformer(self, query):
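        """Rank candidates by cosine similarity between all-mpnet-base-v2
        embeddings of each job title and the query."""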
        # Embed the query once, then score every job title against it.
query_embedding = self.st_encode_text(query)
self.potential_talents["Similarity_all-mpnet-base-v2"] = self.potential_talents[
"job_title"
].apply(
lambda job_title: torch.nn.functional.cosine_similarity(
torch.from_numpy(self.st_encode_text(job_title)),
torch.from_numpy(query_embedding),
).item()
)
# Rename the column for display while keeping the original data
result_df = self.potential_talents[
["job_title", "location", "connection", "Similarity_all-mpnet-base-v2"]
]
result_df.columns = ["Job Title", "Location", "Connection", "Score"]
return result_df.sort_values(by="Score", ascending=False)
def st_encode_text(self, text):
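        """Return a mean-pooled all-mpnet-base-v2 embedding for ``text``
        (plain average over the token hidden states)."""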
inputs = self.st_tokenizer(
text, return_tensors="pt", padding=True, truncation=True
)
        with torch.no_grad():
            outputs = self.st_model(**inputs)
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
def llama_3_2_1b(self, query):
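        """Rank candidates by cosine similarity between Llama-3.2-1B
        embeddings of each job title and the query."""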
        # Embed the query once, then score every job title against it.
query_embedding = self.llama1_encode_text(query)
self.potential_talents["Similarity_Llama-3.2-1B"] = self.potential_talents[
"job_title"
].apply(
lambda job_title: torch.nn.functional.cosine_similarity(
torch.from_numpy(self.llama1_encode_text(job_title)),
torch.from_numpy(query_embedding),
).item()
)
result_df = self.potential_talents[
["job_title", "location", "connection", "Similarity_Llama-3.2-1B"]
]
result_df.columns = ["Job Title", "Location", "Connection", "Score"]
return result_df.sort_values(by="Score", ascending=False)
def llama1_encode_text(self, text):
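        """Return a mean-pooled Llama-3.2-1B embedding for ``text``.
        Llama defines no pad token, so the EOS token is reused for padding."""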
self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
inputs = self.llama_tokenizer(
text, return_tensors="pt", padding=True, truncation=True
)
        with torch.no_grad():
            outputs = self.llama_model(**inputs)
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
def gemma_3_1_1b(self, query):
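        """Rank candidates by cosine similarity between Gemma 3 1B
        (google/gemma-3-1b-it) embeddings of each job title and the query."""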
        # Embed the query once, then score every job title against it.
query_embedding = self.gemma_encode_text(query)
self.potential_talents["Similarity_Gemma-3.1-1B"] = self.potential_talents[
"job_title"
].apply(
lambda job_title: torch.nn.functional.cosine_similarity(
torch.from_numpy(self.gemma_encode_text(job_title)),
torch.from_numpy(query_embedding),
).item()
)
result_df = self.potential_talents[
["job_title", "location", "connection", "Similarity_Gemma-3.1-1B"]
]
result_df.columns = ["Job Title", "Location", "Connection", "Score"]
return result_df.sort_values(by="Score", ascending=False)
def gemma_encode_text(self, text):
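        """Return a mean-pooled Gemma-3-1B embedding for ``text``."""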
# self.gemma_tokenizer.pad_token = self.gemma_tokenizer.eos_token
inputs = self.gemma_tokenizer(
text, return_tensors="pt", padding=True, truncation=True
)
        with torch.no_grad():
            outputs = self.gemma_model(**inputs)
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings.detach().numpy()
def llama_3_2_70b(self, query):
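        """Rank candidates by asking Groq-hosted llama-3.3-70b-versatile to
        score each job title against the query (LLM-as-judge)."""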
self.potential_talents["groq_3_3_70b_llama"] = self.potential_talents[
"job_title"
].apply(lambda job_title: self.groq_llama_encode_text(job_title, query))
result_df = self.potential_talents[
["job_title", "location", "connection", "groq_3_3_70b_llama"]
]
result_df.columns = ["Job Title", "Location", "Connection", "Score"]
return result_df.sort_values(by="Score", ascending=False)
    def groq_llama_encode_text(self, text, query):
        """Prompt llama-3.3-70b-versatile for a 0-1 similarity score between
        a job title and the query, returned as a float."""
        response = self.groq.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "user",
                    "content": "Return the similarity between {job_title} and {query} as a single real number between 0 and 1. Return only the number, with no explanation.".format(
                        job_title=text, query=query
                    ),
                }
            ],
        )
        # The model may still wrap the number in extra text, so take the last
        # float-looking token and fall back to 0.0 if none is found.
        scores = re.findall(r"\d*\.?\d+", response.choices[0].message.content)
        return float(scores[-1]) if scores else 0.0
def deepseek_ri(self, query):
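        """Rank candidates via LLM-as-judge scoring. Despite the name, this
        currently calls qwen-qwq-32b on Groq; the DeepSeek-R1 lines in
        __init__ are commented out."""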
self.potential_talents["Similarity_DeepSeek-RI"] = self.potential_talents[
"job_title"
].apply(lambda job_title: self.groq_deepseek_encode_text(job_title, query))
result_df = self.potential_talents[
["job_title", "location", "connection", "Similarity_DeepSeek-RI"]
]
result_df.columns = ["Job Title", "Location", "Connection", "Score"]
return result_df.sort_values(by="Score", ascending=False)
    def groq_deepseek_encode_text(self, text, query):
        """Same LLM-as-judge scoring as groq_llama_encode_text, but via
        qwen-qwq-32b (originally intended for DeepSeek-R1)."""
        response = self.groq.chat.completions.create(
            model="qwen-qwq-32b",
            messages=[
                {
                    "role": "user",
                    "content": "Return the similarity between {job_title} and {query} as a single real number between 0 and 1. Return only the number, with no explanation.".format(
                        job_title=text, query=query
                    ),
                }
            ],
        )
        # QwQ is a reasoning model and tends to emit its chain of thought, so
        # take the last float-looking token as the final score.
        scores = re.findall(r"\d*\.?\d+", response.choices[0].message.content)
        return float(scores[-1]) if scores else 0.0
# Build the engine once at import time; constructing it inside process_query
# would reload every model on each click.
engine = Engine()


def process_query(query, selection):
# Map selection to corresponding method
    engine_map = {
        "Sentence Transformer (all-mpnet-base-v2)": engine.sentence_transformer,
        "Llama 3.2 - 1B": engine.llama_3_2_1b,
        "Llama 3.3 - 70B": engine.llama_3_2_70b,
        "Gemma 3 - 1B": engine.gemma_3_1_1b,
        "qwen-qwq-32b": engine.deepseek_ri,
    }
# Get the selected engine function and call it
selected_engine = engine_map.get(selection)
if selected_engine:
return selected_engine(query)
else:
return pd.DataFrame() # Return empty DataFrame if no valid selection
# Create the interface
with gr.Blocks() as demo:
gr.Markdown("# Potential Talent Ranker")
with gr.Row():
query_input = gr.Textbox(
label="Enter the job description",
placeholder="E.x. Aspiring Human Resource",
lines=2,
)
selection_dropdown = gr.Dropdown(
            choices=[
                "Sentence Transformer (all-mpnet-base-v2)",
                "Llama 3.2 - 1B",
                "Gemma 3 - 1B",
                "Llama 3.3 - 70B",
                "qwen-qwq-32b",
            ],
label="Select the ranking engine",
value="Sentence Transformer (all-mpnet-base-v2)",
)
submit_btn = gr.Button("Submit")
with gr.Row():
results_table = gr.DataFrame(
label="Results",
headers=["Job Title", "Location", "Connection", "Score"],
row_count=20,
col_count=4,
)
submit_btn.click(
fn=process_query,
inputs=[query_input, selection_dropdown],
outputs=results_table,
)
if __name__ == "__main__":
demo.launch()