Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from transformers import pipeline
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
from langdetect import detect
|
7 |
+
|
8 |
+
# Sample job postings, kept as (company, description) pairs so each
# description stays next to the company it belongs to.
_job_postings = [
    (
        'Google',
        "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.",
    ),
    (
        'Amazon',
        "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.",
    ),
    (
        'Microsoft',
        "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.",
    ),
    (
        'Facebook',
        "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.",
    ),
    (
        'Tesla',
        "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must.",
    ),
]

# Column-oriented view of the postings: two parallel lists keyed by column name.
data = {
    'Company': [company for company, _ in _job_postings],
    'Job_Description': [description for _, description in _job_postings],
}

# One row per posting, with columns 'Company' and 'Job_Description'.
df = pd.DataFrame(data)
|
22 |
+
# Checkpoint names for the two Hugging Face models loaded at import time.
_EMBEDDING_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
_TRANSLATION_CHECKPOINT = "facebook/nllb-200-distilled-600M"

# Sentence-embedding model used to compare a CV with the job descriptions.
model = SentenceTransformer(_EMBEDDING_CHECKPOINT)

# Translation pipeline used for both CV -> English and English -> CV language.
translator = pipeline(task="translation", model=_TRANSLATION_CHECKPOINT)

# Maps the language code returned by detect() to the code passed to the
# translator as src_lang / tgt_lang. Any language missing from this table is
# treated as English by translate_to_english().
lang_code_mapping = dict(
    ar='arb_Arab',  # Arabic
    fr='fra_Latn',  # French
    es='spa_Latn',  # Spanish
    de='deu_Latn',  # German
)
|
31 |
+
|
32 |
+
# Take CV and translate it to English if not in English to compare to job descriptions and return English CV and detected language
|
33 |
+
def translate_to_english(cv_text):
    """Return the CV in English together with the detected language code.

    The language is detected with ``detect`` and mapped through
    ``lang_code_mapping``. Languages missing from that mapping, and text
    whose language cannot be detected at all, are treated as English and
    returned unchanged.

    Args:
        cv_text: Raw CV text entered by the user.

    Returns:
        A ``(text, lang_code)`` tuple. ``text`` is the English CV and
        ``lang_code`` is the translator language code of the original CV
        (``"eng_Latn"`` when no translation was performed).
    """
    # Imported locally so this function does not depend on a change to the
    # file-level import block.
    from langdetect import LangDetectException

    try:
        detected_code = detect(cv_text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. only digits or
        # punctuation). Fall back to the same default as unmapped languages
        # instead of failing the whole request.
        detected_code = None

    detected_lang = lang_code_mapping.get(detected_code, "eng_Latn")

    # Nothing to do when the CV is already (treated as) English.
    if detected_lang == 'eng_Latn':
        return cv_text, detected_lang

    # NOTE(review): very long CVs may exceed the translation model's input
    # limit and be truncated -- confirm against the pipeline configuration.
    translation = translator(cv_text, src_lang=detected_lang, tgt_lang="eng_Latn")[0]['translation_text']
    return translation, detected_lang
|
42 |
+
|
43 |
+
#if an entered cv is not in English return the job description in the entered cv language
|
44 |
+
def translate_job_description_if_needed(job_description, target_lang):
    """Translate an English job description into ``target_lang`` when needed.

    Args:
        job_description: Job description text, written in English.
        target_lang: Language code of the CV. Callers in this file pass the
            translator-style code (e.g. ``"arb_Arab"``, ``"eng_Latn"``); the
            short ``"en"`` form is also accepted for backward compatibility.

    Returns:
        The original text when the target language is English (or not
        given), otherwise the translated text.
    """
    # The previous check compared only against 'en', but the caller passes
    # 'eng_Latn' for English CVs, which triggered a pointless
    # English -> English translation of every job description.
    if not target_lang or target_lang in ('en', 'eng_Latn'):
        return job_description

    return translator(job_description, src_lang="eng_Latn", tgt_lang=target_lang)[0]['translation_text']
|
48 |
+
|
49 |
+
# Function to find top 3 job descriptions matching the CV using semantic similarity
|
50 |
+
def find_top_matches(cv_text):
    """Find the three job descriptions most similar to a CV.

    The CV is translated to English when needed, embedded together with the
    job descriptions in ``df``, and ranked by cosine similarity. The matched
    descriptions are shown in the CV's own language.

    Args:
        cv_text: CV text entered by the user.

    Returns:
        A ``(html, figure)`` tuple: an HTML summary of the best matches and
        a matplotlib figure with a bar chart of their similarity scores.
        For an empty or whitespace-only CV the tuple is
        ``("Error: CV is empty", None)``.
    """
    # Reject missing, empty and whitespace-only input before doing any work.
    if not cv_text or not cv_text.strip():
        return "Error: CV is empty", None

    # Translate the CV to English if it was written in another supported language.
    cv_text, detected_lang = translate_to_english(cv_text)

    # Get job descriptions from the DataFrame
    descriptions = df['Job_Description'].tolist()

    # Encode both the CV and job descriptions
    descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True)
    cv_embedding = model.encode([cv_text], convert_to_tensor=True)

    # Cosine similarity between the CV and every job description.
    similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0]

    # Plain Python ints/floats keep pandas indexing and plotting independent
    # of the tensor's device (calling .numpy() directly fails on CUDA tensors).
    top_3_indices = similarities.argsort(descending=True)[:3].tolist()
    top_3_matches = df.iloc[top_3_indices]
    top_3_similarities = [float(similarities[i]) for i in top_3_indices]

    # Draw on a fresh figure for every request. Plotting through the implicit
    # pyplot figure accumulated bars from earlier requests because that
    # figure was never cleared.
    fig, ax = plt.subplots()
    ax.bar(top_3_matches['Company'].tolist(), top_3_similarities, color='skyblue')
    ax.set_ylabel('Similarity Score')
    ax.set_xlabel('Company')
    ax.set_title('Top 3 Job Description Matches')
    # Unregister the figure from pyplot so a long-running server does not
    # keep every figure alive; the Figure object itself is still returned.
    plt.close(fig)

    # Build the HTML summary, one entry per matched company.
    summary_parts = []
    for _, row in top_3_matches.iterrows():
        job_description = row['Job_Description']
        # Only translate for non-English CVs; the descriptions are already
        # in English.
        if detected_lang != 'eng_Latn':
            job_description = translate_job_description_if_needed(job_description, detected_lang)

        summary_parts.append(f"<strong>Company:</strong> {row['Company']}<br>")
        summary_parts.append(f"<strong>Job Description :</strong> {job_description}<br><br>")

    job_summaries = "".join(summary_parts)
    return job_summaries, fig
|
88 |
+
|
89 |
+
# Define the Gradio interface: one CV text box in, an HTML summary and a
# similarity bar chart out.
demo = gr.Interface(
    fn=find_top_matches,
    inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"),
    outputs=[
        gr.HTML(label="Job Summaries"),
        gr.Plot(label="Top 3 Matching Job Descriptions"),
    ],
    # Stray leading apostrophe removed from the displayed title.
    title="Match CV to Job Description",
    description="Upload your CV to find the top 3 job descriptions that match from the available companies using semantic similarity.",
)

# Start the web server only when the file is run as a script, so importing
# this module (e.g. for testing) does not launch the app.
if __name__ == "__main__":
    demo.launch()
|