# Spaces status: Build error
import torch | |
import transformers | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics.pairwise import cosine_similarity | |
from linkedin_api import Linkedin | |
import os | |
from dotenv import load_dotenv | |
import re | |
from typing import List, Dict | |
from linkedin_v2 import linkedin | |
from transformers import AutoTokenizer, AutoModel | |
class LinkedInResumeScreeningApp:
    """Rank candidate profiles against a job description using text embeddings.

    Profiles come from the LinkedIn API when all OAuth settings are present in
    the environment and the client can be constructed; otherwise the app runs
    in demo mode and serves a small set of built-in sample profiles.

    Configuration is read from these environment variables (a local ``.env``
    file is loaded first):

    * ``LINKEDIN_CLIENT_ID``
    * ``LINKEDIN_CLIENT_SECRET``
    * ``LINKEDIN_REDIRECT_URI``
    * ``LINKEDIN_ACCESS_TOKEN``

    SECURITY: credential *values* must never appear in this file. Any secret
    that was previously committed here should be treated as compromised and
    revoked/rotated.
    """

    # Names (not values!) of the environment variables holding the settings.
    ENV_CLIENT_ID = 'LINKEDIN_CLIENT_ID'
    ENV_CLIENT_SECRET = 'LINKEDIN_CLIENT_SECRET'
    ENV_REDIRECT_URI = 'LINKEDIN_REDIRECT_URI'
    ENV_ACCESS_TOKEN = 'LINKEDIN_ACCESS_TOKEN'

    # OAuth permissions requested when building the authentication object.
    LINKEDIN_SCOPES = ['r_liteprofile', 'r_emailaddress', 'w_member_social']

    # Hugging Face model used for both the tokenizer and the encoder.
    EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

    def __init__(self):
        """Load configuration, set up the LinkedIn client and the embedding model."""
        # Pull variables from a local .env file into the process environment.
        load_dotenv()
        self._load_credentials()

        # Sample profiles served whenever the app is in demo mode.
        self.demo_profiles = self._build_demo_profiles()

        # May flip self.demo_mode to True if the client cannot be created.
        self._init_linkedin_client()

        # Load pre-trained model for embedding generation.
        self.tokenizer = AutoTokenizer.from_pretrained(self.EMBEDDING_MODEL_NAME)
        self.model = AutoModel.from_pretrained(self.EMBEDDING_MODEL_NAME)

        # Cache for LinkedIn profiles (initialized here; not yet consulted by
        # any method in this class).
        self.profile_cache = {}

    def _load_credentials(self):
        """Read the LinkedIn settings from the environment and set ``demo_mode``.

        Only the *presence* of each value is printed, never the value itself.
        ``demo_mode`` becomes True when the client id, client secret or
        redirect URI is missing.
        """
        self.linkedin_client_id = os.getenv(self.ENV_CLIENT_ID)
        self.linkedin_client_secret = os.getenv(self.ENV_CLIENT_SECRET)
        self.linkedin_redirect_uri = os.getenv(self.ENV_REDIRECT_URI)
        self.access_token = os.getenv(self.ENV_ACCESS_TOKEN)

        # Debug logging
        print("LinkedIn Credentials Status:")
        print(f"Client ID exists: {bool(self.linkedin_client_id)}")
        print(f"Client Secret exists: {bool(self.linkedin_client_secret)}")
        print(f"Redirect URI exists: {bool(self.linkedin_redirect_uri)}")
        print(f"Access Token exists: {bool(self.access_token)}")

        app_settings = [
            self.linkedin_client_id,
            self.linkedin_client_secret,
            self.linkedin_redirect_uri,
        ]
        if not all(app_settings + [self.access_token]):
            print("WARNING: Some LinkedIn credentials are missing!")

        # Demo mode for testing without LinkedIn API
        self.demo_mode = not all(app_settings)

    @staticmethod
    def _build_demo_profiles() -> List[Dict]:
        """Return a fresh list of the built-in sample profiles."""
        return [
            {
                'name': 'John Smith',
                'headline': 'Senior Data Scientist at Tech Corp',
                'email': 'john.smith@example.com',
                'profile_url': 'https://linkedin.com/in/john-smith',
                'skills': ['Python', 'Machine Learning', 'TensorFlow', 'Deep Learning', 'NLP'],
                'experience': 'Senior Data Scientist at Tech Corp: Leading ML projects\nData Scientist at AI Solutions: Developed predictive models'
            },
            {
                'name': 'Sarah Johnson',
                'headline': 'Full Stack Developer | React | Node.js',
                'email': 'sarah.j@example.com',
                'profile_url': 'https://linkedin.com/in/sarah-johnson',
                'skills': ['React', 'Node.js', 'TypeScript', 'AWS', 'Docker'],
                'experience': 'Full Stack Developer at Web Solutions: Building scalable applications\nFrontend Developer at StartupX: Developed React applications'
            },
            {
                'name': 'Michael Chen',
                'headline': 'Machine Learning Engineer | AI Researcher',
                'email': 'michael.c@example.com',
                'profile_url': 'https://linkedin.com/in/michael-chen',
                'skills': ['PyTorch', 'Computer Vision', 'Deep Learning', 'Python', 'MLOps'],
                'experience': 'ML Engineer at AI Labs: Developing CV models\nResearch Scientist at Tech University: Published papers on deep learning'
            }
        ]

    def _init_linkedin_client(self):
        """Create the LinkedIn API client, or switch to demo mode on failure.

        Always leaves ``self.authentication`` and ``self.linkedin`` defined
        (``None`` when no client is available) so later code never hits an
        ``AttributeError`` on a half-initialized instance.
        """
        self.authentication = None
        self.linkedin = None

        if self.demo_mode:
            # Nothing to connect with; search_linkedin_profiles serves samples.
            print("LinkedIn credentials incomplete - running in demo mode")
            return

        try:
            # Initialize LinkedIn API client
            self.authentication = linkedin.LinkedInAuthentication(
                self.linkedin_client_id,
                self.linkedin_client_secret,
                self.linkedin_redirect_uri,
                self.LINKEDIN_SCOPES
            )
            if self.access_token:
                # NOTE(review): the library may expect a token object rather
                # than a raw string here - confirm against linkedin_v2.
                self.authentication.token = self.access_token
                print("Successfully set LinkedIn access token")
            else:
                print("No access token found - authentication will fail")
            self.linkedin = linkedin.LinkedInApplication(self.authentication)
            print("LinkedIn API connection initialized successfully")
        except Exception as e:
            # Top-level boundary: any failure here must not prevent the app
            # from starting, so degrade to the sample profiles instead.
            print(f"Error initializing LinkedIn API: {str(e)}")
            print("Falling back to demo mode")
            self.authentication = None
            self.linkedin = None
            self.demo_mode = True

    def search_linkedin_profiles(self, keywords: str, limit: int = 20) -> List[Dict]:
        """
        Search LinkedIn for profiles matching the given keywords.
        Falls back to demo profiles if LinkedIn API is not configured.

        Args:
            keywords: Free-text search terms.
            limit: Maximum number of profiles to return. Coerced to ``int``
                because UI sliders may deliver a float.

        Returns:
            A list of dicts with the keys ``name``, ``headline``, ``email``,
            ``profile_url``, ``skills`` and ``experience``. An empty list is
            returned when the API call fails.
        """
        limit = max(int(limit), 0)

        if self.demo_mode:
            print("Running in demo mode with sample profiles")
            # Honor the requested limit in demo mode as well.
            return self.demo_profiles[:limit]

        try:
            # Search for people on LinkedIn using the v2 API
            search_params = {
                'keywords': keywords,
                'count': limit,
                'facet': 'network,|S,F'
            }
            search_results = self.linkedin.search_profile(
                selectors=[
                    'id', 'first-name', 'last-name', 'headline',
                    'public-profile-url', 'email-address'
                ],
                params=search_params
            )
            profiles = []
            for profile in search_results.get('people', {}).get('values', []):
                profile_id = profile.get('id')
                full_name = f"{profile.get('firstName', '')} {profile.get('lastName', '')}"
                # Format profile data
                profiles.append({
                    # strip() avoids a lone space when both names are absent.
                    'name': full_name.strip(),
                    'headline': profile.get('headline', ''),
                    'email': profile.get('emailAddress', ''),
                    'profile_url': profile.get('publicProfileUrl', ''),
                    'skills': self._get_profile_skills(profile_id),
                    'experience': self._get_profile_experience(profile_id)
                })
            return profiles
        except Exception as e:
            print(f"Error searching LinkedIn profiles: {str(e)}")
            return []

    def _get_profile_skills(self, profile_id: str) -> List[str]:
        """
        Get skills for a specific profile

        Best effort: returns an empty list when the id is missing or the
        lookup fails for any reason.
        """
        if not profile_id:
            return []
        try:
            skills = self.linkedin.get_profile(
                profile_id,
                selectors=['skills']
            )
            return [skill['name'] for skill in skills.get('skills', {}).get('values', [])]
        except Exception as e:
            # Deliberately non-fatal: a profile without skills is still usable.
            print(f"Error fetching skills for profile {profile_id}: {str(e)}")
            return []

    def _get_profile_experience(self, profile_id: str) -> str:
        """
        Get formatted experience for a specific profile

        Each position is rendered as ``"<title> at <company>: <summary>"`` and
        the positions are joined with newlines. Best effort: returns an empty
        string when the id is missing or the lookup fails for any reason.
        """
        if not profile_id:
            return ''
        try:
            experience = self.linkedin.get_profile(
                profile_id,
                selectors=['positions']
            )
            formatted = []
            for position in experience.get('positions', {}).get('values', []):
                company = position.get('company', {}).get('name', '')
                title = position.get('title', '')
                description = position.get('summary', '')
                formatted.append(f"{title} at {company}: {description}")
            return '\n'.join(formatted)
        except Exception as e:
            # Deliberately non-fatal: experience text is optional for ranking.
            print(f"Error fetching experience for profile {profile_id}: {str(e)}")
            return ''

    def screen_candidates(self, job_description: str, keywords: str, limit: int = 10):
        """
        Screen candidates from LinkedIn based on job description

        Args:
            job_description: Text the candidates are compared against.
            keywords: Search terms forwarded to ``search_linkedin_profiles``.
            limit: Maximum number of candidates to screen.

        Returns:
            A list of result dicts (``Name``, ``Email``, ``Headline``,
            ``Profile URL``, ``Similarity Score``) ordered from most to least
            similar, or a single-element list holding an ``Error`` entry when
            no profiles are available.
        """
        # Search LinkedIn for matching profiles
        profiles = self.search_linkedin_profiles(keywords, limit)
        if not profiles:
            return [{"Error": "No profiles found or LinkedIn API access error"}]

        # Generate embeddings for job description and profiles
        job_embed = self.generate_embeddings([job_description])
        profile_texts = [
            f"{p['headline']}\n{p['experience']}\n{' '.join(p['skills'])}"
            for p in profiles
        ]
        profile_embeds = self.generate_embeddings(profile_texts)

        # One row (the job) against every profile -> take that single row.
        similarities = cosine_similarity(job_embed, profile_embeds)[0]

        # Highest similarity first.
        sorted_indices = similarities.argsort()[::-1]

        # Format results
        results = []
        for idx in sorted_indices:
            profile = profiles[int(idx)]
            results.append({
                'Name': profile['name'],
                'Email': profile['email'],
                'Headline': profile['headline'],
                'Profile URL': profile['profile_url'],
                'Similarity Score': f"{similarities[idx]*100:.2f}%"
            })
        return results

    def generate_embeddings(self, texts):
        """Return one mean-pooled embedding per entry of ``texts``.

        Inputs are padded to a common length and truncated to 512 tokens.
        Runs under ``torch.no_grad()`` because this is inference only.
        """
        with torch.no_grad():
            inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
            outputs = self.model(**inputs)
            embeddings = self._mean_pooling(outputs, inputs['attention_mask'])
        return embeddings

    def _mean_pooling(self, model_output, attention_mask):
        """Average token embeddings over the positions enabled by the mask.

        ``model_output[0]`` is taken as the per-token embeddings. The mask is
        broadcast to the same shape so masked positions contribute nothing,
        and the divisor is clamped to avoid division by zero for a row whose
        mask is entirely zero.
        """
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    def gradio_interface(self):
        """Build and return the Gradio interface wired to ``screen_candidates``."""
        iface = gr.Interface(
            fn=self.screen_candidates,
            inputs=[
                gr.Textbox(label="Job Description"),
                gr.Textbox(label="LinkedIn Search Keywords"),
                gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Number of candidates to screen")
            ],
            outputs=gr.JSON(label="Matching Candidates"),
            title="AI Resume Screening with LinkedIn Integration",
            description="Search LinkedIn profiles and match candidates to job descriptions using AI embeddings."
        )
        return iface
# Script entry point: construct the screening app, build its Gradio UI and
# serve it with debug output enabled. Nothing runs when this file is imported.
if __name__ == "__main__":
    app = LinkedInResumeScreeningApp()
    demo = app.gradio_interface()
    demo.launch(debug=True)