# Page header (scraped from the hosting site's file view):
# Anupam251272 - "Create app.py" - commit a0f700b (verified)
import torch
import transformers
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from linkedin_api import Linkedin
import os
from dotenv import load_dotenv
import re
from typing import List, Dict
from linkedin_v2 import linkedin
from transformers import AutoTokenizer, AutoModel
class LinkedInResumeScreeningApp:
    """Rank candidate profiles against a job description using embeddings.

    Profiles come from the LinkedIn API when credentials are configured, and
    from a small built-in sample set ("demo mode") otherwise.  The job
    description and each profile are embedded with a MiniLM sentence-transformer
    checkpoint and ranked by cosine similarity.

    Configuration is read from environment variables (a ``.env`` file is loaded
    first): ``LINKEDIN_CLIENT_ID``, ``LINKEDIN_CLIENT_SECRET``,
    ``LINKEDIN_REDIRECT_URI`` and ``LINKEDIN_ACCESS_TOKEN``.
    """

    def __init__(self):
        """Load configuration, set up the LinkedIn client and the embedding model."""
        load_dotenv()

        # SECURITY: os.getenv() takes the *name* of an environment variable.
        # Credential values must never appear in source code; any secret that
        # was ever committed to this file must be treated as compromised and
        # rotated in the LinkedIn developer console.
        self.linkedin_client_id = os.getenv('LINKEDIN_CLIENT_ID')
        self.linkedin_client_secret = os.getenv('LINKEDIN_CLIENT_SECRET')
        self.linkedin_redirect_uri = os.getenv('LINKEDIN_REDIRECT_URI')
        self.access_token = os.getenv('LINKEDIN_ACCESS_TOKEN')

        # Debug logging: report only whether each value is present, never the value.
        print("LinkedIn Credentials Status:")
        print(f"Client ID exists: {bool(self.linkedin_client_id)}")
        print(f"Client Secret exists: {bool(self.linkedin_client_secret)}")
        print(f"Redirect URI exists: {bool(self.linkedin_redirect_uri)}")
        print(f"Access Token exists: {bool(self.access_token)}")

        app_credentials = [
            self.linkedin_client_id,
            self.linkedin_client_secret,
            self.linkedin_redirect_uri,
        ]
        if not all(app_credentials + [self.access_token]):
            print("WARNING: Some LinkedIn credentials are missing!")

        # Demo mode for testing without LinkedIn API
        self.demo_mode = not all(app_credentials)
        self.demo_profiles = self._build_demo_profiles()

        self._init_linkedin_client()

        # Load pre-trained model for embedding generation
        self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
        self.model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

        # Initialize cache for LinkedIn profiles
        self.profile_cache = {}

    @staticmethod
    def _build_demo_profiles() -> List[Dict]:
        """Return a fresh list of the sample profiles used in demo mode."""
        return [
            {
                'name': 'John Smith',
                'headline': 'Senior Data Scientist at Tech Corp',
                'email': 'john.smith@example.com',
                'profile_url': 'https://linkedin.com/in/john-smith',
                'skills': ['Python', 'Machine Learning', 'TensorFlow', 'Deep Learning', 'NLP'],
                'experience': 'Senior Data Scientist at Tech Corp: Leading ML projects\nData Scientist at AI Solutions: Developed predictive models',
            },
            {
                'name': 'Sarah Johnson',
                'headline': 'Full Stack Developer | React | Node.js',
                'email': 'sarah.j@example.com',
                'profile_url': 'https://linkedin.com/in/sarah-johnson',
                'skills': ['React', 'Node.js', 'TypeScript', 'AWS', 'Docker'],
                'experience': 'Full Stack Developer at Web Solutions: Building scalable applications\nFrontend Developer at StartupX: Developed React applications',
            },
            {
                'name': 'Michael Chen',
                'headline': 'Machine Learning Engineer | AI Researcher',
                'email': 'michael.c@example.com',
                'profile_url': 'https://linkedin.com/in/michael-chen',
                'skills': ['PyTorch', 'Computer Vision', 'Deep Learning', 'Python', 'MLOps'],
                'experience': 'ML Engineer at AI Labs: Developing CV models\nResearch Scientist at Tech University: Published papers on deep learning',
            },
        ]

    def _init_linkedin_client(self) -> None:
        """Create the LinkedIn API client, switching to demo mode on failure.

        ``self.authentication`` and ``self.linkedin`` are always defined
        afterwards; both stay ``None`` when the app runs in demo mode.
        """
        self.authentication = None
        self.linkedin = None
        if self.demo_mode:
            # The client is never queried in demo mode, so skip building it.
            return

        try:
            # Initialize LinkedIn API client
            self.authentication = linkedin.LinkedInAuthentication(
                self.linkedin_client_id,
                self.linkedin_client_secret,
                self.linkedin_redirect_uri,
                ['r_liteprofile', 'r_emailaddress', 'w_member_social']
            )
            if self.access_token:
                self.authentication.token = self.access_token
                print("Successfully set LinkedIn access token")
            else:
                print("No access token found - authentication will fail")
            self.linkedin = linkedin.LinkedInApplication(self.authentication)
            print("LinkedIn API connection initialized successfully")
        except Exception as e:
            print(f"Error initializing LinkedIn API: {str(e)}")
            print("Falling back to demo mode")
            # Actually perform the fallback the message announces.
            self.demo_mode = True
            self.linkedin = None

    def search_linkedin_profiles(self, keywords: str, limit: int = 20) -> List[Dict]:
        """
        Search LinkedIn for profiles matching the given keywords.
        Falls back to demo profiles if LinkedIn API is not configured.

        Args:
            keywords: Free-text search keywords.
            limit: Maximum number of profiles to return.

        Returns:
            A list of profile dicts with the keys ``name``, ``headline``,
            ``email``, ``profile_url``, ``skills`` and ``experience``.  An
            empty list is returned when the API call fails.
        """
        # UI sliders may deliver the limit as a float.
        limit = int(limit)

        if self.demo_mode:
            print("Running in demo mode with sample profiles")
            return self.demo_profiles[:max(limit, 0)]

        try:
            # Search for people on LinkedIn using the v2 API
            search_params = {
                'keywords': keywords,
                'count': limit,
                'facet': 'network,|S,F'
            }
            search_results = self.linkedin.search_profile(
                selectors=[
                    'id', 'first-name', 'last-name', 'headline',
                    'public-profile-url', 'email-address'
                ],
                params=search_params
            )
            profiles = []
            for profile in search_results.get('people', {}).get('values', []):
                full_name = f"{profile.get('firstName', '')} {profile.get('lastName', '')}"
                profiles.append({
                    'name': full_name.strip(),
                    'headline': profile.get('headline', ''),
                    'email': profile.get('emailAddress', ''),
                    'profile_url': profile.get('publicProfileUrl', ''),
                    'skills': self._get_profile_skills(profile.get('id')),
                    'experience': self._get_profile_experience(profile.get('id'))
                })
            return profiles
        except Exception as e:
            # Best effort: the caller turns an empty list into an error entry.
            print(f"Error searching LinkedIn profiles: {str(e)}")
            return []

    def _get_profile_skills(self, profile_id: str) -> List[str]:
        """
        Get skills for a specific profile.

        Returns the skill names found in the API response; entries without a
        name are skipped.  Returns an empty list if the lookup fails.
        """
        try:
            response = self.linkedin.get_profile(
                profile_id,
                selectors=['skills']
            )
            skill_entries = (response.get('skills') or {}).get('values') or []
            return [entry['name'] for entry in skill_entries if entry.get('name')]
        except Exception:
            # Skills are optional enrichment; a failed lookup must not abort the search.
            return []

    def _get_profile_experience(self, profile_id: str) -> str:
        """
        Get formatted experience for a specific profile.

        Returns one ``"<title> at <company>: <summary>"`` line per position,
        joined with newlines, or an empty string if the lookup fails.
        """
        try:
            response = self.linkedin.get_profile(
                profile_id,
                selectors=['positions']
            )
            positions = (response.get('positions') or {}).get('values') or []
            formatted = []
            for position in positions:
                # A position may carry no company record at all.
                company = (position.get('company') or {}).get('name', '')
                title = position.get('title', '')
                description = position.get('summary', '')
                formatted.append(f"{title} at {company}: {description}")
            return '\n'.join(formatted)
        except Exception:
            # Experience is optional enrichment; a failed lookup must not abort the search.
            return ''

    def screen_candidates(self, job_description: str, keywords: str, limit: int = 10):
        """
        Screen candidates from LinkedIn based on job description.

        Args:
            job_description: Text of the role to match candidates against.
            keywords: Search keywords forwarded to the profile search.
            limit: Maximum number of candidates to screen.

        Returns:
            A list of result dicts (``Name``, ``Email``, ``Headline``,
            ``Profile URL``, ``Similarity Score``) ordered from most to least
            similar, or a single-element list holding an ``Error`` entry when
            no profiles are available.
        """
        # Search LinkedIn for matching profiles
        profiles = self.search_linkedin_profiles(keywords, int(limit))
        if not profiles:
            return [{"Error": "No profiles found or LinkedIn API access error"}]

        # Generate embeddings for job description and profiles
        job_embed = self.generate_embeddings([job_description])
        profile_texts = [
            "\n".join((
                p.get('headline') or '',
                p.get('experience') or '',
                ' '.join(p.get('skills') or []),
            ))
            for p in profiles
        ]
        profile_embeds = self.generate_embeddings(profile_texts)

        # One row (the job) against every profile; keep that single row.
        similarities = cosine_similarity(job_embed, profile_embeds)[0]

        # Highest similarity first.
        ranked_indices = similarities.argsort()[::-1]

        results = []
        for idx in ranked_indices:
            profile = profiles[idx]
            results.append({
                'Name': profile.get('name', ''),
                'Email': profile.get('email', ''),
                'Headline': profile.get('headline', ''),
                'Profile URL': profile.get('profile_url', ''),
                'Similarity Score': f"{similarities[idx]*100:.2f}%"
            })
        return results

    def generate_embeddings(self, texts):
        """Return one mean-pooled embedding per entry of ``texts``.

        Inputs are padded to a common length and truncated to 512 tokens.
        Gradient tracking is disabled because the model is used for inference only.
        """
        with torch.no_grad():
            inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
            outputs = self.model(**inputs)
            embeddings = self._mean_pooling(outputs, inputs['attention_mask'])
        return embeddings

    def _mean_pooling(self, model_output, attention_mask):
        """Average token embeddings over the positions selected by ``attention_mask``.

        ``model_output[0]`` is taken as the per-token embeddings.  The mask is
        broadcast to the same shape so masked positions contribute nothing to
        the sum; the divisor is clamped to avoid division by zero for a row
        whose mask is all zeros.
        """
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        masked_sum = torch.sum(token_embeddings * input_mask_expanded, 1)
        token_counts = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return masked_sum / token_counts

    def gradio_interface(self):
        """Build and return the Gradio interface wired to ``screen_candidates``."""
        iface = gr.Interface(
            fn=self.screen_candidates,
            inputs=[
                gr.Textbox(label="Job Description"),
                gr.Textbox(label="LinkedIn Search Keywords"),
                gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Number of candidates to screen")
            ],
            outputs=gr.JSON(label="Matching Candidates"),
            title="AI Resume Screening with LinkedIn Integration",
            description="Search LinkedIn profiles and match candidates to job descriptions using AI embeddings."
        )
        return iface
# Script entry point: construct the screening app, build its Gradio UI,
# and serve it with debug output enabled. Runs only when executed directly.
if __name__ == "__main__":
    app = LinkedInResumeScreeningApp()
    demo = app.gradio_interface()
    demo.launch(debug=True)