# app.py — Email Writing AI Agent (Gradio app)
import os
import requests
import gradio as gr
from openai import OpenAI
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
# Fetch API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PROXYCURL_API_KEY = os.getenv("PROXYCURL_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
def sanitize_data(data, default_value=""):
    """Return *data* stripped of surrounding whitespace, or *default_value*.

    The fallback is used when *data* is not a string or strips down to an
    empty string.
    """
    if not isinstance(data, str):
        return default_value
    cleaned = data.strip()
    return cleaned if cleaned else default_value
# Function to fetch LinkedIn data using the Proxycurl API
def fetch_linkedin_data(linkedin_url):
    """Fetch a LinkedIn profile via the Proxycurl API.

    Args:
        linkedin_url: Public LinkedIn profile URL to look up.

    Returns:
        The parsed JSON payload as a dict on success, or ``None`` on any
        HTTP error status, timeout, or network failure. Failures are logged
        rather than raised so callers can degrade gracefully.
    """
    api_key = os.getenv("PROXYCURL_API_KEY")
    headers = {'Authorization': f'Bearer {api_key}'}
    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'
    logging.info("Fetching LinkedIn data...")
    try:
        response = requests.get(api_endpoint,
                                params={'url': linkedin_url},
                                headers=headers,
                                timeout=10)
        if response.status_code == 200:
            logging.info("LinkedIn data fetched successfully.")
            return response.json()
        # Non-200 response: log the body to aid debugging, return None.
        logging.error(f"Error fetching LinkedIn data: {response.text}")
        return None
    except requests.RequestException as e:
        # Narrowed from a bare `except Exception`: only network/HTTP-level
        # failures (incl. JSON decode errors, which subclass RequestException)
        # are expected here; programming errors should surface, not be hidden.
        logging.error(f"Exception during LinkedIn data fetch: {e}")
        return None
# Function to fetch company information using Firecrawl API
def fetch_company_info(company_url):
    """Crawl a company website via the Firecrawl API.

    Args:
        company_url: The company's website URL (or name, as entered by the user).

    Returns:
        The parsed JSON crawl result as a dict on success, or ``None`` on any
        HTTP error status, timeout, or network failure. Failures are logged
        rather than raised so callers can degrade gracefully.
    """
    api_key = os.getenv("FIRECRAWL_API_KEY")
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    api_endpoint = 'https://api.firecrawl.dev/v1/crawl'
    data = {
        "url": company_url,
        "limit": 100,
        "scrapeOptions": {
            "formats": ["markdown", "html"]
        }
    }
    logging.info("Fetching company information...")
    try:
        response = requests.post(api_endpoint, json=data, headers=headers, timeout=15)
        if response.status_code == 200:
            logging.info("Company information fetched successfully.")
            return response.json()
        # Non-200 response: log the body to aid debugging, return None.
        logging.error(f"Error fetching company information: {response.text}")
        return None
    except requests.RequestException as e:
        # Narrowed from a bare `except Exception`, consistent with
        # fetch_linkedin_data: only network/HTTP-level failures are expected.
        logging.error(f"Exception during company info fetch: {e}")
        return None
# Function to structure the email dynamically with fallback for missing data
def structure_email(user_data, linkedin_info, company_info):
    """Compose the application email body from user, LinkedIn and company data.

    Args:
        user_data: Dict with at least 'name', 'role' and 'company_url' keys.
        linkedin_info: Dict from the Proxycurl lookup (may be sparse).
        company_info: Dict from the Firecrawl crawl (may be sparse).

    Returns:
        The fully rendered email body as a string. Every field falls back to
        generic phrasing via sanitize_data's default, so the email is always
        complete even when the upstream lookups returned little data.
    """
    # Each field uses a single, consistent fallback passed straight to
    # sanitize_data (the original had two divergent defaults per field
    # depending on whether the key was missing vs. empty).
    linkedin_role = sanitize_data(linkedin_info.get('current_role'), user_data['role'])
    linkedin_skills = sanitize_data(linkedin_info.get('skills'), 'skills relevant to this position')
    linkedin_industry = sanitize_data(linkedin_info.get('industry'), 'the industry')
    # FIX: prefer the crawled company_name over the raw user-entered
    # "Company URL or Name" field, so the email shows a name, not a URL.
    company_name = sanitize_data(
        company_info.get('company_name'),
        sanitize_data(user_data.get('company_url'), 'the company')
    )
    company_mission = sanitize_data(company_info.get('mission'), f"{company_name}'s mission")
    company_goal = sanitize_data(company_info.get('goal'), 'achieving excellence')
    # Construct the email with fully sanitized and available data
    email_body = (
        f"Dear Hiring Manager,\n\n"
        f"I am writing to express my interest in the {sanitize_data(user_data['role'])} position at {company_name}. "
        f"{company_mission} aligns closely with my professional experience in {linkedin_industry}. "
        f"As a {linkedin_role}, I have developed expertise in {linkedin_skills}, which are highly relevant to this role.\n\n"
        f"My background in {linkedin_skills} will contribute significantly to {company_goal}. "
        f"I am eager to bring my expertise to {company_name} and collaborate with your team.\n\n"
        f"I would appreciate the opportunity to discuss how my background aligns with the needs of your organization. "
        f"Thank you for your time and consideration. I look forward to the possibility of contributing to your team.\n\n"
        f"Best regards,\n{sanitize_data(user_data['name'])}"
    )
    return email_body
# Function to validate the generated email based on critical components with improved flexibility
def validate_email(email_content, user_data):
    """Check that the generated email contains every required component.

    Args:
        email_content: The candidate email text.
        user_data: Dict providing the 'name' and 'role' that must appear.

    Returns:
        True when all required keywords occur (case-insensitively) in the
        email; False otherwise. Missing keywords are logged.
    """
    logging.info("Validating email content...")
    # Basic components we want to check in the email
    required_keywords = (
        user_data['name'],
        user_data['role'],
        "skills",
        "experience",
        "contribute",
        "Best regards",
    )
    # Case-insensitive containment check for each required element.
    haystack = email_content.lower()
    missing_elements = [kw for kw in required_keywords if kw.lower() not in haystack]
    if missing_elements:
        logging.info(f"Missing elements: {missing_elements}")
        return False
    logging.info("Email content validation passed.")
    return True
# Custom Agent class following ReAct pattern
class Agent:
    """A named worker performing one step of the email pipeline.

    The behaviour of act() is selected by the agent's name:
    "Data Collection Agent" fetches LinkedIn and company data, while
    "Email Generation Agent" renders the email from previously collected data.
    """

    def __init__(self, name, instructions, user_data):
        self.name = name
        self.instructions = instructions
        self.user_data = user_data

    def act(self):
        """Run this agent's step; returns (None, None) when collection fails."""
        if self.name == "Data Collection Agent":
            profile = fetch_linkedin_data(self.user_data['linkedin_url'])
            company = fetch_company_info(self.user_data['company_url'])
            # Both lookups must yield non-empty data for the step to count.
            if profile and company:
                return profile, company
            return None, None
        if self.name == "Email Generation Agent":
            return structure_email(
                self.user_data['user_data'],
                self.user_data['linkedin_info'],
                self.user_data['company_info'],
            )
# Simulated Swarm class to manage multiple agents
class Swarm:
    """Minimal stand-in for a multi-agent swarm: registers agents, runs the first."""

    def __init__(self):
        self.agents = []

    def add_agent(self, agent):
        """Register *agent* with the swarm."""
        self.agents.append(agent)

    def run(self):
        """Execute the data-collection agent (agents[0]).

        Returns a (linkedin_info, company_info) tuple on success, or a plain
        error string when either lookup came back empty — callers must check
        the result type before unpacking.
        """
        profile, company = self.agents[0].act()
        if profile and company:
            return profile, company
        return "Error: Could not retrieve data for LinkedIn or company information."
# Function to run the agent, using Swarm and ReAct
def run_agent(name, email, phone, linkedin_url, company_url, role):
    """Orchestrate data collection and email generation for one form submission.

    Args:
        name, email, phone, linkedin_url, company_url, role: The six raw text
        fields from the Gradio form, in that order.

    Returns:
        The generated email body on success, or a human-readable error string.
    """
    user_data = {
        "name": name,
        "email": email,
        "phone": phone,
        "linkedin_url": linkedin_url,
        "company_url": company_url,
        "role": role
    }
    # Initialize Swarm and add the Data Collection Agent
    email_swarm = Swarm()
    data_collection_agent = Agent("Data Collection Agent", "Collect user inputs and relevant data", user_data)
    email_swarm.add_agent(data_collection_agent)
    # BUG FIX: Swarm.run() returns either an error string or a 2-tuple.
    # The previous code unpacked the result directly, which raised
    # ValueError on the error string; check the type before unpacking.
    result = email_swarm.run()
    if isinstance(result, str):
        return result
    linkedin_info, company_info = result
    agent_data = {
        "user_data": user_data,
        "linkedin_info": linkedin_info,
        "company_info": company_info
    }
    email_agent = Agent("Email Generation Agent", "Generate the email content", agent_data)
    email_content = email_agent.act()
    # Iterative refinement using ReAct pattern.
    # NOTE(review): structure_email is deterministic, so each retry currently
    # regenerates identical text; loop kept for parity with the original flow.
    max_iterations = 3
    for i in range(max_iterations):
        if validate_email(email_content, user_data):
            return email_content
        logging.info(f"Iteration {i+1}: Refining email...")
        email_content = structure_email(user_data, linkedin_info, company_info)
    return "Unable to generate a valid email after 3 attempts."
# Set up the Gradio interface.
# The six text inputs map positionally onto run_agent's parameters
# (name, email, phone, linkedin_url, company_url, role).
final_interface = gr.Interface(
    fn=run_agent,
    inputs=[
        gr.Textbox(label="Name"),
        gr.Textbox(label="Email"),
        gr.Textbox(label="Phone Number"),
        gr.Textbox(label="LinkedIn Profile URL"),
        gr.Textbox(label="Company URL or Name"),
        gr.Textbox(label="Role Being Applied For")
    ],
    outputs="text",  # run_agent returns a plain string (email or error message)
    title="Email Writing AI Agent",
    description="Autonomously generate a professional email tailored to the job application."
)

# Launch the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    final_interface.launch()