HackBuddyAI / generate_participants.py
Mahdiyar
Add initial implementation of Hackathon Team Organizer application
1f38061
raw
history blame
17.2 kB
#!/usr/bin/env python3
"""
Generate Synthetic Hackathon Participants
This script creates 100 diverse hackathon participants and saves them to the database.
The participants represent various personas that might attend a hackathon focused on
"Connected Experiences" and AI agents.
"""
import random
import sys
import os
from faker import Faker
from hackathon_organizer.database import initialize_database, add_participant, get_participants_dataframe
# Shared Faker instance. generate_participants() calls fake.name() and
# fake.email() on it to produce each participant's name and email address.
# No seed is set in this file, so generated identities differ between runs.
fake = Faker()
# Background sentence templates for technical participants.
# Every "{placeholder}" names a key of STACK_COMPONENTS; generate_background()
# substitutes a randomly chosen option for each placeholder it finds.
TECHNICAL_BACKGROUNDS = [
    # Hands-on engineering roles
    "Full-stack developer with {years} years of experience in {stack}. {additional}",
    "Backend engineer specializing in {backend_tech}. {additional}",
    "Frontend developer focused on {frontend_tech}. {additional}",
    "Mobile developer with expertise in {mobile_tech}. {additional}",
    "DevOps engineer with experience in {devops_tech}. {additional}",
    "Data scientist working with {ds_tech}. {additional}",
    "Machine learning engineer focused on {ml_tech}. {additional}",
    "AI researcher specializing in {ai_tech}. {additional}",
    "Game developer using {game_tech}. {additional}",
    "Embedded systems engineer working with {embedded_tech}. {additional}",
    # Technical roles that are not primarily about writing code
    "UX/UI designer with {years} years of experience. {additional}",
    "Product manager for {product_type} products. {additional}",
    "QA engineer with expertise in {qa_tech}. {additional}",
    "Technical writer specializing in {writing_focus}. {additional}",
    "Solution architect with background in {arch_focus}. {additional}",
]
# Background sentence templates for non-technical participants.
# Placeholders are resolved by generate_background() against
# NON_TECH_COMPONENTS.
NON_TECHNICAL_BACKGROUNDS = [
    "Marketing professional with experience in {marketing_focus}. {additional}",
    "Business development specialist in the {industry} industry. {additional}",
    "Project manager with {years} years of experience in {pm_focus}. {additional}",
    "Entrepreneur and founder of a {startup_type} startup. {additional}",
    "Student studying {field} at {university}. {additional}",
    "Design thinking facilitator and innovation consultant. {additional}",
    "Content creator focusing on {content_focus}. {additional}",
    "Sales professional in the {sales_industry} sector. {additional}",
    "HR specialist interested in tech talent and culture. {additional}",
    "Non-profit professional looking to leverage technology for social impact. {additional}",
]
# First-person goal statements. generate_participants() picks one at random
# for every participant, regardless of whether they are technical.
GOALS = [
    # Motivated by learning
    "I want to learn about AI and how it can enhance user experiences at events.",
    "I'm here to understand how to build AI agents and apply them to real-world problems.",
    "I hope to gain practical experience with AI technologies and expand my technical skills.",
    "I want to learn from experienced developers and improve my coding abilities.",
    "I'm looking to understand how AI can create more meaningful human connections.",
    # Motivated by shipping a project
    "I want to build a prototype that demonstrates the power of AI in connecting people.",
    "I'm hoping to create an innovative solution that addresses the challenges of virtual events.",
    "My goal is to develop an AI agent that enhances real-world social interactions.",
    "I want to build something impressive for my portfolio that showcases my skills.",
    "I'm aiming to create a practical tool that event organizers can actually use.",
    # Motivated by networking
    "I'm primarily here to network with other professionals in the AI and event space.",
    "I want to meet potential co-founders for a startup idea I've been developing.",
    "I'm looking to connect with mentors who can guide my career in tech.",
    "I hope to find collaborators for future projects beyond this hackathon.",
    "I want to expand my professional network in the Toronto tech community.",
    # Motivated by career growth
    "I'm exploring career opportunities in AI development and looking to showcase my skills.",
    "I want to transition from my current role to a more tech-focused position.",
    "I'm hoping this experience will help me land a job at an innovative tech company.",
    "I want to demonstrate my abilities to potential employers or clients.",
    "I'm building skills that will help me advance in my current organization.",
    # Motivated by the experience itself
    "I'm here for the creative experience and the thrill of building something in 24 hours.",
    "I want to have fun while challenging myself technically.",
    "I'm curious about hackathons and wanted to experience one firsthand.",
    "I enjoy the collaborative atmosphere of hackathons and the energy they generate.",
    "I'm looking for a break from my routine and a chance to work on something different.",
]
# Fill-in values for the placeholders used by TECHNICAL_BACKGROUNDS.
# Each key matches a "{key}" placeholder; generate_background() replaces the
# placeholder with one randomly chosen entry from the corresponding list.
STACK_COMPONENTS = {
    # Years of experience, as strings "1" through "15"
    "years": list(map(str, range(1, 16))),
    "stack": [
        "JavaScript/TypeScript and Python",
        "MERN stack",
        "MEAN stack",
        "Ruby on Rails",
        "Django and React",
        "Vue.js and Node.js",
        "PHP and Laravel",
        "Java Spring Boot",
        ".NET and Angular",
        "Go and React",
        "Python Flask and Vue.js",
    ],
    "backend_tech": [
        "Node.js and Express",
        "Django and PostgreSQL",
        "Ruby on Rails",
        "Java Spring Boot",
        "ASP.NET Core",
        "PHP and Laravel",
        "Go microservices",
        "Python FastAPI",
        "GraphQL APIs",
        "Serverless architectures on AWS",
    ],
    "frontend_tech": [
        "React and Redux",
        "Angular and RxJS",
        "Vue.js and Vuex",
        "Svelte and SvelteKit",
        "Next.js",
        "Gatsby",
        "React Native",
        "Flutter",
        "TypeScript and Material UI",
        "Tailwind CSS and Alpine.js",
    ],
    "mobile_tech": [
        "React Native",
        "Flutter",
        "Swift for iOS",
        "Kotlin for Android",
        "Xamarin",
        "Ionic",
        "PWAs",
        "Unity for mobile games",
        "NativeScript",
        "Mobile AR/VR applications",
    ],
    "devops_tech": [
        "Kubernetes and Docker",
        "AWS infrastructure",
        "Azure DevOps",
        "Google Cloud Platform",
        "CI/CD pipelines",
        "Terraform and infrastructure as code",
        "Jenkins and GitLab CI",
        "Monitoring and observability tools",
        "Site Reliability Engineering practices",
        "Security automation",
    ],
    "ds_tech": [
        "Python, Pandas, and scikit-learn",
        "R and Tidyverse",
        "SQL and data warehousing",
        "Tableau and data visualization",
        "Big data technologies like Spark",
        "ETL pipelines",
        "Statistical analysis",
        "A/B testing methodologies",
        "Natural Language Processing",
        "Computer Vision",
    ],
    "ml_tech": [
        "TensorFlow and Keras",
        "PyTorch",
        "scikit-learn",
        "deep learning models",
        "MLOps and model deployment",
        "reinforcement learning",
        "computer vision algorithms",
        "NLP models",
        "recommendation systems",
        "time series forecasting",
    ],
    "ai_tech": [
        "large language models",
        "generative AI",
        "conversational agents",
        "computer vision systems",
        "reinforcement learning",
        "multimodal AI",
        "AI ethics and responsible AI",
        "autonomous systems",
        "AI for social good",
        "explainable AI",
    ],
    "game_tech": [
        "Unity",
        "Unreal Engine",
        "Godot",
        "WebGL",
        "AR/VR development",
        "mobile game development",
        "game AI",
        "procedural generation",
        "multiplayer networking",
        "game physics",
    ],
    "embedded_tech": [
        "Arduino",
        "Raspberry Pi",
        "IoT devices",
        "embedded Linux",
        "RTOS",
        "C/C++ for microcontrollers",
        "sensor networks",
        "firmware development",
        "hardware interfaces",
        "low-power systems",
    ],
    "product_type": [
        "SaaS",
        "mobile",
        "enterprise",
        "consumer",
        "AI-powered",
        "IoT",
        "fintech",
        "healthtech",
        "edtech",
        "e-commerce",
    ],
    "qa_tech": [
        "automated testing",
        "Selenium and Cypress",
        "performance testing",
        "security testing",
        "mobile app testing",
        "API testing",
        "test-driven development",
        "behavior-driven development",
        "continuous integration testing",
        "accessibility testing",
    ],
    "writing_focus": [
        "API documentation",
        "user guides",
        "developer tutorials",
        "knowledge bases",
        "technical blogs",
        "software requirements",
        "open source documentation",
        "technical specifications",
        "UX writing",
        "compliance documentation",
    ],
    "arch_focus": [
        "cloud architectures",
        "microservices",
        "serverless",
        "enterprise systems",
        "distributed systems",
        "API design",
        "security architectures",
        "data platforms",
        "IoT systems",
        "mobile and web applications",
    ],
    # Optional closing sentence appended to every technical background
    "additional": [
        "I enjoy working in collaborative environments.",
        "I'm passionate about creating accessible technology.",
        "I've contributed to several open source projects.",
        "I'm interested in ethical technology and responsible innovation.",
        "I enjoy mentoring junior developers.",
        "I have a background in design thinking.",
        "I've worked in startups and enterprise environments.",
        "I'm particularly interested in AI ethics.",
        "I love solving complex algorithmic problems.",
        "I focus on creating user-centered solutions.",
        "I have experience leading small technical teams.",
        "I'm self-taught and constantly learning new technologies.",
        "I have a computer science degree but learned most of my skills on the job.",
        "I'm currently transitioning careers into tech.",
        "I'm an advocate for diversity in tech.",
        "I've organized tech meetups and community events.",
        "I'm interested in the intersection of technology and sustainability.",
        "I have experience in both technical and business roles.",
        "I'm passionate about making technology more accessible to everyone.",
        "I enjoy the challenges of working with legacy systems.",
        "",  # Blank entry: some participants get no closing sentence
    ],
}
# Fill-in values for the placeholders used by NON_TECHNICAL_BACKGROUNDS.
# Each key matches a "{key}" placeholder; generate_background() replaces the
# placeholder with one randomly chosen entry from the corresponding list.
NON_TECH_COMPONENTS = {
    "marketing_focus": [
        "digital marketing",
        "content strategy",
        "brand development",
        "social media campaigns",
        "event promotion",
        "growth hacking",
        "community building",
        "influencer partnerships",
        "SEO/SEM",
        "product marketing",
    ],
    "industry": [
        "technology",
        "healthcare",
        "finance",
        "education",
        "retail",
        "entertainment",
        "manufacturing",
        "non-profit",
        "government",
        "hospitality",
    ],
    # Years of experience, as strings "1" through "15"
    "years": [str(i) for i in range(1, 16)],
    "pm_focus": [
        "agile methodologies",
        "waterfall approaches",
        "hybrid frameworks",
        "technical projects",
        "creative initiatives",
        "product launches",
        "organizational change",
        "international teams",
        "startup environments",
        "enterprise transformations",
    ],
    "startup_type": [
        "tech",
        "social impact",
        "e-commerce",
        "healthcare",
        "education",
        "fintech",
        "sustainability",
        "B2B SaaS",
        "consumer app",
        "AI/ML",
    ],
    "field": [
        "Computer Science",
        "Business Administration",
        "Design",
        "Marketing",
        "Engineering",
        "Data Science",
        "Psychology",
        "Communications",
        "Information Technology",
        "Entrepreneurship",
    ],
    # Institution names use their official forms: the former "Ryerson
    # University" is now Toronto Metropolitan University, and the school in
    # Waterloo is named "University of Waterloo".
    "university": [
        "University of Toronto",
        "York University",
        "Toronto Metropolitan University",
        "Seneca College",
        "Humber College",
        "OCAD University",
        "George Brown College",
        "McMaster University",
        "University of Waterloo",
        "Queen's University",
    ],
    "content_focus": [
        "tech tutorials",
        "industry trends",
        "career development",
        "product reviews",
        "educational content",
        "lifestyle and tech",
        "startup stories",
        "coding challenges",
        "design inspiration",
        "thought leadership",
    ],
    "sales_industry": [
        "SaaS",
        "hardware",
        "consulting services",
        "enterprise solutions",
        "consumer tech",
        "B2B technology",
        "telecommunications",
        "cybersecurity",
        "cloud services",
        "digital transformation",
    ],
    # Optional closing sentence appended to every non-technical background
    "additional": [
        "I'm excited to learn more about technology and how it can solve real problems.",
        "I bring a unique perspective from my non-technical background.",
        "I'm interested in the human aspects of technology.",
        "I'm looking to collaborate with technical team members and contribute my skills.",
        "I have strong communication and presentation skills.",
        "I excel at understanding user needs and translating them into requirements.",
        "I'm good at explaining complex concepts to diverse audiences.",
        "I have experience managing stakeholder expectations.",
        "I'm skilled at identifying market opportunities.",
        "I enjoy bridging the gap between technical and non-technical teams.",
        "I have a creative approach to problem-solving.",
        "I'm passionate about user experience and accessibility.",
        "I have a network of industry connections that could be valuable.",
        "I'm experienced in gathering and synthesizing user feedback.",
        "I'm interested in how technology can create social impact.",
        "I have experience in project coordination and team organization.",
        "I'm good at creating compelling narratives around technical products.",
        "I'm curious about AI and its potential applications.",
        "I have a background in psychology and understand human behavior.",
        "I'm skilled at facilitating workshops and brainstorming sessions.",
        "",  # Blank entry: some participants get no closing sentence
    ],
}
def generate_background(is_technical: bool = True) -> str:
    """Build a one- or two-sentence background blurb for a participant.

    A template is picked at random from TECHNICAL_BACKGROUNDS or
    NON_TECHNICAL_BACKGROUNDS, and every "{key}" placeholder it contains is
    replaced by a random option from the matching component dictionary
    (STACK_COMPONENTS or NON_TECH_COMPONENTS).

    Args:
        is_technical: True to use the technical templates and components,
            False to use the non-technical ones.

    Returns:
        The filled-in background text with no trailing whitespace.
    """
    if is_technical:
        template = random.choice(TECHNICAL_BACKGROUNDS)
        components = STACK_COMPONENTS
    else:
        template = random.choice(NON_TECHNICAL_BACKGROUNDS)
        components = NON_TECH_COMPONENTS

    for key, options in components.items():
        placeholder = "{" + key + "}"
        if placeholder not in template:
            continue
        value = random.choice(options)
        if key == "years" and value == "1":
            # Avoid the ungrammatical "1 years of experience".
            template = template.replace(placeholder + " years", "1 year")
        template = template.replace(placeholder, value)

    # The "additional" option may be an empty string, which would otherwise
    # leave a dangling space after the final period.
    return template.rstrip()
def generate_linkedin_profile(name: str) -> str:
    """Build a plausible LinkedIn profile URL from a person's name.

    The name is reduced to its alphanumeric characters and lower-cased, then
    one of three handle styles is chosen with a single random draw:

    * ~30%: the bare name (e.g. ``janedoe``)
    * ~30%: the name followed by a number from 1 to 999 (e.g. ``janedoe42``)
    * ~40%: the name plus a hyphenated profession/location suffix
      (e.g. ``janedoe-dev``)

    The variation makes collisions between participants less likely, but
    uniqueness is not guaranteed.

    Args:
        name: Full display name; spaces and punctuation are dropped.

    Returns:
        A URL string of the form ``linkedin.com/in/<handle>`` (no scheme).
    """
    name_part = "".join(c for c in name if c.isalnum()).lower()

    # Draw once and compare against cumulative thresholds. Drawing a fresh
    # number in the elif would skew the split to roughly 30% / 42% / 28%.
    roll = random.random()
    if roll < 0.3:
        profile = name_part
    elif roll < 0.6:
        profile = f"{name_part}{random.randint(1, 999)}"
    else:
        suffixes = ["dev", "tech", "to", "canada", "design", "pm", "product", "marketing", "ai"]
        profile = f"{name_part}-{random.choice(suffixes)}"
    return f"linkedin.com/in/{profile}"
def _build_participant(is_technical: bool) -> dict:
    """Create one synthetic participant record with a matching background."""
    name = fake.name()
    email = fake.email()
    linkedin = generate_linkedin_profile(name)
    background = generate_background(is_technical=is_technical)
    goals = random.choice(GOALS)
    return {
        "email": email,
        "name": name,
        "linkedin_profile": linkedin,
        "background": background,
        "goals": goals,
    }


def generate_participants(count: int = 100) -> list:
    """Generate a shuffled list of synthetic hackathon participants.

    Roughly 70% of the participants get a technical background and the rest a
    non-technical one; the list is shuffled so the two groups are interleaved.

    Args:
        count: Number of participants to create. Zero or a negative value
            yields an empty list.

    Returns:
        A list of dicts, each with the keys ``email``, ``name``,
        ``linkedin_profile``, ``background`` and ``goals``.
    """
    count = max(count, 0)
    # Integer arithmetic keeps the 70/30 split exact; the float form
    # int(count * 0.7) truncates on rounding error (int(90 * 0.7) == 62).
    technical_count = count * 7 // 10
    non_technical_count = count - technical_count

    # Technical participants are generated first, then non-technical ones.
    kinds = [True] * technical_count + [False] * non_technical_count
    participants = [_build_participant(is_technical) for is_technical in kinds]

    # Mix the two groups together.
    random.shuffle(participants)
    return participants
def main(count: int = 100) -> None:
    """Generate participants, store them in the database and print a sample.

    Steps: initialize the database, generate ``count`` participants, add each
    one via add_participant(), then read everything back with
    get_participants_dataframe() and print the total plus up to five random
    rows.

    Args:
        count: Number of participants to generate (defaults to 100).
    """
    print("Initializing database...")
    initialize_database()

    print(f"Generating {count} diverse hackathon participants...")
    participants = generate_participants(count)

    print("Adding participants to the database...")
    for participant in participants:
        add_participant(participant)
    print("Participants added successfully.")

    # Read back what is actually stored and show a small random sample.
    # NOTE(review): presumably a pandas DataFrame, judging by the name and
    # the .sample() call — confirm against hackathon_organizer.database.
    df = get_participants_dataframe()
    print(f"\nTotal participants in database: {len(df)}")
    print("\nSample of participants:")
    # Cap the sample size: asking for more rows than exist raises ValueError.
    print(df.sample(min(5, len(df))))
if __name__ == "__main__":
    # No separate "is faker installed?" probe is done here: the module-level
    # `from faker import Faker` at the top of this file already raises
    # ImportError before execution can reach this point, so a check placed
    # here could never report a missing package.
    main()