# ai-learning-path-generator / initialize_db.py
# "shubhamdhamal"
# Deploy Flask app with Docker
# 7644eac
"""
Initialize the vector database with sample educational resources.
This provides some starter content for the Learning Path Generator.
"""
import os
import json
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables
load_dotenv()

# Ensure the OpenAI API key is set. This check deliberately runs before the
# project imports further down, so a missing key is reported up front.
if not os.getenv("OPENAI_API_KEY"):
    print("ERROR: OPENAI_API_KEY not set in environment variables")
    print("Please update your .env file with your API key")
    # The bare `exit` name is injected by the `site` module for interactive
    # use and is not guaranteed to exist (e.g. under `python -S`); raising
    # SystemExit terminates with the same status code without relying on it.
    raise SystemExit(1)
# Import after checking API key
from src.data.document_store import DocumentStore
from src.data.resources import ResourceManager
from langchain.schema.document import Document
def _default_sample_resources():
    """Return the built-in starter resources written to disk on first run."""
    return [
        {
            "title": "Introduction to Machine Learning",
            "type": "course",
            "description": "A comprehensive beginner's course covering ML fundamentals",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/intro-ml",
            "topic": "machine learning",
            "learning_styles": ["visual", "reading"],
        },
        {
            "title": "Python for Data Science Handbook",
            "type": "book",
            "description": "Essential guide to using Python for data analysis and ML",
            "difficulty": "intermediate",
            "time_estimate": "20 hours",
            "url": "https://jakevdp.github.io/PythonDataScienceHandbook/",
            "topic": "python,data science",
            "learning_styles": ["reading"],
        },
        {
            "title": "Web Development Bootcamp",
            "type": "course",
            "description": "Full stack web development from scratch",
            "difficulty": "beginner",
            "time_estimate": "40 hours",
            "url": "https://example.com/web-dev-bootcamp",
            "topic": "web development",
            "learning_styles": ["visual", "kinesthetic"],
        },
        {
            "title": "Advanced JavaScript Patterns",
            "type": "video",
            "description": "Deep dive into advanced JS design patterns",
            "difficulty": "advanced",
            "time_estimate": "3 hours",
            "url": "https://example.com/js-patterns",
            "topic": "javascript",
            "learning_styles": ["visual", "auditory"],
        },
        {
            "title": "Spanish Learning Podcast",
            "type": "podcast",
            "description": "Learn Spanish through immersive audio lessons",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/spanish-podcast",
            "topic": "spanish,language learning",
            "learning_styles": ["auditory"],
        },
    ]


def load_sample_resources():
    """Load sample resources from JSON file, creating the file on first use.

    The file lives at ``samples/sample_resources.json`` relative to the
    current working directory. If it does not exist, its parent directory is
    created, the built-in starter resources are written to it, and that same
    list is returned.

    Returns:
        The parsed content of the JSON file; for the built-in data this is a
        list of five resource dictionaries.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
        OSError: If the file or its directory cannot be read or created.
    """
    resources_path = Path("samples/sample_resources.json")

    if resources_path.exists():
        # Read as UTF-8 explicitly instead of the platform's locale encoding,
        # so a file with non-ASCII text loads the same way on every system.
        with open(resources_path, "r", encoding="utf-8") as f:
            return json.load(f)

    # First run: create the directory and seed the file with starter content.
    resources_path.parent.mkdir(exist_ok=True, parents=True)
    sample_resources = _default_sample_resources()
    with open(resources_path, "w", encoding="utf-8") as f:
        json.dump(sample_resources, f, indent=2)
    print(f"Created sample resources file at {resources_path}")
    return sample_resources
def _parse_topics(raw_topics):
    """Split a comma-separated topic string into a list of clean topic names."""
    # "".split(",") returns [""], so blank entries are dropped explicitly, and
    # whitespace around commas (e.g. "python, data science") is trimmed.
    return [topic.strip() for topic in raw_topics.split(",") if topic.strip()]


def _resource_to_document(resource):
    """Build the Document (text content plus metadata) for one resource dict."""
    # Text content assembled from the resource information.
    content = (
        "\n"
        f"Title: {resource['title']}\n"
        f"Description: {resource['description']}\n"
        f"Type: {resource['type']}\n"
        f"Difficulty: {resource['difficulty']}\n"
        f"Topics: {resource.get('topic', '')}\n"
    )

    metadata = {
        "title": resource["title"],
        "type": resource["type"],
        "difficulty": resource["difficulty"],
        "url": resource["url"],
        "topic": _parse_topics(resource.get("topic", "")),
    }
    # Learning styles are optional; include them only when the resource has them.
    if "learning_styles" in resource:
        metadata["learning_styles"] = resource["learning_styles"]

    return Document(page_content=content, metadata=metadata)


def initialize_database():
    """Initialize the vector database with sample resources.

    Creates a ``DocumentStore``, loads the sample resources, converts each one
    into a ``Document`` and adds them all to the store. It then runs one test
    query ("machine learning beginner", ``top_k=2``) and prints the title and
    relevance score found in each result's metadata.

    Raises:
        KeyError: If a resource lacks one of the required keys ``title``,
            ``description``, ``type``, ``difficulty`` or ``url``.
    """
    print("Initializing vector database...")

    # The store is created before the resources are loaded, as in the
    # original statement order.
    document_store = DocumentStore()
    resources = load_sample_resources()

    documents = [_resource_to_document(resource) for resource in resources]

    document_store.add_documents(documents)
    print(f"Added {len(documents)} sample resources to vector database")

    # Test search functionality
    print("\nTesting search functionality...")
    results = document_store.search_documents("machine learning beginner", top_k=2)
    print(f"Found {len(results)} results for 'machine learning beginner'")
    for result in results:
        print(f"- {result.metadata.get('title')} (Relevance: {result.metadata.get('relevance_score', 0):.2f})")

    print("\nDatabase initialization complete!")
# Run the initialization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    initialize_database()