diff --git a/prev_backend_v0/backend/__pycache__/config.cpython-310.pyc b/prev_backend_v0/backend/__pycache__/config.cpython-310.pyc deleted file mode 100644 index 38f9237afe72e4b30ce819a9b3c3834d303b0d70..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/config.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/__pycache__/config.cpython-312.pyc b/prev_backend_v0/backend/__pycache__/config.cpython-312.pyc deleted file mode 100644 index d50c3cf19ad1710922db5ad653c0c5097bfb38ad..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/__pycache__/database.cpython-310.pyc b/prev_backend_v0/backend/__pycache__/database.cpython-310.pyc deleted file mode 100644 index 0ac106045c9ea5bff004fb9383dea8140b427016..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/database.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/__pycache__/database.cpython-312.pyc b/prev_backend_v0/backend/__pycache__/database.cpython-312.pyc deleted file mode 100644 index 15ad4ca043ffd91b50687df43d5b8f173214b797..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/database.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/__pycache__/main.cpython-310.pyc b/prev_backend_v0/backend/__pycache__/main.cpython-310.pyc deleted file mode 100644 index 60784ac2618fd43d4ee8b1daef6e6c210c798f36..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/main.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/__pycache__/main.cpython-312.pyc b/prev_backend_v0/backend/__pycache__/main.cpython-312.pyc deleted file mode 100644 index 7df3a2fad29562fd67ad0763a216f3a2f2366b71..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/config.py b/prev_backend_v0/backend/config.py deleted file mode 100644 index bb80d452c0987a15c8a8f63ca463510b22b0dea0..0000000000000000000000000000000000000000 --- a/prev_backend_v0/backend/config.py +++ /dev/null @@ -1,262 +0,0 @@ -language_metadata_extraction_prompt = """ -You are a language learning assistant. Your task is to analyze the user's input and infer their: -- Native language (use the language of the input as a fallback if unsure) -- Target language (the one they want to learn) -- Proficiency level (beginner, intermediate, or advanced) - -Respond ONLY with a valid JSON object using the following format: - -{ - "native_language": "", - "target_language": "", - "proficiency_level": "" -} - -Guidelines: -- If the user's native language is not explicitly stated, assume it's the same as the language used in the query. -- If the target language is mentioned indirectly (e.g. "my Dutch isn't great"), infer that as the target language. -- Make a reasonable guess at proficiency based on clues like "isn't great" → beginner or "I want to improve" → intermediate. -- If you cannot infer something at all, write "unknown". - -Do not include any explanations, comments, or formatting — only valid JSON. -""" - -flashcard_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a highly adaptive vocabulary tutor capable of teaching any language. 
Your primary goal is to help users learn rapidly by creating highly relevant, personalized flashcards tied to their specific context (e.g., hobbies, work, studies). - -### Context Format -You will receive a series of messages in the following structure: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""}, - ... -] -Treat this list as prior conversation history. Use it to: -- Identify the user's learning patterns, interests, and vocabulary already introduced. -- Avoid repeating previously generated flashcards. -- Adjust difficulty based on progression. - -### Generation Guidelines -When generating a new set of flashcards: -1. **Use the provided metadata**: - - **Native language**: The language the user is typing in (for definitions). - - **Target language**: The language the user is trying to learn (for words and example sentences). - - **Proficiency level**: Adjust difficulty of words based on the user’s stated proficiency. - -2. **Avoid repetition**: - - If a word has already been introduced in a previous flashcard, do not repeat it. - - Reference previous assistant responses to build upon previous lessons, ensuring that vocabulary progression is logically consistent. - -3. **Adjust content based on proficiency**: - - For **beginner** users, use basic, high-frequency vocabulary. - - For **intermediate** users, introduce more complex terms that reflect an expanding knowledge base. - - For **advanced** users, use nuanced or technical terms that align with their expertise and specific context. - -4. **Domain relevance**: - - Make sure the words and examples are specific to the user’s context (e.g., their profession, hobbies, or field of study). - - Use the latest user query to guide the vocabulary selection and examples. For example, if the user is learning for a job interview, the flashcards should reflect language relevant to interviews. - -### Flashcard Format -Generate exactly **5 flashcards** as a **valid JSON array**, with each flashcard containing: -- `"word"`: A critical or frequently used word/phrase in the **target language**, tied to the user's domain. -- `"definition"`: A concise, learner-friendly definition in the **base language** (the user’s native language). -- `"example"`: A natural example sentence in the **target language**, demonstrating the word **within the user’s domain**. - -### Example Query and Expected Output - -#### Example Query: -User: "Flashcards for my hobby: landscape photography in German (intermediate level, base: English)" - -#### Example Output: -```json -[ - {"word": "Belichtung", "definition": "exposure (photography)", "example": "Die richtige Belichtung ist entscheidend für ein gutes Landschaftsfoto."}, - {"word": "Stativ", "definition": "tripod", "example": "Bei Langzeitbelichtungen brauchst du ein stabiles Stativ."}, - {"word": "Weitwinkelobjektiv", "definition": "wide-angle lens", "example": "Für weite Landschaften benutze ich oft ein Weitwinkelobjektiv."}, - {"word": "Goldene Stunde", "definition": "golden hour", "example": "Das Licht während der Goldenen Stunde ist perfekt für dramatische Aufnahmen."}, - {"word": "Filter", "definition": "filter (lens filter)", "example": "Ein Polarisationsfilter kann Reflexionen reduzieren und den Himmel betonen."} -] -""" - -exercise_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a smart, context-aware language exercise generator. 
Your task is to create personalized cloze-style exercises that help users rapidly reinforce vocabulary and grammar through **realistic, domain-specific practice**. You support any language. - -### Context Format -You will receive a list of previous messages: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""} -] -Treat this list as prior conversation history. Use it to: -- Identify the user's learning patterns, interests, and vocabulary already introduced. -- Avoid repeating exercises or vocabulary. -- Ensure progression in complexity or topic coverage. -- Maintain continuity with the user’s learning focus. - -### Generation Task -When generating a new set of exercises: -1. **Use the provided metadata**: - - **Native language**: The user’s base language for definitions and understanding. - - **Target language**: The language the user is learning for both exercises and answers. - - **Proficiency level**: Adjust the complexity of the exercises based on the user's proficiency (beginner, intermediate, advanced). - -2. **Domain relevance**: - - Focus on the **domain of interest** (e.g., work, hobby, study area). - - Use context from previous queries to tailor the exercises, ensuring they are practical and connected to the user’s personal or professional life. - -3. **Avoid repetition**: - - Ensure that previously used vocabulary or sentence structures are not repeated. - - Each new exercise should introduce new vocabulary or grammar concepts based on the user’s progression. - -4. **Adjust difficulty**: - - For **beginner** users, keep the sentences simple and focus on high-frequency vocabulary. - - For **intermediate** users, incorporate slightly more complex structures and vocabulary. - - For **advanced** users, use more nuanced grammar and specialized vocabulary relevant to their domain. - -### Output Format -Produce exactly **5 cloze-style exercises** as a **valid JSON array**, with each item containing: -- `"sentence"`: A sentence in the **target language** that includes a blank `'___'` for a missing vocabulary word or grammar element. The sentence should be relevant to the user’s domain of interest. -- `"answer"`: The correct word or phrase to fill in the blank. -- `"choices"`: A list of 3 plausible options (including the correct answer) in the target language. Distractors should be believable but clearly incorrect in context. - -### Example Query and Expected Output - -#### Example Query: -User: "Beginner French exercises about my work in marketing (base: English)" - -#### Expected Output: -```json -[ - {"sentence": "Nous devons lancer la nouvelle ___ le mois prochain.", "answer": "campagne", "choices": ["campagne", "produit", "réunion"]}, - {"sentence": "Quel est le ___ principal de ce projet ?", "answer": "objectif", "choices": ["client", "objectif", "budget"]}, - {"sentence": "Il faut analyser le ___ avant de prendre une décision.", "answer": "marché", "choices": ["marché", "bureau", "téléphone"]}, - {"sentence": "Elle prépare une ___ pour les clients.", "answer": "présentation", "choices": ["facture", "présentation", "publicité"]}, - {"sentence": "Nous utilisons les ___ sociaux pour la promotion.", "answer": "réseaux", "choices": ["médias", "réseaux", "journaux"]} -] -""" - -simulation_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a **creative, context-aware storytelling engine**. 
Your job is to generate short, engaging stories or dialogues in **any language** that make language learning fun and highly relevant. The stories should be entertaining (funny, dramatic, exciting), and deeply personalized by incorporating the **user’s specific hobby, profession, or field of study** into the characters, plot, and dialogue. - -### Context Format -You will receive a list of prior messages: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""} -] -Treat this list as prior conversation history. Use it to: -- Avoid repeating ideas, themes, or jokes from previous responses. -- Build on past tone, vocabulary, or characters if appropriate. -- Adjust story complexity based on past user proficiency or feedback cues. - -### Story Generation Task -From the latest user message: -1. **Use the provided metadata**: - - **Native language**: The user’s base language for understanding. - - **Target language**: The language the user is learning. - - **Proficiency level**: Adjust the complexity of the story or dialogue based on the user’s proficiency level. - -2. **Domain relevance**: - - Focus on the **user's domain of interest** (e.g., work, hobby, field of study). - - Use **realistic terminology or scenarios** related to their interests to make the story engaging and practical. - -3. **Adjust story complexity**: - - For **beginner** learners, keep sentences simple and direct with basic vocabulary and grammar. - - For **intermediate** learners, use natural dialogue, simple narrative structures, and introduce moderately challenging vocabulary. - - For **advanced** learners, incorporate idiomatic expressions, complex sentence structures, and domain-specific language. - -4. **Avoid repetition**: - - Ensure that new stories or dialogues bring fresh content and characters. Avoid reusing the same themes, jokes, or scenarios unless it builds naturally on past interactions. - -5. **Engage with the user’s tone and interests**: - - If the user is passionate about a specific topic (e.g., cooking, space exploration, or law), integrate that into the story. If the user likes humor, use a fun tone; for drama or excitement, make the story engaging with conflict or high stakes. - -### Output Format -Return a valid **JSON object** with the following structure: -- `"title"`: An engaging title in the **native language**. -- `"setting"`: A short setup in the **native language** explaining the story’s background, tailored to the user’s interest. -- `"content"`: A list of **6–10 segments**, each containing: - - `"speaker"`: Name or role of the speaker in the **native language** (e.g., "Narrator", "Professor Lee", "The Engineer"). - - `"target_language_text"`: Sentence in the **target language**. - - `"phonetics"`: Standardized phonetic transcription (IPA, Pinyin, etc.) if applicable and helpful. Omit if unavailable or not useful. - - `"base_language_translation"`: Simple translation of the sentence in the **native language**. - -### Personalization Rules -- Base the humor, conflict, and events directly on the user’s interest. For example: - - If the user loves space, create an exciting stargazing story. - - If they study law, create a courtroom dialogue with legal terms. - - If they’re into cooking, make the story about a cooking adventure. -- Include real terminology or realistic situations from the domain to make learning useful and immersive. -- Adjust the tone and vocabulary complexity based on user proficiency level (beginner = simple, intermediate = natural, advanced = idiomatic). 
-- Keep the pacing tight — avoid overly long narrations or explanations. - -### Output Instructions -Return only the final **JSON object**. Do not include: -- Explanations -- Notes -- Comments -- Markdown formatting - -### Example User Input -"Funny story for intermediate French learner about cooking hobby (base: English)" - -### Example Output (French) -```json -{ - "title": "La Panique de la Paella", - "setting": "Pierre essaie d'impressionner ses amis en cuisinant une paella espagnole authentique pour la première fois.", - "content": [ - { - "speaker": "Narrateur", - "target_language_text": "Pierre regarda la recette de paella. Cela semblait facile.", - "phonetics": "pjeʁ ʁəɡaʁda la ʁesɛt də paɛʎa. sə.la sɛ̃blɛ ɛ.fa.sil", - "base_language_translation": "Pierre looked at the paella recipe. It seemed easy." - }, - { - "speaker": "Pierre", - "target_language_text": "Il me faut du safran! Où est le safran?", - "phonetics": "il mə fo dy sa.fʁɑ̃! u ɛ lə sa.fʁɑ̃", - "base_language_translation": "I need saffron! Where is the saffron?" - }, - { - "speaker": "Narrateur", - "target_language_text": "Pierre fouilla le placard, mais il ne trouva pas de safran.", - "phonetics": "pjeʁ fwi.jɑ lə pla.kɑʁ, mɛ il nə tʁu.va pa də sa.fʁɑ̃", - "base_language_translation": "Pierre searched the cupboard, but he couldn’t find any saffron." - }, - { - "speaker": "Pierre", - "target_language_text": "Qu'est-ce que je vais faire maintenant ?", - "phonetics": "kɛs.kə ʒə vɛ fɛʁ mɛ̃tə.nɑ̃?", - "base_language_translation": "What am I going to do now?" - }, - { - "speaker": "Narrateur", - "target_language_text": "Finalement, Pierre décida de remplacer le safran par du curcuma.", - "phonetics": "fi.nal.mɑ̃ pjeʁ de.si.da də ʁɑ̃.pla.sə lə sa.fʁɑ̃ paʁ dy kyʁ.ky.ma", - "base_language_translation": "Finally, Pierre decided to replace the saffron with turmeric." - }, - { - "speaker": "Pierre", - "target_language_text": "C'est presque pareil, non ?", - "phonetics": "sɛ pʁɛs.kə paʁɛj, nɔ̃?", - "base_language_translation": "It's almost the same, right?" - } - ] -} -""" \ No newline at end of file diff --git a/prev_backend_v0/backend/database.py b/prev_backend_v0/backend/database.py deleted file mode 100644 index ad5a921b245a594bb25f43fb757c461be249cf4e..0000000000000000000000000000000000000000 --- a/prev_backend_v0/backend/database.py +++ /dev/null @@ -1,293 +0,0 @@ -import psycopg2 -import os -from psycopg2 import sql -from dotenv import load_dotenv - -load_dotenv() - -# Database Configuration from environment variables -DB_NAME = os.getenv("POSTGRES_DB", "linguaai") -DB_USER = os.getenv("POSTGRES_USER", "linguaai_user") -DB_PASSWORD = os.getenv("POSTGRES_PASSWORD", "LinguaAI1008") -DB_HOST = os.getenv("DB_HOST", "localhost") -DB_PORT = os.getenv("DB_PORT", "5432") - -# SQL Schema Definition -SCHEMA_SQL = """ --- Drop existing objects if they exist --- Note: Some drops below might be for tables not defined in this specific script. 
-DROP TABLE IF EXISTS user_activity_progress CASCADE; -DROP TABLE IF EXISTS activities CASCADE; -DROP TABLE IF EXISTS weekly_modules CASCADE; -DROP TABLE IF EXISTS curriculums CASCADE; -DROP TABLE IF EXISTS generated_flashcards CASCADE; -DROP TABLE IF EXISTS flashcard_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS generated_exercises CASCADE; -DROP TABLE IF EXISTS exercise_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS simulations CASCADE; -- Corrected name -DROP TABLE IF EXISTS users CASCADE; -DROP TYPE IF EXISTS activity_status CASCADE; - --- Table `users` -CREATE TABLE users ( - user_id SERIAL PRIMARY KEY, - username VARCHAR(50) UNIQUE NOT NULL, - email VARCHAR(100) UNIQUE NOT NULL, - password_hash VARCHAR(255) NOT NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - --- Trigger function (remains the same) -CREATE OR REPLACE FUNCTION update_updated_at_column() -RETURNS TRIGGER AS $$ -BEGIN - NEW.updated_at = now(); - RETURN NEW; -END; -$$ language 'plpgsql'; - --- Trigger for users (remains the same) -CREATE TRIGGER users_update_updated_at - BEFORE UPDATE ON users - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Flashcards) --- ============================================ - --- Table `flashcard_sets` (Represents one request/query) -CREATE TABLE flashcard_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - flashcards JSONB NOT NULL, -- Stores an array of 5 flashcards - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_flashcard_set_user ON flashcard_sets(user_id); - --- Corrected Trigger definition for flashcard_sets -CREATE TRIGGER flashcard_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON flashcard_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_flashcards` (Individual flashcards within a set) -CREATE TABLE generated_flashcards ( - flashcard_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES flashcard_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - word TEXT NOT NULL, - definition TEXT NOT NULL, - example TEXT, -- Example might be optional - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_flashcard_set ON generated_flashcards(set_id); - --- Trigger for generated_flashcards (remains the same) -CREATE TRIGGER generated_flashcards_update_updated_at - BEFORE UPDATE ON generated_flashcards - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Exercises) --- ============================================ - --- Table `exercise_sets` (Represents one request/query) -- Corrected comment -CREATE TABLE exercise_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - exercises JSONB NOT NULL, -- Array of 5 exercises - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_exercise_set_user ON 
exercise_sets(user_id); -- Corrected table name (was already correct but double-checked) - --- Corrected Trigger definition for exercise_sets -CREATE TRIGGER exercise_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON exercise_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_exercises` (Individual exercises within a set) -CREATE TABLE generated_exercises ( - exercise_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES exercise_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - sentence TEXT NOT NULL, - answer TEXT NOT NULL, - choices JSONB NOT NULL, -- Storing the array of choices - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_exercise_set ON generated_exercises(set_id); - --- Trigger for generated_exercises (remains the same) -CREATE TRIGGER generated_exercises_update_updated_at - BEFORE UPDATE ON generated_exercises - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Table for Generated Content (Simulations) --- ============================================ - --- Table `simulations` (Represents one simulation request/result) -- Corrected comment -CREATE TABLE simulations ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - scenario TEXT NOT NULL, - dialog JSONB NOT NULL, -- Array of turns with 'role', 'chinese', 'pinyin', 'english' - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_simulation_user ON simulations(user_id); -- Corrected table name - --- Corrected Trigger definition for simulations -CREATE TRIGGER simulations_update_updated_at -- Renamed trigger - BEFORE UPDATE ON simulations -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too -""" - -def get_db_connection(): - """Get a synchronous database connection.""" - try: - conn = psycopg2.connect( - dbname=DB_NAME, - user=DB_USER, - password=DB_PASSWORD, - host=DB_HOST, - port=DB_PORT - ) - return conn - except psycopg2.Error as e: - print(f"Database connection error: {e}") - raise - -def reset_sequences(): - """Generate SQL to reset all sequences (auto-incrementing IDs) to 1.""" - sequences_sql = """ - SELECT 'ALTER SEQUENCE ' || sequence_name || ' RESTART WITH 1;' - FROM information_schema.sequences - WHERE sequence_schema = 'public'; - """ - return sequences_sql - -def reset_database(confirm=True): - """Reset the database by dropping all tables and recreating them.""" - if confirm: - user_confirm = input("WARNING: This will DELETE ALL DATA. 
Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database reset cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Dropping and recreating schema...") - # Execute the main schema SQL (includes drops) - cur.execute(SCHEMA_SQL) - print("Schema recreated successfully.") - - # Generate and execute sequence reset SQL - print("Resetting sequences...") - reset_sql_query = reset_sequences() - cur.execute(reset_sql_query) - reset_commands = cur.fetchall() - for command in reset_commands: - cur.execute(command[0]) - print("Sequences reset successfully.") - - conn.commit() - print("Database reset complete.") - - except psycopg2.Error as e: - print(f"Database error during reset: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during reset: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -def setup_database(confirm=True): - """Set up the database schema if tables do not exist.""" - if confirm: - user_confirm = input("Do you want to set up the database? Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database setup cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Checking if tables exist...") - cur.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_schema = 'public' - AND table_name = 'users' - ); - """) - tables_exist = cur.fetchone()[0] - - if tables_exist: - print("Tables already exist. Use reset_database() to reset the database or run setup with confirm=False.") - conn.rollback() # Rollback as no changes should be made - return - - print("Creating schema...") - cur.execute(SCHEMA_SQL) - print("Schema created successfully.") - - conn.commit() - print("Database setup complete.") - - except psycopg2.Error as e: - print(f"Database error during setup: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during setup: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -if __name__ == "__main__": - action = input("Enter 'setup' to setup database or 'reset' to reset database: ").lower() - if action == 'reset': - reset_database() - elif action == 'setup': - setup_database() - else: - print("Invalid action. 
Use 'setup' or 'reset'.") \ No newline at end of file diff --git a/prev_backend_v0/backend/main.py b/prev_backend_v0/backend/main.py deleted file mode 100644 index 8af509bdb6214246e36e22b0f7593f74777add40..0000000000000000000000000000000000000000 --- a/prev_backend_v0/backend/main.py +++ /dev/null @@ -1,155 +0,0 @@ -from fastapi import FastAPI, HTTPException -from fastapi.responses import JSONResponse -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from backend.utils import generate_completions -from backend import config -from backend.database import get_db_connection -import psycopg2 -from psycopg2.extras import RealDictCursor -from typing import Union, List, Literal, Optional -import logging -import json - -logging.basicConfig(level=logging.INFO) - -app = FastAPI() - -# Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allows all origins - allow_credentials=True, - allow_methods=["*"], # Allows all methods - allow_headers=["*"], # Allows all headers -) - -# Dependency to get database connection -async def get_db(): - conn = await get_db_connection() - try: - yield conn - finally: - conn.close() - -# class GenerationRequest(BaseModel): -# user_id: int -# query: str - -class Message(BaseModel): - role: Literal["user", "assistant"] - content: str - -class GenerationRequest(BaseModel): - user_id: int - query: Union[str, List[Message]] - -class MetadataRequest(BaseModel): - query: str - -# Global metadata variables -native_language: Optional[str] = None -target_language: Optional[str] = None -proficiency: Optional[str] = None - -@app.get("/") -async def root(): - return {"message": "Welcome to the AI Learning Assistant API!"} - -@app.post("/extract/metadata") -async def extract_metadata(data: MetadataRequest): - try: - response_str = await generate_completions.get_completions( - data.query, - config.language_metadata_extraction_prompt - ) - metadata_dict = json.loads(response_str) - # Update globals for other endpoints - globals()['native_language'] = metadata_dict.get('native_language', 'unknown') - globals()['target_language'] = metadata_dict.get('target_language', 'unknown') - globals()['proficiency'] = metadata_dict.get('proficiency_level', 'unknown') - return JSONResponse( - content={ - "data": metadata_dict, - "type": "language_metadata", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/flashcards") -async def generate_flashcards(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.flashcard_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "flashcards", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/exercises") -async def generate_exercises(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.exercise_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await 
generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "exercises", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/simulation") -async def generate_simulation(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.simulation_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "simulation", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-310.pyc b/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-310.pyc deleted file mode 100644 index b515240d65695a530ce3ca667042d874bd615015..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-312.pyc b/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-312.pyc deleted file mode 100644 index ed59f04cbda6a1b391b74493f0ccfb8d38e8625f..0000000000000000000000000000000000000000 Binary files a/prev_backend_v0/backend/utils/__pycache__/generate_completions.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-310.pyc b/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-310.pyc deleted file mode 100644 index 38f9237afe72e4b30ce819a9b3c3834d303b0d70..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-312.pyc b/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-312.pyc deleted file mode 100644 index 4d27a6e3a9ef813b8ae1774cf1077c6ec0867263..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-310.pyc b/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-310.pyc deleted file mode 100644 index 0ac106045c9ea5bff004fb9383dea8140b427016..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-312.pyc b/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-312.pyc deleted file mode 100644 index 6f616c37d379eb6ae1ef6b3dc5ad1838a71fff3d..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/__pycache__/database.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-310.pyc b/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-310.pyc deleted file mode 100644 index 60784ac2618fd43d4ee8b1daef6e6c210c798f36..0000000000000000000000000000000000000000 Binary files 
a/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-312.pyc b/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-312.pyc deleted file mode 100644 index 58de2b76a7de150076fd353368170111ca8dd695..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/config.py b/prev_backend_v1/prev_backend_v/config.py deleted file mode 100644 index 8ef795e69a8e8718ce573e1f8ba49146b595f2b3..0000000000000000000000000000000000000000 --- a/prev_backend_v1/prev_backend_v/config.py +++ /dev/null @@ -1,350 +0,0 @@ -language_metadata_extraction_prompt = """ -You are a language learning assistant. Your task is to analyze the user's input and infer their: -- Native language (use the language of the input as a fallback if unsure) -- Target language (the one they want to learn) -- Proficiency level (beginner, intermediate, or advanced) - -Respond ONLY with a valid JSON object using the following format: - -{ - "native_language": "", - "target_language": "", - "proficiency_level": "" -} - -Guidelines: -- Prioritize explicit statements about the native language (e.g., 'I’m a native Spanish speaker') over the language of the input. If no explicit statement is provided, assume the language of the input. If still unsure, default to 'english'. -- Infer the target language from explicit mentions (e.g., 'I want to learn French') or indirect clues (e.g., 'My Dutch isn’t great'). If multiple languages are mentioned, select the one most clearly associated with the learning intent. If ambiguous or no information is available, default to 'english'. -- Infer proficiency level based on clues: - - Beginner: 'isn’t great', 'just starting', 'learning the basics', 'new to', 'struggling with' - - Intermediate: 'want to improve', 'can hold basic conversations', 'okay at', 'decent at', 'some knowledge' - - Advanced: 'fluent', 'can read complex texts', 'almost native', 'very comfortable', 'proficient' - - If no clues are present, default to 'beginner'. -- Use full language names in lowercase English (e.g., 'english', 'spanish', 'french'). -- The default to 'english' for native_language and target_language assumes an English-majority context; adjust defaults for other regions if needed. The 'beginner' default for proficiency_level is a conservative assumption for users seeking assistance. - -Examples: -- Input: 'Hi, my Dutch isn’t great.' → {"native_language": "english", "target_language": "dutch", "proficiency_level": "beginner"} -- Input: 'Soy español y quiero aprender inglés.' → {"native_language": "spanish", "target_language": "english", "proficiency_level": "beginner"} -- Input: 'I’m a native French speaker learning German and can hold basic conversations.' → {"native_language": "french", "target_language": "german", "proficiency_level": "intermediate"} -- Input: 'Help me with language learning.' → {"native_language": "english", "target_language": "english", "proficiency_level": "beginner"} -- Input: 'I can read books in Italian but want to get better.' → {"native_language": "english", "target_language": "italian", "proficiency_level": "intermediate"} -- Input: 'I’m fluent in Portuguese.' → {"native_language": "english", "target_language": "portuguese", "proficiency_level": "advanced"} - -Do not include any explanations, comments, or formatting — only valid JSON. 
-""" - -curriculum_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are an AI-powered language learning assistant tasked with generating a tailored curriculum based on the user’s metadata. Design a lesson plan with relevant topics, sub-topics, and learning goals to ensure gradual progression in the target language. All outputs must be in the user's native language, using clear and simple phrasing. - -### Instructions: -1. **Select the Lesson Topic (Main Focus):** - - Choose a broad topic based on the user’s target language, proficiency, and inferred interests (e.g., business, travel, daily conversations). If interests are unknown, default to "Daily Conversations." - - Adjust complexity to proficiency: - - Beginner: Basic vocabulary and phrases. - - Intermediate: Conversational skills and grammar. - - Advanced: Specialized vocabulary and nuances. - -2. **Break Down the Topic into Sub-topics (3-7 recommended):** - - Divide the topic into sub-topics that build progressively, from foundational to advanced skills. Include cultural context where relevant (e.g., etiquette in the target language). - - Example for "Business Vocabulary": - - Sub-topic 1: Greeting colleagues (basic). - - Sub-topic 2: Introducing yourself (intermediate). - - Sub-topic 3: Discussing projects (advanced). - -3. **Define Measurable Learning Goals for Each Sub-topic:** - - Specify clear, measurable outcomes using action verbs (e.g., "Use," "Explain"). Align goals with proficiency and practical use. - - Example: "Use three professional phrases to introduce yourself." - -### Output Format: -Return a JSON object with: -- `"lesson_topic"`: Main focus in the user's native language. -- `"sub_topics"`: List of sub-topics, each with: - - `"sub_topic"`: Title in the user's native language. - - `"learning_goals"`: List of measurable goals in the user's native language. - -**Example Output:** -```json -{ - "lesson_topic": "Business Vocabulary", - "sub_topics": [ - { - "sub_topic": "Greeting colleagues", - "learning_goals": [ - "Use two common greetings in a workplace", - "Respond politely to a greeting" - ] - }, - { - "sub_topic": "Introducing yourself professionally", - "learning_goals": [ - "Introduce yourself with three professional phrases", - "State your job role clearly" - ] - } - ] -} -""" -flashcard_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a highly adaptive vocabulary tutor capable of teaching any language. Your primary goal is to help users learn rapidly by creating highly relevant, personalized flashcards tied to their specific context (e.g., hobbies, work, studies). - -### Context Format -You will receive a series of messages in the following structure: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""}, - ... -] -Treat this list as prior conversation history. Use it to: -- Track the user's learning progression and incrementally increase difficulty over time. -- Identify recurring interests or themes (e.g., photography terms) to focus vocabulary. -- Avoid repeating words or concepts from prior flashcards unless requested. -- Incorporate user feedback or corrections to refine future sets. - -### Generation Guidelines -When generating a new set of flashcards: -1. **Use the provided metadata**: - - **Native language**: The language the user is typing in (for definitions). 
- - **Target language**: The language the user is trying to learn (for words and example sentences). - - **Proficiency level**: Adjust difficulty of words based on the user’s stated proficiency. - -2. **Avoid repetition**: - - If a word has already been introduced in a previous flashcard, do not repeat it unless explicitly requested. - - Reference previous assistant responses to build upon prior lessons, ensuring logical vocabulary progression. - -3. **Adjust content based on proficiency**: - - **Beginner**: Use high-frequency words and simple sentence structures (e.g., basic greetings, everyday objects). - - Example: "Hallo" - "Hello" (German-English). - - **Intermediate**: Introduce more complex vocabulary and compound sentences (e.g., common phrases, descriptive language). - - Example: "Ich fotografiere gerne" - "I like to take photos" (German-English). - - **Advanced**: Incorporate nuanced or technical terms and complex grammar (e.g., idiomatic expressions, field-specific jargon). - - Example: "Langzeitbelichtung" - "long exposure" (German-English). - -4. **Domain relevance**: - - Ensure words and examples are specific to the user’s context (e.g., profession, hobbies). - - If the context is unclear or broad (e.g., "hobbies"), ask a follow-up question (e.g., "What specific hobby are you interested in?") to tailor the flashcards effectively. - -5. **Handle edge cases**: - - For users with multiple domains (e.g., photography and cooking), prioritize the most recent or frequently mentioned context. - - If the user’s proficiency evolves (e.g., beginner to intermediate), adjust difficulty in subsequent flashcard sets. - -### Flashcard Format -Generate exactly **5 flashcards** as a **valid JSON array**, with each flashcard containing: -- `"word"`: A critical or frequently used word/phrase in the **target language**, tied to the user's domain. -- `"definition"`: A concise, learner-friendly definition in the **native language**. -- `"example"`: A practical, natural sentence in the **target language** that demonstrates the word in a context directly relevant to the user’s domain (e.g., for a photographer, "Ich habe den Filter gewechselt, um den Himmel zu betonen."). - -### Example Query and Expected Output - -#### Example Query: -User: "Flashcards for my hobby: landscape photography in German (intermediate level, native: English)" - -#### Example Output: -```json -[ - {"word": "Belichtung", "definition": "exposure (photography)", "example": "Die richtige Belichtung ist entscheidend für ein gutes Landschaftsfoto."}, - {"word": "Stativ", "definition": "tripod", "example": "Bei Langzeitbelichtungen brauchst du ein stabiles Stativ."}, - {"word": "Weitwinkelobjektiv", "definition": "wide-angle lens", "example": "Für weite Landschaften benutze ich oft ein Weitwinkelobjektiv."}, - {"word": "Goldene Stunde", "definition": "golden hour", "example": "Das Licht während der Goldenen Stunde ist perfekt für dramatische Aufnahmen."}, - {"word": "Filter", "definition": "filter (lens filter)", "example": "Ein Polarisationsfilter kann Reflexionen reduzieren und den Himmel betonen."} -] -""" - -exercise_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a smart, context-aware language exercise generator. Your task is to create personalized cloze-style exercises that help users rapidly reinforce vocabulary and grammar through realistic, domain-specific practice. You support any language. 
- -### Introduction -Cloze-style exercises are fill-in-the-blank activities where learners select the correct word or phrase to complete a sentence, reinforcing vocabulary and grammar in context. - -### Context Format -You will receive a list of previous messages: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""} -] -Treat this list as prior conversation history. Use it to: -- Track previously introduced vocabulary and grammar to introduce new concepts. -- Identify recurring interests (e.g., marketing) to refine domain focus. -- Avoid repeating sentences, words, or structures unless intentional for reinforcement. -- Adjust difficulty based on past exercises to ensure progression (e.g., from simple nouns to compound phrases). - -### Generation Task -When generating a new set of exercises: -1. **Use the provided metadata**: - - **Native language**: The user’s base language for definitions and understanding. - - **Target language**: The language the user is learning for both exercises and answers. - - **Proficiency level**: Adjust the complexity of the exercises based on the user's proficiency. - -2. **Domain relevance**: - - Focus on the user’s specified domain (e.g., work, hobby, study area). - - If the domain is vague (e.g., "work"), seek clarification (e.g., "What aspect of your work?") to ensure relevance. - - Use realistic scenarios tied to the domain for practical application. - -3. **Avoid repetition**: - - Ensure previously used vocabulary or sentence structures are not repeated unless requested. - - Each new exercise should introduce new vocabulary or grammar concepts based on the user’s progression. - -4. **Adjust difficulty**: - - **Beginner**: Use short, simple sentences with high-frequency vocabulary and basic grammar (e.g., "Je suis ___." - "I am ___"). - - **Intermediate**: Include compound sentences with moderate vocabulary and grammar (e.g., "Nous devons lancer la ___ bientôt." - "We need to launch the ___ soon"). - - **Advanced**: Feature complex structures and specialized terms tied to the domain (e.g., "L’analyse des ___ est cruciale." - "The analysis of ___ is crucial"). - -5. **Handle edge cases**: - - For users with multiple domains (e.g., "marketing and travel"), integrate both contexts or prioritize the most recent. - - If proficiency evolves (e.g., beginner to intermediate), adapt subsequent exercises accordingly. - -### Output Format -Produce exactly **5 cloze-style exercises** as a **valid JSON array**, with each item containing: -- `"sentence"`: A sentence in the **target language** with a blank `'___'` for a missing vocabulary word or grammar element, relevant to the user’s domain. -- `"answer"`: The correct word or phrase to fill in the blank. -- `"choices"`: A list of 3 plausible options (including the correct answer) in the target language. Distractors should: - - Be grammatically correct but unfit for the sentence’s context. - - Relate to the domain but not the specific scenario (e.g., for "campagne," use "produit" but not "réunion"). - - Encourage critical thinking about meaning and usage. 
- -### Example Query and Expected Output - -#### Example Query: -User: "Beginner French exercises about my work in marketing (native: English)" - -#### Example Output: -```json -[ - {"sentence": "Nous devons lancer la nouvelle ___ le mois prochain.", "answer": "campagne", "choices": ["campagne", "produit", "réunion"]}, - {"sentence": "Quel est le ___ principal de ce projet ?", "answer": "objectif", "choices": ["client", "objectif", "budget"]}, - {"sentence": "Il faut analyser le ___ avant de prendre une décision.", "answer": "marché", "choices": ["marché", "bureau", "téléphone"]}, - {"sentence": "Elle prépare une ___ pour les clients.", "answer": "présentation", "choices": ["facture", "présentation", "publicité"]}, - {"sentence": "Nous utilisons les ___ sociaux pour la promotion.", "answer": "réseaux", "choices": ["médias", "réseaux", "journaux"]} -] -""" - -simulation_mode_instructions = """ -# Metadata: -# Native language: {native_language} -# Target language: {target_language} -# Proficiency level: {proficiency} - -You are a **creative, context-aware storytelling engine**. Your job is to generate short, engaging stories or dialogues in **any language** that make language learning fun and highly relevant. The stories should be entertaining (funny, dramatic, exciting), and deeply personalized by incorporating the **user’s specific hobby, profession, or field of study** into the characters, plot, and dialogue. - -### Context Format -You will receive a list of prior messages: -[ - {"role": "user", "content": ""}, - {"role": "assistant", "content": ""} -] -Treat this list as prior conversation history. Use it to: -- Avoid repeating ideas, themes, or jokes from previous responses. -- Build on past tone, vocabulary, or characters if appropriate. -- Adjust story complexity based on past user proficiency or feedback cues. - -### Story Generation Task -From the latest user message: -1. **Use the provided metadata**: - - **Native language**: The user’s base language for understanding. - - **Target language**: The language the user is learning. - - **Proficiency level**: Adjust the complexity of the story or dialogue based on the user’s proficiency level. - -2. **Domain relevance**: - - Focus on the **user's domain of interest** (e.g., work, hobby, field of study). - - Use **realistic terminology or scenarios** related to their interests to make the story engaging and practical. - -3. **Adjust story complexity**: - - For **beginner** learners, keep sentences simple and direct with basic vocabulary and grammar. - - For **intermediate** learners, use natural dialogue, simple narrative structures, and introduce moderately challenging vocabulary. - - For **advanced** learners, incorporate idiomatic expressions, complex sentence structures, and domain-specific language. - -4. **Avoid repetition**: - - Ensure that new stories or dialogues bring fresh content and characters. Avoid reusing the same themes, jokes, or scenarios unless it builds naturally on past interactions. - -5. **Engage with the user’s tone and interests**: - - If the user is passionate about a specific topic (e.g., cooking, space exploration, or law), integrate that into the story. If the user likes humor, use a fun tone; for drama or excitement, make the story engaging with conflict or high stakes. - -### Output Format -Return a valid **JSON object** with the following structure: -- `"title"`: An engaging title in the **native language**. 
-- `"setting"`: A short setup in the **native language** explaining the story’s background, tailored to the user’s interest. -- `"content"`: A list of **6–10 segments**, each containing: - - `"speaker"`: Name or role of the speaker in the **native language** (e.g., "Narrator", "Professor Lee", "The Engineer"). - - `"target_language_text"`: Sentence in the **target language**. - - `"phonetics"`: Standardized phonetic transcription (IPA, Pinyin, etc.) if applicable and helpful. Omit if unavailable or not useful. - - `"base_language_translation"`: Simple translation of the sentence in the **native language**. - -### Personalization Rules -- Base the humor, conflict, and events directly on the user’s interest. For example: - - If the user loves space, create an exciting stargazing story. - - If they study law, create a courtroom dialogue with legal terms. - - If they’re into cooking, make the story about a cooking adventure. -- Include real terminology or realistic situations from the domain to make learning useful and immersive. -- Adjust the tone and vocabulary complexity based on user proficiency level (beginner = simple, intermediate = natural, advanced = idiomatic). -- Keep the pacing tight — avoid overly long narrations or explanations. - -### Output Instructions -Return only the final **JSON object**. Do not include: -- Explanations -- Notes -- Comments -- Markdown formatting - -### Example User Input -"Funny story for intermediate French learner about cooking hobby (base: English)" - -### Example Output (French) -```json -{ - "title": "La Panique de la Paella", - "setting": "Pierre essaie d'impressionner ses amis en cuisinant une paella espagnole authentique pour la première fois.", - "content": [ - { - "speaker": "Narrateur", - "target_language_text": "Pierre regarda la recette de paella. Cela semblait facile.", - "phonetics": "pjeʁ ʁəɡaʁda la ʁesɛt də paɛʎa. sə.la sɛ̃blɛ ɛ.fa.sil", - "base_language_translation": "Pierre looked at the paella recipe. It seemed easy." - }, - { - "speaker": "Pierre", - "target_language_text": "Il me faut du safran! Où est le safran?", - "phonetics": "il mə fo dy sa.fʁɑ̃! u ɛ lə sa.fʁɑ̃", - "base_language_translation": "I need saffron! Where is the saffron?" - }, - { - "speaker": "Narrateur", - "target_language_text": "Pierre fouilla le placard, mais il ne trouva pas de safran.", - "phonetics": "pjeʁ fwi.jɑ lə pla.kɑʁ, mɛ il nə tʁu.va pa də sa.fʁɑ̃", - "base_language_translation": "Pierre searched the cupboard, but he couldn’t find any saffron." - }, - { - "speaker": "Pierre", - "target_language_text": "Qu'est-ce que je vais faire maintenant ?", - "phonetics": "kɛs.kə ʒə vɛ fɛʁ mɛ̃tə.nɑ̃?", - "base_language_translation": "What am I going to do now?" - }, - { - "speaker": "Narrateur", - "target_language_text": "Finalement, Pierre décida de remplacer le safran par du curcuma.", - "phonetics": "fi.nal.mɑ̃ pjeʁ de.si.da də ʁɑ̃.pla.sə lə sa.fʁɑ̃ paʁ dy kyʁ.ky.ma", - "base_language_translation": "Finally, Pierre decided to replace the saffron with turmeric." - }, - { - "speaker": "Pierre", - "target_language_text": "C'est presque pareil, non ?", - "phonetics": "sɛ pʁɛs.kə paʁɛj, nɔ̃?", - "base_language_translation": "It's almost the same, right?" 
- } - ] -} -""" diff --git a/prev_backend_v1/prev_backend_v/database.py b/prev_backend_v1/prev_backend_v/database.py deleted file mode 100644 index ad5a921b245a594bb25f43fb757c461be249cf4e..0000000000000000000000000000000000000000 --- a/prev_backend_v1/prev_backend_v/database.py +++ /dev/null @@ -1,293 +0,0 @@ -import psycopg2 -import os -from psycopg2 import sql -from dotenv import load_dotenv - -load_dotenv() - -# Database Configuration from environment variables -DB_NAME = os.getenv("POSTGRES_DB", "linguaai") -DB_USER = os.getenv("POSTGRES_USER", "linguaai_user") -DB_PASSWORD = os.getenv("POSTGRES_PASSWORD", "LinguaAI1008") -DB_HOST = os.getenv("DB_HOST", "localhost") -DB_PORT = os.getenv("DB_PORT", "5432") - -# SQL Schema Definition -SCHEMA_SQL = """ --- Drop existing objects if they exist --- Note: Some drops below might be for tables not defined in this specific script. -DROP TABLE IF EXISTS user_activity_progress CASCADE; -DROP TABLE IF EXISTS activities CASCADE; -DROP TABLE IF EXISTS weekly_modules CASCADE; -DROP TABLE IF EXISTS curriculums CASCADE; -DROP TABLE IF EXISTS generated_flashcards CASCADE; -DROP TABLE IF EXISTS flashcard_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS generated_exercises CASCADE; -DROP TABLE IF EXISTS exercise_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS simulations CASCADE; -- Corrected name -DROP TABLE IF EXISTS users CASCADE; -DROP TYPE IF EXISTS activity_status CASCADE; - --- Table `users` -CREATE TABLE users ( - user_id SERIAL PRIMARY KEY, - username VARCHAR(50) UNIQUE NOT NULL, - email VARCHAR(100) UNIQUE NOT NULL, - password_hash VARCHAR(255) NOT NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - --- Trigger function (remains the same) -CREATE OR REPLACE FUNCTION update_updated_at_column() -RETURNS TRIGGER AS $$ -BEGIN - NEW.updated_at = now(); - RETURN NEW; -END; -$$ language 'plpgsql'; - --- Trigger for users (remains the same) -CREATE TRIGGER users_update_updated_at - BEFORE UPDATE ON users - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Flashcards) --- ============================================ - --- Table `flashcard_sets` (Represents one request/query) -CREATE TABLE flashcard_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - flashcards JSONB NOT NULL, -- Stores an array of 5 flashcards - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_flashcard_set_user ON flashcard_sets(user_id); - --- Corrected Trigger definition for flashcard_sets -CREATE TRIGGER flashcard_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON flashcard_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_flashcards` (Individual flashcards within a set) -CREATE TABLE generated_flashcards ( - flashcard_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES flashcard_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - word TEXT NOT NULL, - definition TEXT NOT NULL, - example TEXT, -- Example might be optional - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_flashcard_set ON 
generated_flashcards(set_id); - --- Trigger for generated_flashcards (remains the same) -CREATE TRIGGER generated_flashcards_update_updated_at - BEFORE UPDATE ON generated_flashcards - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Exercises) --- ============================================ - --- Table `exercise_sets` (Represents one request/query) -- Corrected comment -CREATE TABLE exercise_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - exercises JSONB NOT NULL, -- Array of 5 exercises - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_exercise_set_user ON exercise_sets(user_id); -- Corrected table name (was already correct but double-checked) - --- Corrected Trigger definition for exercise_sets -CREATE TRIGGER exercise_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON exercise_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_exercises` (Individual exercises within a set) -CREATE TABLE generated_exercises ( - exercise_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES exercise_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - sentence TEXT NOT NULL, - answer TEXT NOT NULL, - choices JSONB NOT NULL, -- Storing the array of choices - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_exercise_set ON generated_exercises(set_id); - --- Trigger for generated_exercises (remains the same) -CREATE TRIGGER generated_exercises_update_updated_at - BEFORE UPDATE ON generated_exercises - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Table for Generated Content (Simulations) --- ============================================ - --- Table `simulations` (Represents one simulation request/result) -- Corrected comment -CREATE TABLE simulations ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - scenario TEXT NOT NULL, - dialog JSONB NOT NULL, -- Array of turns with 'role', 'chinese', 'pinyin', 'english' - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_simulation_user ON simulations(user_id); -- Corrected table name - --- Corrected Trigger definition for simulations -CREATE TRIGGER simulations_update_updated_at -- Renamed trigger - BEFORE UPDATE ON simulations -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too -""" - -def get_db_connection(): - """Get a synchronous database connection.""" - try: - conn = psycopg2.connect( - dbname=DB_NAME, - user=DB_USER, - password=DB_PASSWORD, - host=DB_HOST, - port=DB_PORT - ) - return conn - except psycopg2.Error as e: - print(f"Database connection error: {e}") - raise - -def reset_sequences(): - """Generate SQL to reset all sequences (auto-incrementing IDs) to 1.""" - sequences_sql = """ - SELECT 'ALTER SEQUENCE ' || sequence_name || ' RESTART WITH 1;' - FROM information_schema.sequences - WHERE 
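Each table above gets a BEFORE UPDATE trigger wired to update_updated_at_column(); a quick way to confirm the triggers behave as intended is to insert a row, update it, and compare timestamps. This is only a throwaway check built on the module's own get_db_connection() helper (autocommit is enabled so the two statements run in separate transactions and now() actually advances):

```python
from backend.database import get_db_connection

def check_updated_at_trigger():
    """Insert a throwaway user, update it, and confirm updated_at moved forward."""
    conn = get_db_connection()
    conn.autocommit = True  # separate transactions, so now() differs between statements
    try:
        with conn.cursor() as cur:
            cur.execute(
                "INSERT INTO users (username, email, password_hash) "
                "VALUES ('trigger_check', 'trigger_check@example.com', 'x') "
                "RETURNING user_id, updated_at"
            )
            user_id, created = cur.fetchone()
            cur.execute(
                "UPDATE users SET email = 'trigger_check2@example.com' "
                "WHERE user_id = %s RETURNING updated_at",
                (user_id,),
            )
            updated = cur.fetchone()[0]
            print("trigger fired:", updated > created)
            cur.execute("DELETE FROM users WHERE user_id = %s", (user_id,))  # clean up
    finally:
        conn.close()
```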
sequence_schema = 'public'; - """ - return sequences_sql - -def reset_database(confirm=True): - """Reset the database by dropping all tables and recreating them.""" - if confirm: - user_confirm = input("WARNING: This will DELETE ALL DATA. Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database reset cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Dropping and recreating schema...") - # Execute the main schema SQL (includes drops) - cur.execute(SCHEMA_SQL) - print("Schema recreated successfully.") - - # Generate and execute sequence reset SQL - print("Resetting sequences...") - reset_sql_query = reset_sequences() - cur.execute(reset_sql_query) - reset_commands = cur.fetchall() - for command in reset_commands: - cur.execute(command[0]) - print("Sequences reset successfully.") - - conn.commit() - print("Database reset complete.") - - except psycopg2.Error as e: - print(f"Database error during reset: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during reset: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -def setup_database(confirm=True): - """Set up the database schema if tables do not exist.""" - if confirm: - user_confirm = input("Do you want to set up the database? Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database setup cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Checking if tables exist...") - cur.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_schema = 'public' - AND table_name = 'users' - ); - """) - tables_exist = cur.fetchone()[0] - - if tables_exist: - print("Tables already exist. Use reset_database() to reset the database or run setup with confirm=False.") - conn.rollback() # Rollback as no changes should be made - return - - print("Creating schema...") - cur.execute(SCHEMA_SQL) - print("Schema created successfully.") - - conn.commit() - print("Database setup complete.") - - except psycopg2.Error as e: - print(f"Database error during setup: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during setup: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -if __name__ == "__main__": - action = input("Enter 'setup' to setup database or 'reset' to reset database: ").lower() - if action == 'reset': - reset_database() - elif action == 'setup': - setup_database() - else: - print("Invalid action. 
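The reset path above is slightly indirect: reset_sequences() returns a query whose result rows are themselves ALTER SEQUENCE statements, and reset_database() then executes each fetched row. Condensed into a standalone helper, the same idea looks like this (the sequence name in the comment is illustrative):

```python
from backend.database import get_db_connection, reset_sequences

def restart_all_sequences():
    """Fetch the generated ALTER SEQUENCE commands and run them one by one."""
    conn = get_db_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(reset_sequences())
            # Each row looks like ("ALTER SEQUENCE users_user_id_seq RESTART WITH 1;",)
            for (alter_stmt,) in cur.fetchall():
                cur.execute(alter_stmt)
        conn.commit()
    finally:
        conn.close()
```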
Use 'setup' or 'reset'.") \ No newline at end of file diff --git a/prev_backend_v1/prev_backend_v/main.py b/prev_backend_v1/prev_backend_v/main.py deleted file mode 100644 index dfcf2ee6b3de9618b0e8c76474651fcab9bbb211..0000000000000000000000000000000000000000 --- a/prev_backend_v1/prev_backend_v/main.py +++ /dev/null @@ -1,177 +0,0 @@ -from fastapi import FastAPI, HTTPException -from fastapi.responses import JSONResponse -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from backend.utils import generate_completions -from backend import config -from backend.database import get_db_connection -import psycopg2 -from psycopg2.extras import RealDictCursor -from typing import Union, List, Literal, Optional -import logging -import json - -logging.basicConfig(level=logging.INFO) - -app = FastAPI() - -# Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allows all origins - allow_credentials=True, - allow_methods=["*"], # Allows all methods - allow_headers=["*"], # Allows all headers -) - -# Dependency to get database connection -async def get_db(): - conn = await get_db_connection() - try: - yield conn - finally: - conn.close() - -class Message(BaseModel): - role: Literal["user", "assistant"] - content: str - -class GenerationRequest(BaseModel): - user_id: int - query: Union[str, List[Message]] - -class MetadataRequest(BaseModel): - query: str - -# Global metadata variables -native_language: Optional[str] = None -target_language: Optional[str] = None -proficiency: Optional[str] = None - -@app.get("/") -async def root(): - return {"message": "Welcome to the AI Learning Assistant API!"} - -@app.post("/extract/metadata") -async def extract_metadata(data: MetadataRequest): - logging.info(f"Query: {data.query}") - try: - response_str = await generate_completions.get_completions( - data.query, - config.language_metadata_extraction_prompt - ) - metadata_dict = json.loads(response_str) - # Update globals for other endpoints - globals()['native_language'] = metadata_dict.get('native_language', 'unknown') - globals()['target_language'] = metadata_dict.get('target_language', 'unknown') - globals()['proficiency'] = metadata_dict.get('proficiency_level', 'unknown') - return JSONResponse( - content={ - "data": metadata_dict, - "type": "language_metadata", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/curriculum") -async def generate_curriculum(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.curriculum_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "curriculum", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/flashcards") -async def generate_flashcards(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.flashcard_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await 
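Because the v1 endpoints above keep the extracted metadata in module-level globals, /extract/metadata has to be called before any /generate/* route for the placeholders to be filled in. A minimal client sketch of that two-step flow (httpx and the local URL are assumptions, not part of this codebase):

```python
import asyncio
import httpx

async def demo():
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=60) as client:
        # Step 1: populate the global native_language / target_language / proficiency
        meta = await client.post(
            "/extract/metadata",
            json={"query": "I'm a native English speaker and my Dutch isn't great; I need it for work."},
        )
        print(meta.json()["data"])

        # Step 2: the generation endpoints now substitute that metadata into the instructions
        curriculum = await client.post(
            "/generate/curriculum",
            json={"user_id": 1, "query": "Workplace communication"},
        )
        print(curriculum.json()["status"])

# asyncio.run(demo())
```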
generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "flashcards", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/exercises") -async def generate_exercises(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.exercise_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "exercises", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/simulation") -async def generate_simulation(data: GenerationRequest): - try: - # Use previously extracted metadata - instructions = ( - config.simulation_mode_instructions - .replace("{native_language}", native_language or "unknown") - .replace("{target_language}", target_language or "unknown") - .replace("{proficiency}", proficiency or "unknown") - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "simulation", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-310.pyc b/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-310.pyc deleted file mode 100644 index b515240d65695a530ce3ca667042d874bd615015..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-312.pyc b/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-312.pyc deleted file mode 100644 index 385409736b07934d04493bdefc3f95968dc13143..0000000000000000000000000000000000000000 Binary files a/prev_backend_v1/prev_backend_v/utils/__pycache__/generate_completions.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v1/prev_backend_v/utils/generate_completions.py b/prev_backend_v1/prev_backend_v/utils/generate_completions.py deleted file mode 100644 index aa7c669c860fee3070be08323db52c97c799c18d..0000000000000000000000000000000000000000 --- a/prev_backend_v1/prev_backend_v/utils/generate_completions.py +++ /dev/null @@ -1,107 +0,0 @@ -from openai import AsyncOpenAI, OpenAI -import asyncio -import json -from typing import AsyncIterator -from typing import Union, List, Dict, Literal -from dotenv import load_dotenv -import os -from pydantic import BaseModel -load_dotenv() - -# Initialize the async client -client = AsyncOpenAI( - base_url=os.getenv("BASE_URL"), - api_key=os.getenv("API_KEY"), -) - -class Message(BaseModel): - role: Literal["user", "assistant"] - content: str - -# Helper function to flatten chat messages into a single string prompt -def flatten_messages(messages: List[Message]) -> str: - return "\n".join([f"{m.role}: {m.content}" for m in messages]) - -def process_input(data: Union[str, List[Dict[str, str]]]) -> 
Union[str, List[Dict[str, str]]]: - """ - Processes input to either uppercase a string or modify the 'content' field - of a list of dictionaries. - """ - if isinstance(data, str): - return data.strip() # Ensures prompt is cleaned up (optional) - - elif isinstance(data, list): - # Ensure each item in the list is a dictionary with a 'content' key - return [ - {**item, "content": item["content"].strip()} # Trims whitespace in 'content' - for item in data if isinstance(item, dict) and "content" in item - ] - - else: - raise TypeError("Input must be a string or a list of dictionaries with a 'content' field") - - -# async def get_completions( -# prompt: Union[str, List[Dict[str, str]]], -# instructions: str -# ) -> str: -# processed_prompt = process_input(prompt) # Ensures the input format is correct - -# if isinstance(processed_prompt, str): -# messages = [ -# {"role": "system", "content": instructions}, -# {"role": "user", "content": processed_prompt} -# ] -# elif isinstance(processed_prompt, list): -# messages = [{"role": "system", "content": instructions}] + processed_prompt -# else: -# raise TypeError("Unexpected processed input type.") - -# response = await client.chat.completions.create( -# model=os.getenv("MODEL"), -# messages=messages, -# response_format={"type": "json_object"} -# ) - -# output: str = response.choices[0].message.content -# return output - -async def get_completions( - prompt: Union[str, List[Dict[str, str]]], - instructions: str -) -> str: - if isinstance(prompt, list): - formatted_query = flatten_messages(prompt) - else: - formatted_query = prompt - - processed_prompt = process_input(formatted_query) - - messages = [{"role": "system", "content": instructions}] - - if isinstance(processed_prompt, str): - messages.append({"role": "user", "content": processed_prompt}) - - elif isinstance(processed_prompt, list): - # Only keep the history for context and append the latest user query at the end - history = processed_prompt[:-1] - last_user_msg = processed_prompt[-1] - - # Optional: Validate that the last message is from the user - if last_user_msg.get("role") != "user": - raise ValueError("Last message must be from the user.") - - messages += history - messages.append(last_user_msg) - - else: - raise TypeError("Unexpected processed input type.") - - # print(os.getenv("MODEL")) - response = await client.chat.completions.create( - model=os.getenv("MODEL"), - messages=messages, - response_format={"type": "json_object"} - ) - - return response.choices[0].message.content # adjust based on your client diff --git a/prev_backend_v2/backend/__pycache__/config.cpython-310.pyc b/prev_backend_v2/backend/__pycache__/config.cpython-310.pyc deleted file mode 100644 index 38f9237afe72e4b30ce819a9b3c3834d303b0d70..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/__pycache__/config.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/__pycache__/config.cpython-312.pyc b/prev_backend_v2/backend/__pycache__/config.cpython-312.pyc deleted file mode 100644 index ef41d93c8fed4652e3cad673a34e33cb9d9fc02a..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/__pycache__/database.cpython-310.pyc b/prev_backend_v2/backend/__pycache__/database.cpython-310.pyc deleted file mode 100644 index 0ac106045c9ea5bff004fb9383dea8140b427016..0000000000000000000000000000000000000000 Binary files 
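The two helpers above are easy to sanity-check in isolation: flatten_messages() joins a chat history into "role: content" lines, and process_input() only strips whitespace from a string prompt or from each dict's content field. A self-contained illustration (the import path follows the backend.utils package used by main.py):

```python
from backend.utils.generate_completions import Message, flatten_messages, process_input

history = [
    Message(role="user", content="How do I say 'tripod' in German?"),
    Message(role="assistant", content="Stativ."),
    Message(role="user", content="Use it in a sentence, please."),
]

print(flatten_messages(history))
# user: How do I say 'tripod' in German?
# assistant: Stativ.
# user: Use it in a sentence, please.

print(process_input("  plain string prompt  "))
# 'plain string prompt'

print(process_input([{"role": "user", "content": " hi "}]))
# [{'role': 'user', 'content': 'hi'}]
```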
a/prev_backend_v2/backend/__pycache__/database.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/__pycache__/database.cpython-312.pyc b/prev_backend_v2/backend/__pycache__/database.cpython-312.pyc deleted file mode 100644 index 7705ed5fd8c946f48cfc000ee64827dd0f50a264..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/__pycache__/database.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/__pycache__/main.cpython-310.pyc b/prev_backend_v2/backend/__pycache__/main.cpython-310.pyc deleted file mode 100644 index 60784ac2618fd43d4ee8b1daef6e6c210c798f36..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/__pycache__/main.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/__pycache__/main.cpython-312.pyc b/prev_backend_v2/backend/__pycache__/main.cpython-312.pyc deleted file mode 100644 index ed181365dda81881b789051f39b473612a261e5c..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/database.py b/prev_backend_v2/backend/database.py deleted file mode 100644 index ad5a921b245a594bb25f43fb757c461be249cf4e..0000000000000000000000000000000000000000 --- a/prev_backend_v2/backend/database.py +++ /dev/null @@ -1,293 +0,0 @@ -import psycopg2 -import os -from psycopg2 import sql -from dotenv import load_dotenv - -load_dotenv() - -# Database Configuration from environment variables -DB_NAME = os.getenv("POSTGRES_DB", "linguaai") -DB_USER = os.getenv("POSTGRES_USER", "linguaai_user") -DB_PASSWORD = os.getenv("POSTGRES_PASSWORD", "LinguaAI1008") -DB_HOST = os.getenv("DB_HOST", "localhost") -DB_PORT = os.getenv("DB_PORT", "5432") - -# SQL Schema Definition -SCHEMA_SQL = """ --- Drop existing objects if they exist --- Note: Some drops below might be for tables not defined in this specific script. 
-DROP TABLE IF EXISTS user_activity_progress CASCADE; -DROP TABLE IF EXISTS activities CASCADE; -DROP TABLE IF EXISTS weekly_modules CASCADE; -DROP TABLE IF EXISTS curriculums CASCADE; -DROP TABLE IF EXISTS generated_flashcards CASCADE; -DROP TABLE IF EXISTS flashcard_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS generated_exercises CASCADE; -DROP TABLE IF EXISTS exercise_sets CASCADE; -- Corrected name -DROP TABLE IF EXISTS simulations CASCADE; -- Corrected name -DROP TABLE IF EXISTS users CASCADE; -DROP TYPE IF EXISTS activity_status CASCADE; - --- Table `users` -CREATE TABLE users ( - user_id SERIAL PRIMARY KEY, - username VARCHAR(50) UNIQUE NOT NULL, - email VARCHAR(100) UNIQUE NOT NULL, - password_hash VARCHAR(255) NOT NULL, - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - --- Trigger function (remains the same) -CREATE OR REPLACE FUNCTION update_updated_at_column() -RETURNS TRIGGER AS $$ -BEGIN - NEW.updated_at = now(); - RETURN NEW; -END; -$$ language 'plpgsql'; - --- Trigger for users (remains the same) -CREATE TRIGGER users_update_updated_at - BEFORE UPDATE ON users - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Flashcards) --- ============================================ - --- Table `flashcard_sets` (Represents one request/query) -CREATE TABLE flashcard_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - flashcards JSONB NOT NULL, -- Stores an array of 5 flashcards - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_flashcard_set_user ON flashcard_sets(user_id); - --- Corrected Trigger definition for flashcard_sets -CREATE TRIGGER flashcard_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON flashcard_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_flashcards` (Individual flashcards within a set) -CREATE TABLE generated_flashcards ( - flashcard_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES flashcard_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - word TEXT NOT NULL, - definition TEXT NOT NULL, - example TEXT, -- Example might be optional - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_flashcard_set ON generated_flashcards(set_id); - --- Trigger for generated_flashcards (remains the same) -CREATE TRIGGER generated_flashcards_update_updated_at - BEFORE UPDATE ON generated_flashcards - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Tables for Generated Content (Exercises) --- ============================================ - --- Table `exercise_sets` (Represents one request/query) -- Corrected comment -CREATE TABLE exercise_sets ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - exercises JSONB NOT NULL, -- Array of 5 exercises - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_exercise_set_user ON 
exercise_sets(user_id); -- Corrected table name (was already correct but double-checked) - --- Corrected Trigger definition for exercise_sets -CREATE TRIGGER exercise_sets_update_updated_at -- Renamed trigger - BEFORE UPDATE ON exercise_sets -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too - --- Table `generated_exercises` (Individual exercises within a set) -CREATE TABLE generated_exercises ( - exercise_id SERIAL PRIMARY KEY, - set_id INT NOT NULL REFERENCES exercise_sets(id) ON DELETE CASCADE, -- Corrected FK reference (table and column) - sentence TEXT NOT NULL, - answer TEXT NOT NULL, - choices JSONB NOT NULL, -- Storing the array of choices - created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX idx_exercise_set ON generated_exercises(set_id); - --- Trigger for generated_exercises (remains the same) -CREATE TRIGGER generated_exercises_update_updated_at - BEFORE UPDATE ON generated_exercises - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); - - --- ============================================ --- Table for Generated Content (Simulations) --- ============================================ - --- Table `simulations` (Represents one simulation request/result) -- Corrected comment -CREATE TABLE simulations ( - id SERIAL PRIMARY KEY, - user_id INTEGER NOT NULL REFERENCES users(user_id), -- Added FK reference for completeness - query TEXT NOT NULL, - scenario TEXT NOT NULL, - dialog JSONB NOT NULL, -- Array of turns with 'role', 'chinese', 'pinyin', 'english' - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- Added updated_at for consistency -); - -CREATE INDEX idx_simulation_user ON simulations(user_id); -- Corrected table name - --- Corrected Trigger definition for simulations -CREATE TRIGGER simulations_update_updated_at -- Renamed trigger - BEFORE UPDATE ON simulations -- Corrected table name - FOR EACH ROW - EXECUTE FUNCTION update_updated_at_column(); -- Assumes you want updated_at here too -""" - -def get_db_connection(): - """Get a synchronous database connection.""" - try: - conn = psycopg2.connect( - dbname=DB_NAME, - user=DB_USER, - password=DB_PASSWORD, - host=DB_HOST, - port=DB_PORT - ) - return conn - except psycopg2.Error as e: - print(f"Database connection error: {e}") - raise - -def reset_sequences(): - """Generate SQL to reset all sequences (auto-incrementing IDs) to 1.""" - sequences_sql = """ - SELECT 'ALTER SEQUENCE ' || sequence_name || ' RESTART WITH 1;' - FROM information_schema.sequences - WHERE sequence_schema = 'public'; - """ - return sequences_sql - -def reset_database(confirm=True): - """Reset the database by dropping all tables and recreating them.""" - if confirm: - user_confirm = input("WARNING: This will DELETE ALL DATA. 
Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database reset cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Dropping and recreating schema...") - # Execute the main schema SQL (includes drops) - cur.execute(SCHEMA_SQL) - print("Schema recreated successfully.") - - # Generate and execute sequence reset SQL - print("Resetting sequences...") - reset_sql_query = reset_sequences() - cur.execute(reset_sql_query) - reset_commands = cur.fetchall() - for command in reset_commands: - cur.execute(command[0]) - print("Sequences reset successfully.") - - conn.commit() - print("Database reset complete.") - - except psycopg2.Error as e: - print(f"Database error during reset: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during reset: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -def setup_database(confirm=True): - """Set up the database schema if tables do not exist.""" - if confirm: - user_confirm = input("Do you want to set up the database? Type 'yes' to proceed: ") - if user_confirm.lower() != 'yes': - print("Database setup cancelled.") - return - - conn = None - try: - conn = get_db_connection() - conn.autocommit = False - print("Database connection established.") - - with conn.cursor() as cur: - print("Checking if tables exist...") - cur.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_schema = 'public' - AND table_name = 'users' - ); - """) - tables_exist = cur.fetchone()[0] - - if tables_exist: - print("Tables already exist. Use reset_database() to reset the database or run setup with confirm=False.") - conn.rollback() # Rollback as no changes should be made - return - - print("Creating schema...") - cur.execute(SCHEMA_SQL) - print("Schema created successfully.") - - conn.commit() - print("Database setup complete.") - - except psycopg2.Error as e: - print(f"Database error during setup: {e}") - if conn: - conn.rollback() - print("Transaction rolled back.") - except Exception as e: - print(f"An unexpected error occurred during setup: {e}") - if conn: - conn.rollback() - finally: - if conn: - conn.close() - print("Database connection closed.") - -if __name__ == "__main__": - action = input("Enter 'setup' to setup database or 'reset' to reset database: ").lower() - if action == 'reset': - reset_database() - elif action == 'setup': - setup_database() - else: - print("Invalid action. 
Use 'setup' or 'reset'.") \ No newline at end of file diff --git a/prev_backend_v2/backend/main.py b/prev_backend_v2/backend/main.py deleted file mode 100644 index 18fb37911dc7bbbb2756c42904be0224a5dfec62..0000000000000000000000000000000000000000 --- a/prev_backend_v2/backend/main.py +++ /dev/null @@ -1,189 +0,0 @@ -from fastapi import FastAPI, HTTPException -from fastapi.responses import JSONResponse -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from backend.utils import generate_completions -from backend import config -from backend.database import get_db_connection -import psycopg2 -from psycopg2.extras import RealDictCursor -from typing import Union, List, Literal, Optional -import logging -import json - -logging.basicConfig(level=logging.INFO) - -app = FastAPI() - -# Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allows all origins - allow_credentials=True, - allow_methods=["*"], # Allows all methods - allow_headers=["*"], # Allows all headers -) - -# Dependency to get database connection -async def get_db(): - conn = await get_db_connection() - try: - yield conn - finally: - conn.close() - -class Message(BaseModel): - role: Literal["user", "assistant"] - content: str - -class GenerationRequest(BaseModel): - user_id: int - query: Union[str, List[Message]] - native_language: Optional[str] = None - target_language: Optional[str] = None - proficiency: Optional[str] = None - -class MetadataRequest(BaseModel): - query: str - -# Global metadata variables -native_language: Optional[str] = None -target_language: Optional[str] = None -proficiency: Optional[str] = None - -@app.get("/") -async def root(): - return {"message": "Welcome to the AI Learning Assistant API!"} - -@app.post("/extract/metadata") -async def extract_metadata(data: MetadataRequest): - logging.info(f"Query: {data.query}") - try: - response_str = await generate_completions.get_completions( - data.query, - config.language_metadata_extraction_prompt - ) - metadata_dict = json.loads(response_str) - # Update globals for other endpoints - globals()['native_language'] = metadata_dict.get('native_language', 'unknown') - globals()['target_language'] = metadata_dict.get('target_language', 'unknown') - globals()['proficiency'] = metadata_dict.get('proficiency', 'unknown') - return JSONResponse( - content={ - "data": metadata_dict, - "type": "language_metadata", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/curriculum") -async def generate_curriculum(data: GenerationRequest): - try: - # Use metadata from request or fallback to globals - nl = data.native_language or native_language or "unknown" - tl = data.target_language or target_language or "unknown" - prof = data.proficiency or proficiency or "unknown" - instructions = ( - config.curriculum_instructions - .replace("{native_language}", nl) - .replace("{target_language}", tl) - .replace("{proficiency}", prof) - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "curriculum", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/flashcards") -async def generate_flashcards(data: GenerationRequest): - try: - nl = data.native_language or native_language or "unknown" - tl = data.target_language or target_language 
or "unknown" - prof = data.proficiency or proficiency or "unknown" - instructions = ( - config.flashcard_mode_instructions - .replace("{native_language}", nl) - .replace("{target_language}", tl) - .replace("{proficiency}", prof) - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "flashcards", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/exercises") -async def generate_exercises(data: GenerationRequest): - try: - nl = data.native_language or native_language or "unknown" - tl = data.target_language or target_language or "unknown" - prof = data.proficiency or proficiency or "unknown" - instructions = ( - config.exercise_mode_instructions - .replace("{native_language}", nl) - .replace("{target_language}", tl) - .replace("{proficiency}", prof) - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "exercises", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/generate/simulation") -async def generate_simulation(data: GenerationRequest): - try: - nl = data.native_language or native_language or "unknown" - tl = data.target_language or target_language or "unknown" - prof = data.proficiency or proficiency or "unknown" - instructions = ( - config.simulation_mode_instructions - .replace("{native_language}", nl) - .replace("{target_language}", tl) - .replace("{proficiency}", prof) - ) - response = await generate_completions.get_completions( - data.query, - instructions - ) - return JSONResponse( - content={ - "data": response, - "type": "simulation", - "status": "success" - }, - status_code=200 - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-310.pyc b/prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-310.pyc deleted file mode 100644 index b515240d65695a530ce3ca667042d874bd615015..0000000000000000000000000000000000000000 Binary files a/prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-310.pyc and /dev/null differ diff --git a/prev_backend_v2/backend/utils/generate_completions.py b/prev_backend_v2/backend/utils/generate_completions.py deleted file mode 100644 index aa7c669c860fee3070be08323db52c97c799c18d..0000000000000000000000000000000000000000 --- a/prev_backend_v2/backend/utils/generate_completions.py +++ /dev/null @@ -1,107 +0,0 @@ -from openai import AsyncOpenAI, OpenAI -import asyncio -import json -from typing import AsyncIterator -from typing import Union, List, Dict, Literal -from dotenv import load_dotenv -import os -from pydantic import BaseModel -load_dotenv() - -# Initialize the async client -client = AsyncOpenAI( - base_url=os.getenv("BASE_URL"), - api_key=os.getenv("API_KEY"), -) - -class Message(BaseModel): - role: Literal["user", "assistant"] - content: str - -# Helper function to flatten chat messages into a single string prompt -def flatten_messages(messages: List[Message]) -> str: - return "\n".join([f"{m.role}: {m.content}" for m in messages]) - -def process_input(data: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]: - """ - Processes input to either uppercase a string 
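In the v2 endpoints above, metadata sent with the request takes priority over the globals set by /extract/metadata, so a client can make a single self-contained call. A small sketch (httpx and the local URL are assumptions):

```python
import httpx

def request_flashcards():
    payload = {
        "user_id": 1,
        "query": "Flashcards for my hobby: landscape photography",
        "native_language": "English",
        "target_language": "German",
        "proficiency": "intermediate",
    }
    r = httpx.post("http://localhost:8000/generate/flashcards", json=payload, timeout=60)
    r.raise_for_status()
    return r.json()["data"]  # raw JSON string produced by the model
```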
or modify the 'content' field - of a list of dictionaries. - """ - if isinstance(data, str): - return data.strip() # Ensures prompt is cleaned up (optional) - - elif isinstance(data, list): - # Ensure each item in the list is a dictionary with a 'content' key - return [ - {**item, "content": item["content"].strip()} # Trims whitespace in 'content' - for item in data if isinstance(item, dict) and "content" in item - ] - - else: - raise TypeError("Input must be a string or a list of dictionaries with a 'content' field") - - -# async def get_completions( -# prompt: Union[str, List[Dict[str, str]]], -# instructions: str -# ) -> str: -# processed_prompt = process_input(prompt) # Ensures the input format is correct - -# if isinstance(processed_prompt, str): -# messages = [ -# {"role": "system", "content": instructions}, -# {"role": "user", "content": processed_prompt} -# ] -# elif isinstance(processed_prompt, list): -# messages = [{"role": "system", "content": instructions}] + processed_prompt -# else: -# raise TypeError("Unexpected processed input type.") - -# response = await client.chat.completions.create( -# model=os.getenv("MODEL"), -# messages=messages, -# response_format={"type": "json_object"} -# ) - -# output: str = response.choices[0].message.content -# return output - -async def get_completions( - prompt: Union[str, List[Dict[str, str]]], - instructions: str -) -> str: - if isinstance(prompt, list): - formatted_query = flatten_messages(prompt) - else: - formatted_query = prompt - - processed_prompt = process_input(formatted_query) - - messages = [{"role": "system", "content": instructions}] - - if isinstance(processed_prompt, str): - messages.append({"role": "user", "content": processed_prompt}) - - elif isinstance(processed_prompt, list): - # Only keep the history for context and append the latest user query at the end - history = processed_prompt[:-1] - last_user_msg = processed_prompt[-1] - - # Optional: Validate that the last message is from the user - if last_user_msg.get("role") != "user": - raise ValueError("Last message must be from the user.") - - messages += history - messages.append(last_user_msg) - - else: - raise TypeError("Unexpected processed input type.") - - # print(os.getenv("MODEL")) - response = await client.chat.completions.create( - model=os.getenv("MODEL"), - messages=messages, - response_format={"type": "json_object"} - ) - - return response.choices[0].message.content # adjust based on your client diff --git a/prev_backend_v4/backend/__pycache__/config.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93871f2b4681f323ed2762cbb242eff5c24839f1 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/config.cpython-311.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/config.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fcfb4c9aa39c2a9cbb7770b3662fcebe8928d55b Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/config.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/content_generator.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/content_generator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3712570ec594399706ff35edf6948f9542659668 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/content_generator.cpython-311.pyc differ diff --git 
a/prev_backend_v4/backend/__pycache__/content_generator.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/content_generator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..abf3acf97e1a6f88b6d7c9e8d29f46205c7b4569 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/content_generator.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/db.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..265705cbba48f9b3eb121b54590f4f559849e634 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db.cpython-311.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/db.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..755c391358dd3cd011211ac176fd808c43e44d56 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db_cache.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/db_cache.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05f246e9c73b3d771997eafdbed9f87a7779ae4e Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db_cache.cpython-311.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db_cache.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/db_cache.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3b02cfb48ace29668349bd7449c6eb9961833c3 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db_cache.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db_init.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/db_init.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e9252e18e32b2b6435297bb2dd8fff38f3e39a2 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db_init.cpython-311.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/db_init.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/db_init.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95ce3ba3eae1a96506add256b7b0043978af3cb9 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/db_init.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/main.cpython-311.pyc b/prev_backend_v4/backend/__pycache__/main.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d54d350a58c69f764751dd5a616351183067449 Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/main.cpython-311.pyc differ diff --git a/prev_backend_v4/backend/__pycache__/main.cpython-312.pyc b/prev_backend_v4/backend/__pycache__/main.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a5ece91217c265c8f897397b061319d7393d2af Binary files /dev/null and b/prev_backend_v4/backend/__pycache__/main.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/cache.py b/prev_backend_v4/backend/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..6f6760b62161db15966351664a5e5887a49d86b2 --- /dev/null +++ b/prev_backend_v4/backend/cache.py @@ -0,0 +1,29 @@ +import asyncio +from typing import Any, Callable, Dict, Tuple + +class AsyncLRUCache: + def __init__(self, maxsize=100_000): # Optimized for 16GB RAM + self.cache: Dict[Tuple, Any] = {} + self.order = [] + self.maxsize = maxsize + self.lock = 
asyncio.Lock() + + async def get_or_set(self, key: Tuple, coro: Callable, *args, **kwargs): + async with self.lock: + if key in self.cache: + # Move key to end to show it was recently used + self.order.remove(key) + self.order.append(key) + return self.cache[key] + # Not cached, compute result + result = await coro(*args, **kwargs) + async with self.lock: + self.cache[key] = result + self.order.append(key) + if len(self.order) > self.maxsize: + oldest = self.order.pop(0) + del self.cache[oldest] + return result + +# Initialize cache with optimized size for 16GB RAM +cache = AsyncLRUCache() # Uses default maxsize=100_000 \ No newline at end of file diff --git a/prev_backend_v2/backend/config.py b/prev_backend_v4/backend/config.py similarity index 97% rename from prev_backend_v2/backend/config.py rename to prev_backend_v4/backend/config.py index ee3d5aaa93632b5782367ba1d710d71a43244736..77c652bb72690d6582af6bfb657ac2f5c3fffa8d 100644 --- a/prev_backend_v2/backend/config.py +++ b/prev_backend_v4/backend/config.py @@ -48,7 +48,7 @@ curriculum_instructions = """ You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 25-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}. ### Curriculum Goals: -- Provide 25 lessons. +- Provide 5 lessons. - Ensure logical progression from basic to advanced topics (according to {proficiency}). - Align each lesson with a practical communication goal. - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life). @@ -57,15 +57,15 @@ You are an AI-powered language learning assistant tasked with generating an exte 1. **Define the Lesson Series (Overall Theme):** - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions"). - - The theme should guide the tone, content, and scope of the entire 25-lesson sequence. + - The theme should guide the tone, content, and scope of the entire 5-lesson sequence. -2. **Divide the Curriculum into 25 Thematic Lessons:** +2. **Divide the Curriculum into 5 Thematic Lessons:** - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation). - Sequence lessons to build from foundational topics to more complex, specialized language use. - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage. 3. **Describe Each Lesson Clearly and Concisely:** - For each of the 25 lessons, provide: + For each of the 5 lessons, provide: - "sub_topic": A clear and practical lesson title in {native_language}. - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation"). - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented. @@ -73,7 +73,7 @@ You are an AI-powered language learning assistant tasked with generating an exte ### Output Format: Return a valid JSON object with: - "lesson_topic": The overall learning theme (in {native_language}). -- "sub_topics": A list of 25 items. Each item must include: +- "sub_topics": A list of 5 items. 
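The AsyncLRUCache added in cache.py above memoizes coroutine results keyed by an arbitrary hashable tuple. A sketch of how it could sit in front of the completion call (the wiring and the backend.cache import path are assumptions; the v4 code itself routes lesson caching through db_cache.api_cache):

```python
from backend.cache import cache
from backend.utils import generate_completions

async def cached_completion(prompt: str, instructions: str) -> str:
    # Identical (prompt, instructions) pairs are served from memory after the first call.
    return await cache.get_or_set(
        ("completion", prompt, instructions),
        generate_completions.get_completions,
        prompt,
        instructions,
    )
```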
Each item must include: - "sub_topic": A short title of the lesson (in {native_language}). - "keywords": A list of 1–3 general-purpose categories (in {native_language}). - "description": One clear sentence (in {native_language}) describing the purpose of the lesson. @@ -142,15 +142,12 @@ flashcard_mode_instructions = """ # Native language: {native_language} # Target language: {target_language} # Proficiency level: {proficiency} - You are a highly adaptive vocabulary tutor capable of teaching any language. Your goal is to help users learn rapidly by generating personalized flashcards from lesson-based content. - ### Input Format You will receive a structured lesson as input (text, dialogue, or vocabulary list). Use this input to: - Identify new or useful vocabulary terms. - Extract contextually relevant and domain-specific language. - Ensure that flashcards reflect the lesson's language, style, and purpose. - ### Generation Guidelines When generating flashcards: 1. **Use the provided metadata**: @@ -160,17 +157,14 @@ When generating flashcards: - *Beginner*: High-frequency, essential words. - *Intermediate*: Broader, topic-specific terms and common collocations. - *Advanced*: Nuanced, idiomatic, or technical vocabulary. - 2. **Contextual relevance**: - Flashcards should reflect the themes, activities, or domain of the lesson input (e.g., cooking, business, travel). - Ensure that example sentences are directly related to the input content and sound natural in use. - 3. **Avoid redundancy**: - Select terms that are novel, useful, or not overly repetitive within the lesson. - Prioritize terms that learners are likely to encounter again in real-world usage. - ### Flashcard Format -Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing: +Generate exactly **5 flashcards** as a **valid JSON array**, with each flashcard containing: - `"word"`: A key word or phrase in {target_language} drawn from the lesson. - `"definition"`: A learner-friendly explanation in {native_language}. - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**. @@ -182,7 +176,8 @@ simulation_mode_instructions = """ # Target language: {target_language} # Proficiency level: {proficiency} -You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant. Stories must reflect the user's interests, profession, or hobbies, and align with their learning level. +You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant. +Stories must reflect the user's interests, profession, or hobbies, and align with their learning level. ### Input Format You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). 
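With the changes above, a curriculum response is expected to contain exactly 5 sub_topics rather than 25, and flashcard sets shrink from 10 to 5 cards. A minimal sketch of the curriculum shape for validating responses (the model names are illustrative):

```python
import json
from typing import List
from pydantic import BaseModel

class SubTopic(BaseModel):
    sub_topic: str
    keywords: List[str]         # the prompt asks for 1-3 categories
    description: str

class Curriculum(BaseModel):
    lesson_topic: str
    sub_topics: List[SubTopic]  # exactly 5 after this change

def parse_curriculum(response_str: str) -> Curriculum:
    return Curriculum(**json.loads(response_str))
```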
Use this input to: diff --git a/prev_backend_v4/backend/content_generator.py b/prev_backend_v4/backend/content_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd23c2df82441042acf394bac03caed95462a88 --- /dev/null +++ b/prev_backend_v4/backend/content_generator.py @@ -0,0 +1,295 @@ +import json +import asyncio +from typing import Dict, Any, Optional, List +from backend.utils import generate_completions +from backend import config +from backend.db import db +from backend.db_cache import api_cache +import logging + +logger = logging.getLogger(__name__) + + +class ContentGenerator: + """Service for generating and storing all learning content""" + + async def generate_curriculum_from_metadata( + self, + metadata_extraction_id: str, + query: str, + metadata: Dict[str, Any], + user_id: Optional[int] = None + ) -> str: + """Generate curriculum based on extracted metadata""" + # Format curriculum instructions with metadata + instructions = ( + config.curriculum_instructions + .replace("{native_language}", metadata['native_language']) + .replace("{target_language}", metadata['target_language']) + .replace("{proficiency}", metadata['proficiency']) + ) + + # Generate curriculum + logger.info(f"Generating curriculum for {metadata['target_language']} ({metadata['proficiency']})") + curriculum_response = await generate_completions.get_completions(query, instructions) + + try: + # Parse curriculum response + curriculum = json.loads(curriculum_response) + except json.JSONDecodeError: + logger.error(f"Failed to parse curriculum response: {curriculum_response[:200]}...") + curriculum = {"lesson_topic": "Language Learning Journey", "sub_topics": []} + + # Save curriculum to database + curriculum_id = await db.save_curriculum( + metadata_extraction_id=metadata_extraction_id, + curriculum=curriculum, + user_id=user_id + ) + + return curriculum_id + + async def generate_content_for_lesson( + self, + curriculum_id: str, + lesson_index: int, + lesson: Dict[str, Any], + metadata: Dict[str, Any] + ) -> Dict[str, str]: + """Generate all content types for a single lesson""" + content_ids = {} + lesson_topic = lesson.get('sub_topic', f'Lesson {lesson_index + 1}') + lesson_context = f"{lesson_topic}: {lesson.get('description', '')}" + + # Generate flashcards + try: + flashcards_instructions = ( + config.flashcard_mode_instructions + .replace("{native_language}", metadata['native_language']) + .replace("{target_language}", metadata['target_language']) + .replace("{proficiency}", metadata['proficiency']) + ) + + flashcards_response = await api_cache.get_or_set( + category="flashcards", + key_text=lesson_context, + coro=generate_completions.get_completions, + context={ + 'native_language': metadata['native_language'], + 'target_language': metadata['target_language'], + 'proficiency': metadata['proficiency'], + 'lesson_index': lesson_index + }, + prompt=lesson_context, + instructions=flashcards_instructions + ) + + # Save flashcards + content_ids['flashcards'] = await db.save_learning_content( + curriculum_id=curriculum_id, + content_type='flashcards', + lesson_index=lesson_index, + lesson_topic=lesson_topic, + content=flashcards_response + ) + except Exception as e: + logger.error(f"Failed to generate flashcards for lesson {lesson_index}: {e}") + + # Generate exercises + try: + exercises_instructions = ( + config.exercise_mode_instructions + .replace("{native_language}", metadata['native_language']) + .replace("{target_language}", metadata['target_language']) + .replace("{proficiency}", 
metadata['proficiency']) + ) + + exercises_response = await api_cache.get_or_set( + category="exercises", + key_text=lesson_context, + coro=generate_completions.get_completions, + context={ + 'native_language': metadata['native_language'], + 'target_language': metadata['target_language'], + 'proficiency': metadata['proficiency'], + 'lesson_index': lesson_index + }, + prompt=lesson_context, + instructions=exercises_instructions + ) + + # Save exercises + content_ids['exercises'] = await db.save_learning_content( + curriculum_id=curriculum_id, + content_type='exercises', + lesson_index=lesson_index, + lesson_topic=lesson_topic, + content=exercises_response + ) + except Exception as e: + logger.error(f"Failed to generate exercises for lesson {lesson_index}: {e}") + + # Generate simulation + try: + simulation_instructions = ( + config.simulation_mode_instructions + .replace("{native_language}", metadata['native_language']) + .replace("{target_language}", metadata['target_language']) + .replace("{proficiency}", metadata['proficiency']) + ) + + simulation_response = await api_cache.get_or_set( + category="simulation", + key_text=lesson_context, + coro=generate_completions.get_completions, + context={ + 'native_language': metadata['native_language'], + 'target_language': metadata['target_language'], + 'proficiency': metadata['proficiency'], + 'lesson_index': lesson_index + }, + prompt=lesson_context, + instructions=simulation_instructions + ) + + # Save simulation + content_ids['simulation'] = await db.save_learning_content( + curriculum_id=curriculum_id, + content_type='simulation', + lesson_index=lesson_index, + lesson_topic=lesson_topic, + content=simulation_response + ) + except Exception as e: + logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}") + + return content + + async def generate_all_content_for_curriculum( + self, + curriculum_id: str, + max_concurrent_lessons: int = 3 + ): + """Generate all learning content for a curriculum""" + # Get curriculum details + curriculum_data = await db.get_curriculum(curriculum_id) + if not curriculum_data: + logger.error(f"Curriculum not found: {curriculum_id}") + return + + # Parse curriculum JSON + try: + curriculum = json.loads(curriculum_data['curriculum_json']) + lessons = curriculum.get('sub_topics', []) + except json.JSONDecodeError: + logger.error(f"Failed to parse curriculum JSON for {curriculum_id}") + return + + # Prepare metadata + metadata = { + 'native_language': curriculum_data['native_language'], + 'target_language': curriculum_data['target_language'], + 'proficiency': curriculum_data['proficiency'] + } + + logger.info(f"Starting content generation for {len(lessons)} lessons") + + # Process lessons in batches to avoid overwhelming the API + for i in range(0, len(lessons), max_concurrent_lessons): + batch = lessons[i:i + max_concurrent_lessons] + batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons)))) + + # Generate content for batch concurrently + tasks = [ + self.generate_content_for_lesson( + curriculum_id=curriculum_id, + lesson_index=idx, + lesson=lesson, + metadata=metadata + ) + for idx, lesson in zip(batch_indices, batch) + ] + + results = await asyncio.gather(*tasks, return_exceptions=True) + + for idx, result in zip(batch_indices, results): + if isinstance(result, Exception): + logger.error(f"Failed to generate content for lesson {idx}: {result}") + else: + logger.info(f"Generated content for lesson {idx}: {result}") + + # Mark curriculum as content generated + await 
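The lesson loop above batches work in groups of max_concurrent_lessons and runs each batch through asyncio.gather with return_exceptions=True, so one failing lesson does not abort the rest. The same pattern, reduced to a generic helper for clarity (a restatement, not code from this diff):

```python
import asyncio
from typing import Awaitable, Callable, List, Sequence, TypeVar

T = TypeVar("T")

async def run_in_batches(
    items: Sequence[T],
    worker: Callable[[T], Awaitable],
    batch_size: int = 3,
) -> List:
    """Run `worker` over `items` a few at a time; exceptions are returned per item."""
    results: List = []
    for i in range(0, len(items), batch_size):
        batch = items[i:i + batch_size]
        results.extend(
            await asyncio.gather(*(worker(item) for item in batch), return_exceptions=True)
        )
    return results
```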
db.mark_curriculum_content_generated(curriculum_id) + logger.info(f"Completed content generation for curriculum {curriculum_id}") + + async def process_metadata_extraction( + self, + extraction_id: str, + query: str, + metadata: Dict[str, Any], + user_id: Optional[int] = None, + generate_content: bool = True + ) -> Dict[str, Any]: + """Process a metadata extraction by checking for existing curriculum or generating new one""" + + # Check for existing curriculum first + existing_curriculum = await db.find_existing_curriculum( + query=query, + native_language=metadata['native_language'], + target_language=metadata['target_language'], + proficiency=metadata['proficiency'], + user_id=user_id + ) + + if existing_curriculum: + # If we found an exact match for this user, return it + if existing_curriculum.get('user_id') == user_id: + logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}") + return { + 'curriculum_id': existing_curriculum['id'], + 'content_generation_started': False, + 'cached': True, + 'cache_type': 'user_exact_match' + } + + # If we found a similar curriculum from another user, copy it + elif existing_curriculum.get('is_content_generated') == 1: + logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}") + curriculum_id = await db.copy_curriculum_for_user( + source_curriculum_id=existing_curriculum['id'], + metadata_extraction_id=extraction_id, + user_id=user_id + ) + return { + 'curriculum_id': curriculum_id, + 'content_generation_started': False, + 'cached': True, + 'cache_type': 'copied_from_similar' + } + + # No suitable existing curriculum found, generate new one + logger.info(f"No existing curriculum found, generating new one for user {user_id}") + curriculum_id = await self.generate_curriculum_from_metadata( + metadata_extraction_id=extraction_id, + query=query, + metadata=metadata, + user_id=user_id + ) + + result = { + 'curriculum_id': curriculum_id, + 'content_generation_started': False, + 'cached': False, + 'cache_type': 'newly_generated' + } + + if generate_content: + # Start content generation in background + asyncio.create_task(self.generate_all_content_for_curriculum(curriculum_id)) + result['content_generation_started'] = True + + return result + + +# Global content generator instance +content_generator = ContentGenerator() \ No newline at end of file diff --git a/prev_backend_v4/backend/database_init.py b/prev_backend_v4/backend/database_init.py new file mode 100644 index 0000000000000000000000000000000000000000..ded469d6f48f4223e80cea353f96d2b39158e01d --- /dev/null +++ b/prev_backend_v4/backend/database_init.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +Database initialization script for AI Language Tutor +Run this script to create database tables +""" + +import asyncio +import sys +import os + +# Add the project root to Python path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from backend.database import create_tables, drop_tables +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +async def init_database(): + """Initialize database tables""" + try: + logger.info("Creating database tables...") + await create_tables() + logger.info("Database tables created successfully!") + except Exception as e: + logger.error(f"Error creating database tables: {e}") + raise + + +async def reset_database(): + """Reset database (drop and recreate tables)""" + try: + logger.info("Dropping existing tables...") + await 
drop_tables() + logger.info("Creating new tables...") + await create_tables() + logger.info("Database reset successfully!") + except Exception as e: + logger.error(f"Error resetting database: {e}") + raise + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Database initialization for AI Language Tutor") + parser.add_argument( + "--reset", + action="store_true", + help="Reset database (drop and recreate tables)" + ) + + args = parser.parse_args() + + if args.reset: + print("⚠️ WARNING: This will delete all existing data!") + confirm = input("Are you sure you want to reset the database? (yes/no): ") + if confirm.lower() == "yes": + asyncio.run(reset_database()) + else: + print("Database reset cancelled.") + else: + asyncio.run(init_database()) \ No newline at end of file diff --git a/prev_backend_v4/backend/db.py b/prev_backend_v4/backend/db.py new file mode 100644 index 0000000000000000000000000000000000000000..e738efe0801181f56f84ca32f48e9c0eec93e279 --- /dev/null +++ b/prev_backend_v4/backend/db.py @@ -0,0 +1,434 @@ +import aiosqlite +import json +import os +from typing import Optional, List, Dict, Any +from datetime import datetime +import uuid +import logging + +logger = logging.getLogger(__name__) + +# Database file path +DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db") + + +class Database: + """Pure SQLite database handler for AI Language Tutor""" + + def __init__(self, db_path: str = DB_PATH): + self.db_path = db_path + + async def initialize(self): + """Initialize database with schema""" + async with aiosqlite.connect(self.db_path) as db: + # Read and execute schema - look for it in parent directory + schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql') + with open(schema_path, 'r') as f: + schema = f.read() + await db.executescript(schema) + await db.commit() + logger.info("Database initialized successfully") + + async def find_existing_curriculum( + self, + query: str, + native_language: str, + target_language: str, + proficiency: str, + user_id: Optional[int] = None + ) -> Optional[Dict[str, Any]]: + """Find existing curriculum for similar query and metadata""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + + if user_id is not None: + # User-specific search: First try to find exact query match for the user + async with db.execute(""" + SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE m.user_id = ? AND m.query = ? AND m.native_language = ? + AND m.target_language = ? AND m.proficiency = ? + ORDER BY c.created_at DESC + LIMIT 1 + """, (user_id, query, native_language, target_language, proficiency)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + + # Then try to find similar curriculum with same metadata (any user) + async with db.execute(""" + SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE m.native_language = ? AND m.target_language = ? AND m.proficiency = ? 
+ AND c.is_content_generated = 1 + ORDER BY c.created_at DESC + LIMIT 1 + """, (native_language, target_language, proficiency)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + else: + # User-independent search: Find exact query match regardless of user + async with db.execute(""" + SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ? + ORDER BY c.created_at DESC + LIMIT 1 + """, (query, native_language, target_language, proficiency)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + + return None + + async def save_metadata_extraction( + self, + query: str, + metadata: Dict[str, Any], + user_id: Optional[int] = None + ) -> str: + """Save extracted metadata and return extraction ID""" + extraction_id = str(uuid.uuid4()) + + async with aiosqlite.connect(self.db_path) as db: + await db.execute(""" + INSERT INTO metadata_extractions + (id, user_id, query, native_language, target_language, proficiency, title, description, metadata_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + extraction_id, + user_id, + query, + metadata.get('native_language'), + metadata.get('target_language'), + metadata.get('proficiency'), + metadata.get('title'), + metadata.get('description'), + json.dumps(metadata) + )) + await db.commit() + + logger.info(f"Saved metadata extraction: {extraction_id}") + return extraction_id + + async def save_curriculum( + self, + metadata_extraction_id: str, + curriculum: Dict[str, Any], + user_id: Optional[int] = None + ) -> str: + """Save generated curriculum and return curriculum ID""" + curriculum_id = str(uuid.uuid4()) + + async with aiosqlite.connect(self.db_path) as db: + await db.execute(""" + INSERT INTO curricula + (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json) + VALUES (?, ?, ?, ?, ?) + """, ( + curriculum_id, + metadata_extraction_id, + user_id, + curriculum.get('lesson_topic', ''), + json.dumps(curriculum) + )) + await db.commit() + + logger.info(f"Saved curriculum: {curriculum_id}") + return curriculum_id + + async def copy_curriculum_for_user( + self, + source_curriculum_id: str, + metadata_extraction_id: str, + user_id: Optional[int] = None + ) -> str: + """Copy an existing curriculum for a new user""" + new_curriculum_id = str(uuid.uuid4()) + + async with aiosqlite.connect(self.db_path) as db: + # Get source curriculum + async with db.execute(""" + SELECT lesson_topic, curriculum_json FROM curricula WHERE id = ? + """, (source_curriculum_id,)) as cursor: + row = await cursor.fetchone() + if not row: + raise ValueError(f"Source curriculum {source_curriculum_id} not found") + + lesson_topic, curriculum_json = row + + # Create new curriculum + await db.execute(""" + INSERT INTO curricula + (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated) + VALUES (?, ?, ?, ?, ?, 0) + """, ( + new_curriculum_id, + metadata_extraction_id, + user_id, + lesson_topic, + curriculum_json + )) + + # Copy all learning content + await db.execute(""" + INSERT INTO learning_content + (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json) + SELECT + lower(hex(randomblob(16))), + ?, + content_type, + lesson_index, + lesson_topic, + content_json + FROM learning_content + WHERE curriculum_id = ? 
+ """, (new_curriculum_id, source_curriculum_id)) + + # Mark as content generated + await db.execute(""" + UPDATE curricula + SET is_content_generated = 1 + WHERE id = ? + """, (new_curriculum_id,)) + + await db.commit() + + logger.info(f"Copied curriculum {source_curriculum_id} to {new_curriculum_id} for user {user_id}") + return new_curriculum_id + + async def save_learning_content( + self, + curriculum_id: str, + content_type: str, + lesson_index: int, + lesson_topic: str, + content: Any + ) -> str: + """Save learning content (flashcards, exercises, or simulation)""" + content_id = str(uuid.uuid4()) + + async with aiosqlite.connect(self.db_path) as db: + await db.execute(""" + INSERT INTO learning_content + (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json) + VALUES (?, ?, ?, ?, ?, ?) + """, ( + content_id, + curriculum_id, + content_type, + lesson_index, + lesson_topic, + json.dumps(content) if isinstance(content, (dict, list)) else content + )) + await db.commit() + + logger.info(f"Saved {content_type} for lesson {lesson_index}") + return content_id + + async def mark_curriculum_content_generated(self, curriculum_id: str): + """Mark curriculum as having all content generated""" + async with aiosqlite.connect(self.db_path) as db: + await db.execute(""" + UPDATE curricula + SET is_content_generated = 1 + WHERE id = ? + """, (curriculum_id,)) + await db.commit() + + async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]: + """Get metadata extraction by ID""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT * FROM metadata_extractions WHERE id = ? + """, (extraction_id,)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + return None + + async def get_curriculum(self, curriculum_id: str) -> Optional[Dict[str, Any]]: + """Get curriculum by ID""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT c.*, m.native_language, m.target_language, m.proficiency + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE c.id = ? + """, (curriculum_id,)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + return None + + async def get_learning_content( + self, + curriculum_id: str, + content_type: Optional[str] = None, + lesson_index: Optional[int] = None + ) -> List[Dict[str, Any]]: + """Get learning content for a curriculum""" + query = "SELECT * FROM learning_content WHERE curriculum_id = ?" + params = [curriculum_id] + + if content_type: + query += " AND content_type = ?" + params.append(content_type) + + if lesson_index is not None: + query += " AND lesson_index = ?" + params.append(lesson_index) + + query += " ORDER BY lesson_index" + + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(query, params) as cursor: + rows = await cursor.fetchall() + return [dict(row) for row in rows] + + async def get_user_metadata_extractions( + self, + user_id: int, + limit: int = 20 + ) -> List[Dict[str, Any]]: + """Get user's metadata extraction history""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT * FROM metadata_extractions + WHERE user_id = ? + ORDER BY created_at DESC + LIMIT ? 
+ """, (user_id, limit)) as cursor: + rows = await cursor.fetchall() + return [dict(row) for row in rows] + + async def get_user_curricula( + self, + user_id: int, + limit: int = 20 + ) -> List[Dict[str, Any]]: + """Get user's curricula""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE c.user_id = ? + ORDER BY c.created_at DESC + LIMIT ? + """, (user_id, limit)) as cursor: + rows = await cursor.fetchall() + return [dict(row) for row in rows] + + async def get_user_learning_journeys( + self, + user_id: int, + limit: int = 20 + ) -> List[Dict[str, Any]]: + """Get user's complete learning journeys""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT * FROM user_learning_journeys + WHERE user_id = ? + LIMIT ? + """, (user_id, limit)) as cursor: + rows = await cursor.fetchall() + return [dict(row) for row in rows] + + async def get_curriculum_content_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]: + """Get content generation status for a curriculum""" + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(""" + SELECT * FROM curriculum_content_status WHERE curriculum_id = ? + """, (curriculum_id,)) as cursor: + row = await cursor.fetchone() + if row: + return dict(row) + return None + + async def get_full_curriculum_details(self, curriculum_id: str, include_content: bool = True) -> Optional[Dict[str, Any]]: + """Get full curriculum details, optionally including all content.""" + curriculum = await self.get_curriculum(curriculum_id) + if not curriculum: + return None + + try: + curriculum_data = json.loads(curriculum['curriculum_json']) + lessons = curriculum_data.get('sub_topics', []) + except json.JSONDecodeError: + curriculum_data = {} + lessons = [] + + if include_content: + content_list = await self.get_learning_content(curriculum_id) + content_map = {} + for content in content_list: + lesson_index = content['lesson_index'] + content_type = content['content_type'] + if lesson_index not in content_map: + content_map[lesson_index] = {} + + try: + parsed_content = json.loads(content['content_json']) + except json.JSONDecodeError: + parsed_content = content['content_json'] + + content_map[lesson_index][content_type] = { + "id": content['id'], + "lesson_topic": content['lesson_topic'], + "content": parsed_content, + "created_at": content['created_at'] + } + + # Embed content into lessons + for i, lesson in enumerate(lessons): + lesson['content'] = content_map.get(i, {}) + + curriculum['curriculum'] = curriculum_data + del curriculum['curriculum_json'] + + return curriculum + + async def search_curricula_by_languages( + self, + native_language: str, + target_language: str, + proficiency: Optional[str] = None, + limit: int = 10 + ) -> List[Dict[str, Any]]: + """Search for existing curricula by language combination""" + query = """ + SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title + FROM curricula c + JOIN metadata_extractions m ON c.metadata_extraction_id = m.id + WHERE m.native_language = ? AND m.target_language = ? + """ + params = [native_language, target_language] + + if proficiency: + query += " AND m.proficiency = ?" + params.append(proficiency) + + query += " ORDER BY c.created_at DESC LIMIT ?" 
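A note on `get_full_curriculum_details` above: when `include_content` is true it nests every generated item under `lesson['content'][content_type]`, keyed by `lesson_index`. A minimal sketch of walking that structure, assuming an existing database and a placeholder curriculum ID:

```python
# Illustrative sketch only: walk the nested structure produced by
# db.get_full_curriculum_details(); "example-curriculum-id" is a placeholder.
import asyncio
from backend.db import db

async def print_curriculum_outline(curriculum_id: str) -> None:
    details = await db.get_full_curriculum_details(curriculum_id, include_content=True)
    if not details:
        print("curriculum not found")
        return
    lessons = details["curriculum"].get("sub_topics", [])
    for index, lesson in enumerate(lessons):
        generated = lesson.get("content", {})
        # Each content type ('flashcards', 'exercises', 'simulation') carries
        # its own id, lesson_topic, parsed content, and created_at timestamp.
        print(f"Lesson {index}: {sorted(generated.keys()) or 'no content yet'}")

if __name__ == "__main__":
    asyncio.run(print_curriculum_outline("example-curriculum-id"))
```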
+ params.append(limit) + + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute(query, params) as cursor: + rows = await cursor.fetchall() + return [dict(row) for row in rows] + + +# Global database instance +db = Database() \ No newline at end of file diff --git a/prev_backend_v4/backend/db_cache.py b/prev_backend_v4/backend/db_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..b13dad98f41e89444d60dd65fe113e51cb6e3fac --- /dev/null +++ b/prev_backend_v4/backend/db_cache.py @@ -0,0 +1,101 @@ +import aiosqlite +import json +import os +from typing import Optional, Dict, Any, Callable, Union, List +import logging +import hashlib + +logger = logging.getLogger(__name__) +DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db") + +class ApiCache: + """Generic caching service using a dedicated database table.""" + def __init__(self, db_path: str = DB_PATH): + self.db_path = db_path + + def _generate_hash(self, text: str) -> str: + """Generate a SHA256 hash for a given text.""" + return hashlib.sha256(text.encode()).hexdigest() + + def _generate_context_hash(self, key_text: str, **context) -> str: + """Generate a hash that includes context for better cache differentiation""" + # Create a consistent string from context + context_items = sorted(context.items()) + context_str = "|".join([f"{k}:{v}" for k, v in context_items if v is not None]) + full_key = f"{key_text}|{context_str}" + return hashlib.sha256(full_key.encode()).hexdigest() + + async def get_or_set( + self, + category: str, + key_text: str, + coro: Callable, + *args, + context: Optional[Dict[str, Any]] = None, + **kwargs + ) -> Union[Dict[str, Any], List[Any], str]: + """ + Get data from cache or execute a coroutine to generate and cache it. + + Args: + category: The category of the cached item (e.g., 'metadata', 'flashcards'). + key_text: The text to use for generating the cache key. + coro: The async function to call if the item is not in the cache. + *args: Positional arguments for the coroutine. + context: Additional context for cache key generation (e.g., language, proficiency). + **kwargs: Keyword arguments for the coroutine. + + Returns: + The cached or newly generated content. + """ + # Generate cache key with context if provided + if context: + cache_key = self._generate_context_hash(key_text, **context) + else: + cache_key = self._generate_hash(key_text) + + # 1. Check cache + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + async with db.execute( + "SELECT content_json FROM api_cache WHERE cache_key = ? AND category = ?", + (cache_key, category) + ) as cursor: + row = await cursor.fetchone() + if row: + logger.info(f"Cache hit for {category} with key: {key_text[:50]}...") + return json.loads(row['content_json']) + + # 2. If miss, generate content + logger.info(f"Cache miss for {category}: {key_text[:50]}... Generating new content") + generated_content = await coro(*args, **kwargs) + + # Ensure content is a JSON-serializable string + if isinstance(generated_content, (dict, list)): + content_to_cache = json.dumps(generated_content) + elif isinstance(generated_content, str): + # Try to parse string to ensure it's valid JSON, then dump it back + try: + parsed_json = json.loads(generated_content) + content_to_cache = json.dumps(parsed_json) + except json.JSONDecodeError: + # If it's not a JSON string, we can't cache it in this system. 
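To make `_generate_context_hash` concrete: the same prompt cached under different language metadata yields different cache keys, so a German/beginner request never collides with a Spanish/advanced one. A self-contained sketch of the same scheme; the query and metadata values are invented for illustration:

```python
# Self-contained illustration of the context-aware cache key used by ApiCache.
# The query and metadata values below are made-up examples.
import hashlib

def context_cache_key(key_text: str, **context) -> str:
    context_items = sorted(context.items())
    context_str = "|".join(f"{k}:{v}" for k, v in context_items if v is not None)
    return hashlib.sha256(f"{key_text}|{context_str}".encode()).hexdigest()

query = "Help me practice ordering food"
key_a = context_cache_key(query, native_language="English", target_language="German", proficiency="beginner")
key_b = context_cache_key(query, native_language="English", target_language="Spanish", proficiency="advanced")
assert key_a != key_b  # same prompt, different metadata -> separate cache entries
print(key_a[:16], key_b[:16])
```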
+ # Depending on requirements, we might raise an error or just return it without caching. + logger.warning(f"Content for {category} is not valid JSON, returning without caching.") + return generated_content + else: + raise TypeError("Cached content must be a JSON string, dict, or list.") + + # 3. Store in cache + async with aiosqlite.connect(self.db_path) as db: + await db.execute( + "INSERT INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)", + (cache_key, category, content_to_cache) + ) + await db.commit() + logger.info(f"Cached new content for {category} with key: {key_text[:50]}...") + + return json.loads(content_to_cache) + +# Global API cache instance +api_cache = ApiCache() \ No newline at end of file diff --git a/prev_backend_v4/backend/db_init.py b/prev_backend_v4/backend/db_init.py new file mode 100644 index 0000000000000000000000000000000000000000..739ae6b4b8928c91eadcfa5683f4507f31989d03 --- /dev/null +++ b/prev_backend_v4/backend/db_init.py @@ -0,0 +1,259 @@ +""" +Database Initialization Module +Handles database creation, schema setup, and health checks +""" + +import os +import aiosqlite +import logging +from pathlib import Path +from typing import Dict, Any, List + +logger = logging.getLogger(__name__) + +class DatabaseInitializer: + """Handles database initialization and health checks""" + + def __init__(self, db_path: str = None): + self.db_path = db_path or os.getenv("DATABASE_PATH", "./ai_tutor.db") + self.schema_path = self._find_schema_file() + + def _find_schema_file(self) -> str: + """Return the path to the schema.sql file. + + The schema.sql file is expected to be in the same directory as this script. + """ + schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql') + if not os.path.exists(schema_path): + raise FileNotFoundError(f"schema.sql not found at {schema_path}") + return schema_path + + async def check_database_exists(self) -> bool: + """Check if database file exists""" + return os.path.exists(self.db_path) + + async def check_database_health(self) -> Dict[str, Any]: + """Comprehensive database health check""" + health_status = { + "database_exists": False, + "database_accessible": False, + "schema_loaded": False, + "tables_exist": False, + "views_exist": False, + "can_write": False, + "record_count": {}, + "errors": [] + } + + try: + # Check if database file exists + health_status["database_exists"] = await self.check_database_exists() + + if not health_status["database_exists"]: + health_status["errors"].append("Database file does not exist") + return health_status + + # Try to connect to database + async with aiosqlite.connect(self.db_path) as db: + health_status["database_accessible"] = True + + # Check if required tables exist + required_tables = ['metadata_extractions', 'curricula', 'learning_content', 'api_cache'] + existing_tables = await self._get_existing_tables(db) + + missing_tables = [table for table in required_tables if table not in existing_tables] + if missing_tables: + health_status["errors"].append(f"Missing tables: {missing_tables}") + else: + health_status["tables_exist"] = True + + # Check if views exist + required_views = ['user_learning_journeys', 'curriculum_content_status'] + existing_views = await self._get_existing_views(db) + + missing_views = [view for view in required_views if view not in existing_views] + if missing_views: + health_status["errors"].append(f"Missing views: {missing_views}") + else: + health_status["views_exist"] = True + + # Test write capability + try: + await db.execute("CREATE 
TEMPORARY TABLE test_write (id INTEGER)") + await db.execute("DROP TABLE test_write") + health_status["can_write"] = True + except Exception as e: + health_status["errors"].append(f"Cannot write to database: {str(e)}") + + # Get record counts + if health_status["tables_exist"]: + for table in required_tables: + try: + async with db.execute(f"SELECT COUNT(*) FROM {table}") as cursor: + count = await cursor.fetchone() + health_status["record_count"][table] = count[0] if count else 0 + except Exception as e: + health_status["record_count"][table] = f"Error: {str(e)}" + + health_status["schema_loaded"] = ( + health_status["tables_exist"] and + health_status["views_exist"] + ) + + except Exception as e: + health_status["errors"].append(f"Database connection error: {str(e)}") + + return health_status + + async def _get_existing_tables(self, db: aiosqlite.Connection) -> List[str]: + """Get list of existing tables""" + async with db.execute(""" + SELECT name FROM sqlite_master + WHERE type='table' AND name NOT LIKE 'sqlite_%' + """) as cursor: + rows = await cursor.fetchall() + return [row[0] for row in rows] + + async def _get_existing_views(self, db: aiosqlite.Connection) -> List[str]: + """Get list of existing views""" + async with db.execute(""" + SELECT name FROM sqlite_master + WHERE type='view' + """) as cursor: + rows = await cursor.fetchall() + return [row[0] for row in rows] + + async def create_database(self) -> bool: + """Create database file and initialize with schema""" + try: + logger.info(f"Creating database at: {self.db_path}") + + # Ensure directory exists + db_dir = os.path.dirname(self.db_path) + if db_dir and not os.path.exists(db_dir): + os.makedirs(db_dir, exist_ok=True) + logger.info(f"Created directory: {db_dir}") + + # Create database and load schema + async with aiosqlite.connect(self.db_path) as db: + # Read schema file + with open(self.schema_path, 'r') as f: + schema = f.read() + + # Execute schema + await db.executescript(schema) + await db.commit() + + logger.info("Database created and schema loaded successfully") + return True + + except Exception as e: + logger.error(f"Error creating database: {str(e)}") + return False + + async def initialize_database(self, force_recreate: bool = False) -> Dict[str, Any]: + """Initialize database with comprehensive checks and creation""" + result = { + "success": False, + "action_taken": "none", + "health_check": {}, + "errors": [] + } + + try: + # Check current database health + health_check = await self.check_database_health() + result["health_check"] = health_check + + # Determine if we need to create/recreate database + needs_creation = ( + not health_check["database_exists"] or + not health_check["schema_loaded"] or + force_recreate + ) + + if needs_creation: + if health_check["database_exists"] and force_recreate: + # Backup existing database + backup_path = f"{self.db_path}.backup" + if os.path.exists(self.db_path): + os.rename(self.db_path, backup_path) + logger.info(f"Backed up existing database to: {backup_path}") + result["action_taken"] = "recreated_with_backup" + else: + result["action_taken"] = "force_recreated" + else: + result["action_taken"] = "created" + + # Create database + creation_success = await self.create_database() + if not creation_success: + result["errors"].append("Failed to create database") + return result + + # Verify creation + final_health = await self.check_database_health() + result["health_check"] = final_health + + if final_health["schema_loaded"] and final_health["can_write"]: + 
result["success"] = True + logger.info("Database initialization completed successfully") + else: + result["errors"].append("Database created but health check failed") + + else: + # Database exists and is healthy + result["success"] = True + result["action_taken"] = "already_exists" + logger.info("Database already exists and is healthy") + + except Exception as e: + error_msg = f"Database initialization error: {str(e)}" + logger.error(error_msg) + result["errors"].append(error_msg) + + return result + + async def repair_database(self) -> Dict[str, Any]: + """Attempt to repair database issues""" + result = { + "success": False, + "repairs_attempted": [], + "errors": [] + } + + try: + health_check = await self.check_database_health() + + if not health_check["database_exists"]: + # Database doesn't exist - create it + creation_result = await self.initialize_database() + result["repairs_attempted"].append("created_missing_database") + result["success"] = creation_result["success"] + result["errors"].extend(creation_result.get("errors", [])) + return result + + # Database exists but has issues + async with aiosqlite.connect(self.db_path) as db: + # Check and repair missing tables + if not health_check["tables_exist"]: + with open(self.schema_path, 'r') as f: + schema = f.read() + await db.executescript(schema) + await db.commit() + result["repairs_attempted"].append("recreated_schema") + + # Verify repair + final_health = await self.check_database_health() + result["success"] = final_health["schema_loaded"] + + except Exception as e: + error_msg = f"Database repair error: {str(e)}" + logger.error(error_msg) + result["errors"].append(error_msg) + + return result + + +# Global instance +db_initializer = DatabaseInitializer() \ No newline at end of file diff --git a/prev_backend_v4/backend/main.py b/prev_backend_v4/backend/main.py new file mode 100644 index 0000000000000000000000000000000000000000..be2eb2a08c78d927e9508c72664cbfd8723e3cc2 --- /dev/null +++ b/prev_backend_v4/backend/main.py @@ -0,0 +1,451 @@ +from fastapi import FastAPI, HTTPException, Query, Path +from fastapi.responses import JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from backend.utils import generate_completions +from backend import config +from backend.db import db +from backend.db_init import db_initializer +from backend.content_generator import content_generator +from backend.db_cache import api_cache +from typing import Union, List, Literal, Optional +from datetime import datetime +import logging +import json + +logging.basicConfig(level=logging.INFO) + +app = FastAPI(title="AI Language Tutor API", version="2.0.0") + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +class MetadataRequest(BaseModel): + query: str + user_id: Optional[int] = None + +class GenerationRequest(BaseModel): + user_id: int + query: Union[str, List[dict]] + native_language: Optional[str] = None + target_language: Optional[str] = None + proficiency: Optional[str] = None + +@app.on_event("startup") +async def startup_event(): + """Initialize database on startup with comprehensive checks""" + logging.info("Starting database initialization...") + + # Initialize database with health checks + init_result = await db_initializer.initialize_database() + + if init_result["success"]: + logging.info(f"Database initialization successful: {init_result['action_taken']}") + + # Log database statistics + 
health = init_result["health_check"] + if health.get("record_count"): + logging.info(f"Database records: {health['record_count']}") + else: + logging.error(f"Database initialization failed: {init_result['errors']}") + # Try to repair + logging.info("Attempting database repair...") + repair_result = await db_initializer.repair_database() + if repair_result["success"]: + logging.info("Database repair successful") + else: + logging.error(f"Database repair failed: {repair_result['errors']}") + raise RuntimeError("Failed to initialize database") + +@app.get("/") +async def root(): + return {"message": "Welcome to the AI Language Tutor API v2.0!"} + +@app.get("/health") +async def health_check(): + """Comprehensive health check including database status""" + try: + # Check database health + db_health = await db_initializer.check_database_health() + + # Overall health status + is_healthy = ( + db_health["database_exists"] and + db_health["schema_loaded"] and + db_health["can_write"] + ) + + return JSONResponse( + content={ + "status": "healthy" if is_healthy else "unhealthy", + "api_version": "2.0.0", + "database": db_health, + "timestamp": datetime.now().isoformat() + }, + status_code=200 if is_healthy else 503 + ) + except Exception as e: + return JSONResponse( + content={ + "status": "error", + "error": str(e), + "timestamp": datetime.now().isoformat() + }, + status_code=500 + ) + +@app.post("/admin/database/repair") +async def repair_database(): + """Repair database issues (admin endpoint)""" + try: + repair_result = await db_initializer.repair_database() + + return JSONResponse( + content={ + "success": repair_result["success"], + "repairs_attempted": repair_result["repairs_attempted"], + "errors": repair_result["errors"], + "timestamp": datetime.now().isoformat() + }, + status_code=200 if repair_result["success"] else 500 + ) + except Exception as e: + return JSONResponse( + content={ + "success": False, + "error": str(e), + "timestamp": datetime.now().isoformat() + }, + status_code=500 + ) + +@app.post("/admin/database/recreate") +async def recreate_database(): + """Recreate database from scratch (admin endpoint)""" + try: + init_result = await db_initializer.initialize_database(force_recreate=True) + + return JSONResponse( + content={ + "success": init_result["success"], + "action_taken": init_result["action_taken"], + "health_check": init_result["health_check"], + "errors": init_result["errors"], + "timestamp": datetime.now().isoformat() + }, + status_code=200 if init_result["success"] else 500 + ) + except Exception as e: + return JSONResponse( + content={ + "success": False, + "error": str(e), + "timestamp": datetime.now().isoformat() + }, + status_code=500 + ) + +# ========== POST ENDPOINTS (Generation) ========== + +@app.post("/extract/metadata") +async def extract_metadata(data: MetadataRequest): + """Extract language learning metadata from user query""" + logging.info(f"Extracting metadata for query: {data.query[:50]}...") + try: + # Generate metadata using AI, with caching + metadata_dict = await api_cache.get_or_set( + category="metadata", + key_text=data.query, + coro=generate_completions.get_completions, + prompt=data.query, + instructions=config.language_metadata_extraction_prompt + ) + + # Check for existing curriculum first before creating new metadata extraction + existing_curriculum = await db.find_existing_curriculum( + query=data.query, + native_language=metadata_dict['native_language'], +
target_language=metadata_dict['target_language'], + proficiency=metadata_dict['proficiency'], + user_id=None # Make it user-independent + ) + + if existing_curriculum: + # Found existing curriculum - return it regardless of user + logging.info(f"Found existing curriculum for query '{data.query[:50]}...': {existing_curriculum['id']}") + return JSONResponse( + content={ + "message": "Found existing curriculum for your query.", + "curriculum_id": existing_curriculum['id'], + "status_endpoint": f"/content/status/{existing_curriculum['id']}", + "cached": True + }, + status_code=200 + ) + + # No suitable existing curriculum found, generate new one + logging.info(f"No existing curriculum found, generating new one for user {data.user_id}") + + # Save metadata to database + extraction_id = await db.save_metadata_extraction( + query=data.query, + metadata=metadata_dict, + user_id=data.user_id + ) + + # Process extraction (generate curriculum and start content generation) + processing_result = await content_generator.process_metadata_extraction( + extraction_id=extraction_id, + query=data.query, + metadata=metadata_dict, + user_id=data.user_id, + generate_content=True # Automatically generate all content + ) + + curriculum_id = processing_result['curriculum_id'] + + return JSONResponse( + content={ + "message": "Content generation has been initiated.", + "curriculum_id": curriculum_id, + "status_endpoint": f"/content/status/{curriculum_id}", + "cached": False + }, + status_code=202 + ) + except Exception as e: + logging.error(f"Error extracting metadata: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +# ========== GET ENDPOINTS (Retrieval) ========== + +@app.get("/curriculum/{curriculum_id}/metadata") +async def get_curriculum_metadata(curriculum_id: str = Path(..., description="Curriculum ID")): + """Get metadata for a curriculum""" + curriculum = await db.get_curriculum(curriculum_id) + if not curriculum: + raise HTTPException(status_code=404, detail="Curriculum not found") + + # Get the full metadata extraction record + extraction = await db.get_metadata_extraction(curriculum['metadata_extraction_id']) + if not extraction: + raise HTTPException(status_code=404, detail="Metadata extraction not found") + + # Parse JSON fields + extraction['metadata'] = json.loads(extraction['metadata_json']) + del extraction['metadata_json'] + + return JSONResponse(content=extraction, status_code=200) + +@app.get("/curriculum/{curriculum_id}") +async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum ID")): + """Get curriculum by ID""" + curriculum = await db.get_full_curriculum_details(curriculum_id, include_content=False) + if not curriculum: + raise HTTPException(status_code=404, detail="Curriculum not found") + + # Get content generation status + status = await db.get_curriculum_content_status(curriculum_id) + if status: + curriculum['content_status'] = status + + return JSONResponse(content=curriculum, status_code=200) + + +async def _get_lesson_content_by_type( + curriculum_id: str, + lesson_index: int, + content_type: str +): + """Helper to get specific content type for a lesson""" + content_list = await db.get_learning_content( + curriculum_id=curriculum_id, + lesson_index=lesson_index, + content_type=content_type + ) + if not content_list: + raise HTTPException( + status_code=404, + detail=f"{content_type.capitalize()} content not found for lesson {lesson_index}" + ) + + # Assuming one content item per type per lesson + content = content_list[0] + try: + parsed_content 
= json.loads(content['content_json']) + except json.JSONDecodeError: + parsed_content = content['content_json'] + + return JSONResponse( + content={ + "curriculum_id": curriculum_id, + "lesson_index": lesson_index, + "content_type": content_type, + "id": content['id'], + "lesson_topic": content['lesson_topic'], + "content": parsed_content, + "created_at": content['created_at'] + }, + status_code=200 + ) + +@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/flashcards") +async def get_lesson_flashcards( + curriculum_id: str = Path(..., description="Curriculum ID"), + lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)") +): + """Get flashcards for a specific lesson""" + return await _get_lesson_content_by_type(curriculum_id, lesson_index, "flashcards") + +@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/exercises") +async def get_lesson_exercises( + curriculum_id: str = Path(..., description="Curriculum ID"), + lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)") +): + """Get exercises for a specific lesson""" + return await _get_lesson_content_by_type(curriculum_id, lesson_index, "exercises") + +@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/simulation") +async def get_lesson_simulation( + curriculum_id: str = Path(..., description="Curriculum ID"), + lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)") +): + """Get simulation for a specific lesson""" + return await _get_lesson_content_by_type(curriculum_id, lesson_index, "simulation") +@app.get("/user/{user_id}/metadata") +async def get_user_metadata_history( + user_id: int = Path(..., description="User ID"), + limit: int = Query(20, ge=1, le=100, description="Maximum number of results") +): + """Get user's metadata extraction history""" + extractions = await db.get_user_metadata_extractions(user_id, limit) + + # Parse JSON fields + for extraction in extractions: + extraction['metadata'] = json.loads(extraction['metadata_json']) + del extraction['metadata_json'] + + return JSONResponse( + content={ + "user_id": user_id, + "extractions": extractions, + "total": len(extractions) + }, + status_code=200 + ) + +@app.get("/user/{user_id}/curricula") +async def get_user_curricula( + user_id: int = Path(..., description="User ID"), + limit: int = Query(20, ge=1, le=100, description="Maximum number of results") +): + """Get user's curricula""" + curricula = await db.get_user_curricula(user_id, limit) + + # Parse JSON fields and get content status + for curriculum in curricula: + curriculum['curriculum'] = json.loads(curriculum['curriculum_json']) + del curriculum['curriculum_json'] + + # Get content status + status = await db.get_curriculum_content_status(curriculum['id']) + if status: + curriculum['content_status'] = status + + return JSONResponse( + content={ + "user_id": user_id, + "curricula": curricula, + "total": len(curricula) + }, + status_code=200 + ) + +@app.get("/user/{user_id}/journeys") +async def get_user_learning_journeys( + user_id: int = Path(..., description="User ID"), + limit: int = Query(20, ge=1, le=100, description="Maximum number of results") +): + """Get user's complete learning journeys (metadata + curriculum info)""" + journeys = await db.get_user_learning_journeys(user_id, limit) + + return JSONResponse( + content={ + "user_id": user_id, + "journeys": journeys, + "total": len(journeys) + }, + status_code=200 + ) + +@app.get("/search/curricula") +async def search_curricula( + native_language: str = Query(..., 
description="Native language"), + target_language: str = Query(..., description="Target language"), + proficiency: Optional[str] = Query(None, description="Proficiency level"), + limit: int = Query(10, ge=1, le=50, description="Maximum number of results") +): + """Search for existing curricula by language combination""" + curricula = await db.search_curricula_by_languages( + native_language=native_language, + target_language=target_language, + proficiency=proficiency, + limit=limit + ) + + # Parse JSON fields + for curriculum in curricula: + curriculum['curriculum'] = json.loads(curriculum['curriculum_json']) + del curriculum['curriculum_json'] + + return JSONResponse( + content={ + "search_params": { + "native_language": native_language, + "target_language": target_language, + "proficiency": proficiency + }, + "curricula": curricula, + "total": len(curricula) + }, + status_code=200 + ) + +@app.get("/content/status/{curriculum_id}") +async def get_content_generation_status( + curriculum_id: str = Path(..., description="Curriculum ID") +): + """Check content generation status for a curriculum""" + status = await db.get_curriculum_content_status(curriculum_id) + if not status: + raise HTTPException(status_code=404, detail="Curriculum not found") + + # Calculate completion percentage + total_lessons = 25 + total_content_types = 3 # flashcards, exercises, simulation + total_expected = total_lessons * total_content_types + + total_generated = ( + status['lessons_with_flashcards'] + + status['lessons_with_exercises'] + + status['lessons_with_simulations'] + ) + + completion_percentage = (total_generated / total_expected) * 100 if total_expected > 0 else 0 + + return JSONResponse( + content={ + "curriculum_id": curriculum_id, + "status": status, + "completion_percentage": round(completion_percentage, 2), + "is_complete": completion_percentage >= 100 + }, + status_code=200 + ) + diff --git a/prev_backend_v4/backend/schema.sql b/prev_backend_v4/backend/schema.sql new file mode 100644 index 0000000000000000000000000000000000000000..9250ae1aacc5b16a808974b60f01e2e37166d16d --- /dev/null +++ b/prev_backend_v4/backend/schema.sql @@ -0,0 +1,98 @@ +-- AI Language Tutor Database Schema + +-- Table for storing extracted metadata from user queries +CREATE TABLE IF NOT EXISTS metadata_extractions ( + id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))), + user_id INTEGER, + query TEXT NOT NULL, + native_language TEXT, + target_language TEXT, + proficiency TEXT CHECK(proficiency IN ('beginner', 'intermediate', 'advanced')), + title TEXT, + description TEXT, + metadata_json TEXT NOT NULL, -- Full JSON response + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Index for user queries +CREATE INDEX IF NOT EXISTS idx_metadata_user_id ON metadata_extractions(user_id); +CREATE INDEX IF NOT EXISTS idx_metadata_languages ON metadata_extractions(native_language, target_language); + +-- Table for storing generated curricula +CREATE TABLE IF NOT EXISTS curricula ( + id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))), + metadata_extraction_id TEXT NOT NULL, + user_id INTEGER, + lesson_topic TEXT, + curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons + is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated? 
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE +); + +-- Index for curriculum lookups +CREATE INDEX IF NOT EXISTS idx_curricula_metadata_id ON curricula(metadata_extraction_id); +CREATE INDEX IF NOT EXISTS idx_curricula_user_id ON curricula(user_id); + +-- Table for storing all types of learning content +CREATE TABLE IF NOT EXISTS learning_content ( + id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))), + curriculum_id TEXT NOT NULL, + content_type TEXT NOT NULL CHECK(content_type IN ('flashcards', 'exercises', 'simulation')), + lesson_index INTEGER NOT NULL CHECK(lesson_index >= 0 AND lesson_index < 25), + lesson_topic TEXT, + content_json TEXT NOT NULL, -- The actual generated content + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (curriculum_id) REFERENCES curricula(id) ON DELETE CASCADE +); + +-- Index for content lookups +CREATE INDEX IF NOT EXISTS idx_content_curriculum_id ON learning_content(curriculum_id); +CREATE INDEX IF NOT EXISTS idx_content_type ON learning_content(content_type); +CREATE INDEX IF NOT EXISTS idx_content_lesson ON learning_content(curriculum_id, lesson_index); + +-- View for easy access to user's learning journeys +CREATE VIEW IF NOT EXISTS user_learning_journeys AS +SELECT + m.id as metadata_id, + m.user_id, + m.query, + m.native_language, + m.target_language, + m.proficiency, + m.title, + m.description, + c.id as curriculum_id, + c.lesson_topic, + c.is_content_generated, + m.created_at +FROM metadata_extractions m +LEFT JOIN curricula c ON m.id = c.metadata_extraction_id +ORDER BY m.created_at DESC; + +-- View for content availability per curriculum +CREATE VIEW IF NOT EXISTS curriculum_content_status AS +SELECT + c.id as curriculum_id, + c.user_id, + c.lesson_topic, + COUNT(DISTINCT lc.lesson_index) as lessons_with_content, + COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards, + COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises, + COUNT(DISTINCT CASE WHEN lc.content_type = 'simulation' THEN lc.lesson_index END) as lessons_with_simulations, + c.created_at +FROM curricula c +LEFT JOIN learning_content lc ON c.id = lc.curriculum_id +GROUP BY c.id; + +-- Generic cache for API responses to reduce redundant AI calls +CREATE TABLE IF NOT EXISTS api_cache ( + cache_key TEXT NOT NULL, + category TEXT NOT NULL, + content_json TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (cache_key, category) +); + +-- Index for faster cache lookups +CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category); \ No newline at end of file diff --git a/prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-311.pyc b/prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..621b7f132effe45ecb6f44a6e6f1da945e63299a Binary files /dev/null and b/prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-311.pyc differ diff --git a/prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-312.pyc b/prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-312.pyc similarity index 70% rename from prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-312.pyc rename to prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-312.pyc 
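The `/content/status/{curriculum_id}` endpoint earlier derives its completion figure from the `curriculum_content_status` view above: 25 lessons x 3 content types = 75 expected items. A hedged sketch of the same calculation run directly against the SQLite file; the path and curriculum ID are placeholders:

```python
# Illustrative: reproduce the completion-percentage math from /content/status
# directly against the curriculum_content_status view. Path and ID are placeholders.
import asyncio
import aiosqlite

async def completion_percentage(db_path: str, curriculum_id: str) -> float:
    async with aiosqlite.connect(db_path) as conn:
        conn.row_factory = aiosqlite.Row
        async with conn.execute(
            "SELECT * FROM curriculum_content_status WHERE curriculum_id = ?",
            (curriculum_id,),
        ) as cursor:
            row = await cursor.fetchone()
    if row is None:
        raise ValueError("curriculum not found")
    generated = (
        row["lessons_with_flashcards"]
        + row["lessons_with_exercises"]
        + row["lessons_with_simulations"]
    )
    return round(generated / (25 * 3) * 100, 2)  # 25 lessons x 3 content types

if __name__ == "__main__":
    print(asyncio.run(completion_percentage("./ai_tutor.db", "example-curriculum-id")))
```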
index d400fc3777cc14b72cba25d8efb1f89520468246..d481a698f2f43375e21ff4cae125cdcd4a0c8084 100644 Binary files a/prev_backend_v2/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/prev_backend_v4/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ diff --git a/prev_backend_v4/backend/utils/__pycache__/handlers.cpython-312.pyc b/prev_backend_v4/backend/utils/__pycache__/handlers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a01633ec07e5eace868c98e73cbc83519ad751a Binary files /dev/null and b/prev_backend_v4/backend/utils/__pycache__/handlers.cpython-312.pyc differ diff --git a/prev_backend_v0/backend/utils/generate_completions.py b/prev_backend_v4/backend/utils/generate_completions.py similarity index 74% rename from prev_backend_v0/backend/utils/generate_completions.py rename to prev_backend_v4/backend/utils/generate_completions.py index a58e04c1f7a04eaf5f7fad31fa70380a9d3aeb43..8628d2a7b54c019f4ed305bacc8063e50d42f36d 100644 --- a/prev_backend_v0/backend/utils/generate_completions.py +++ b/prev_backend_v4/backend/utils/generate_completions.py @@ -6,6 +6,7 @@ from typing import Union, List, Dict, Literal from dotenv import load_dotenv import os from pydantic import BaseModel + load_dotenv() # Initialize the async client @@ -40,32 +41,6 @@ def process_input(data: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dic else: raise TypeError("Input must be a string or a list of dictionaries with a 'content' field") - -# async def get_completions( -# prompt: Union[str, List[Dict[str, str]]], -# instructions: str -# ) -> str: -# processed_prompt = process_input(prompt) # Ensures the input format is correct - -# if isinstance(processed_prompt, str): -# messages = [ -# {"role": "system", "content": instructions}, -# {"role": "user", "content": processed_prompt} -# ] -# elif isinstance(processed_prompt, list): -# messages = [{"role": "system", "content": instructions}] + processed_prompt -# else: -# raise TypeError("Unexpected processed input type.") - -# response = await client.chat.completions.create( -# model=os.getenv("MODEL"), -# messages=messages, -# response_format={"type": "json_object"} -# ) - -# output: str = response.choices[0].message.content -# return output - async def get_completions( prompt: Union[str, List[Dict[str, str]]], instructions: str @@ -98,7 +73,7 @@ async def get_completions( raise TypeError("Unexpected processed input type.") response = await client.chat.completions.create( - model=os.getenv("MODEL"), + model=os.getenv("MODEL", "gemini-2.0-flash"), messages=messages, response_format={"type": "json_object"} ) diff --git a/prev_backend_v4/backend/utils/handlers.py b/prev_backend_v4/backend/utils/handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..5ae2d9df576f7156bd2582910f49b824449af79f --- /dev/null +++ b/prev_backend_v4/backend/utils/handlers.py @@ -0,0 +1,76 @@ +from fastapi import HTTPException +from fastapi.responses import JSONResponse +from typing import Callable, Dict, Any +from backend import config +from backend.content_generator import content_generator +from backend.utils import generate_completions + +async def handle_generation_request( + data: Any, + mode: str, + instructions_template: str +) -> JSONResponse: + """ + Shared handler for all generation endpoints (curriculum, flashcards, exercises, simulation). 
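Since `get_completions` requests `response_format={"type": "json_object"}`, callers can normally `json.loads` the returned string, which is what the cache layer relies on. A minimal usage sketch, assuming the OpenAI-compatible credentials and `MODEL` environment variables are configured; the prompt text is invented:

```python
# Illustrative call into the completion helper shown above; the prompt text is
# made up, and valid API credentials/env vars are assumed to be configured.
import asyncio
import json
from backend.utils import generate_completions
from backend import config

async def main() -> None:
    raw = await generate_completions.get_completions(
        prompt="I want to improve my restaurant Spanish, I'm a native English speaker.",
        instructions=config.language_metadata_extraction_prompt,
    )
    metadata = json.loads(raw)  # the model is asked to return a JSON object
    print(metadata.get("target_language"), metadata.get("proficiency"))

if __name__ == "__main__":
    asyncio.run(main())
```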
+ + Args: + data: The GenerationRequest object containing query and metadata + mode: The type of generation (curriculum, flashcards, exercises, simulation) + instructions_template: The template string from config to use + + Returns: + JSONResponse with the generated content + + Raises: + HTTPException: If required metadata is missing or other errors occur + """ + # Validate required metadata + if not (data.native_language and data.target_language and data.proficiency): + raise HTTPException( + status_code=400, + detail="native_language, target_language, and proficiency are required. Please extract metadata first." + ) + + # Format instructions with metadata + instructions = ( + instructions_template + .replace("{native_language}", data.native_language) + .replace("{target_language}", data.target_language) + .replace("{proficiency}", data.proficiency) + ) + + # Generate new content + response = await generate_completions.get_completions( + data.query, + instructions + ) + + # Save generated content to database + content_id = await content_generator.save_content( + query=str(data.query), + content=response, + content_type=mode, + user_id=data.user_id, + native_language=data.native_language, + target_language=data.target_language, + proficiency=data.proficiency + ) + + return JSONResponse( + content={ + "data": response, + "type": mode, + "status": "success", + "content_id": content_id, + "saved": True + }, + status_code=200 + ) + +# Mapping of modes to their instruction templates +INSTRUCTION_TEMPLATES: Dict[str, str] = { + "curriculum": config.curriculum_instructions, + "flashcards": config.flashcard_mode_instructions, + "exercises": config.exercise_mode_instructions, + "simulation": config.simulation_mode_instructions +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index eb962244a0fc7f4a327b17143641e9ed126caed7..143bf4846c4af46a89e218a09a0dc697a8111023 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,7 @@ -aioredis -redis==5.2.1 -python-docx -fastapi==0.115.11 -uvicorn==0.34.0 -pydantic==2.10.6 -aiocache -openai==1.67.0 -httpx +fastapi +uvicorn[standard] python-dotenv -psycopg2-binary==2.9.9 -python-jose[cryptography] -passlib[bcrypt] -python-multipart -python-jose[cryptography] -passlib[bcrypt] -python-multipart -pytest==7.4.4 -pytest-asyncio==0.23.3 -httpx==0.26.0 # Required for TestClient +openai +httpx +pydantic +aiosqlite \ No newline at end of file
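For completeness, the request flow these endpoints implement can be exercised end to end with `httpx` (listed in requirements.txt). A hedged client-side sketch, assuming the API is running locally on port 8000; the query and user ID are placeholders:

```python
# Illustrative client-side walkthrough of the v2 API flow:
# extract metadata -> poll generation status -> fetch lesson content.
# Base URL, user_id, and the query are assumptions for the example.
import time
import httpx

BASE = "http://localhost:8000"

with httpx.Client(base_url=BASE, timeout=60) as client:
    resp = client.post(
        "/extract/metadata",
        json={"query": "I want to learn Dutch for my nursing job", "user_id": 1},
    )
    resp.raise_for_status()
    curriculum_id = resp.json()["curriculum_id"]

    # Poll until background content generation has finished (or give up).
    for _ in range(30):
        status = client.get(f"/content/status/{curriculum_id}").json()
        if status.get("is_complete"):
            break
        time.sleep(10)

    flashcards = client.get(f"/curriculum/{curriculum_id}/lesson/0/flashcards")
    print(flashcards.json()["content"])
```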