improved backend
- backend/__pycache__/config.cpython-311.pyc +0 -0
- backend/__pycache__/config.cpython-312.pyc +0 -0
- backend/__pycache__/content_generator.cpython-311.pyc +0 -0
- backend/__pycache__/content_generator.cpython-312.pyc +0 -0
- backend/__pycache__/db.cpython-311.pyc +0 -0
- backend/__pycache__/db.cpython-312.pyc +0 -0
- backend/__pycache__/db_cache.cpython-311.pyc +0 -0
- backend/__pycache__/db_cache.cpython-312.pyc +0 -0
- backend/__pycache__/db_init.cpython-311.pyc +0 -0
- backend/__pycache__/db_init.cpython-312.pyc +0 -0
- backend/__pycache__/main.cpython-311.pyc +0 -0
- backend/__pycache__/main.cpython-312.pyc +0 -0
- backend/content_generator.py +110 -79
- backend/db.py +107 -36
- backend/db_cache.py +2 -2
- backend/main.py +54 -20
- backend/schema.sql +13 -1
- backend/utils/__pycache__/generate_completions.cpython-311.pyc +0 -0
- backend/utils/__pycache__/generate_completions.cpython-312.pyc +0 -0
backend/__pycache__/config.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/config.cpython-311.pyc and b/backend/__pycache__/config.cpython-311.pyc differ

backend/__pycache__/config.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ

backend/__pycache__/content_generator.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/content_generator.cpython-311.pyc and b/backend/__pycache__/content_generator.cpython-311.pyc differ

backend/__pycache__/content_generator.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/content_generator.cpython-312.pyc and b/backend/__pycache__/content_generator.cpython-312.pyc differ

backend/__pycache__/db.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/db.cpython-311.pyc and b/backend/__pycache__/db.cpython-311.pyc differ

backend/__pycache__/db.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/db.cpython-312.pyc and b/backend/__pycache__/db.cpython-312.pyc differ

backend/__pycache__/db_cache.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/db_cache.cpython-311.pyc and b/backend/__pycache__/db_cache.cpython-311.pyc differ

backend/__pycache__/db_cache.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/db_cache.cpython-312.pyc and b/backend/__pycache__/db_cache.cpython-312.pyc differ

backend/__pycache__/db_init.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/db_init.cpython-311.pyc and b/backend/__pycache__/db_init.cpython-311.pyc differ

backend/__pycache__/db_init.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/db_init.cpython-312.pyc and b/backend/__pycache__/db_init.cpython-312.pyc differ

backend/__pycache__/main.cpython-311.pyc
CHANGED
Binary files a/backend/__pycache__/main.cpython-311.pyc and b/backend/__pycache__/main.cpython-311.pyc differ

backend/__pycache__/main.cpython-312.pyc
CHANGED
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
backend/content_generator.py
CHANGED
@@ -163,7 +163,7 @@ class ContentGenerator:
         except Exception as e:
             logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}")
 
-        return
+        return content_ids
 
     async def generate_all_content_for_curriculum(
         self,
@@ -171,56 +171,85 @@ class ContentGenerator:
         max_concurrent_lessons: int = 3
     ):
         """Generate all learning content for a curriculum"""
-        # Get curriculum details
-        curriculum_data = await db.get_curriculum(curriculum_id)
-        if not curriculum_data:
-            logger.error(f"Curriculum not found: {curriculum_id}")
-            return
-
-        # Parse curriculum JSON
         try:
-            curriculum = json.loads(curriculum_data['curriculum_json'])
-            lessons = curriculum.get('sub_topics', [])
-        except json.JSONDecodeError:
-            logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
-            return
-
-        # Prepare metadata
-        metadata = {
-            'native_language': curriculum_data['native_language'],
-            'target_language': curriculum_data['target_language'],
-            'proficiency': curriculum_data['proficiency']
-        }
-
-        logger.info(f"Starting content generation for {len(lessons)} lessons")
-
-        # Process lessons in batches to avoid overwhelming the API
-        for i in range(0, len(lessons), max_concurrent_lessons):
-            batch = lessons[i:i + max_concurrent_lessons]
-            batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
-
-            # Generate content for batch concurrently
-            tasks = [
-                self.generate_content_for_lesson(
-                    curriculum_id=curriculum_id,
-                    lesson_index=idx,
-                    lesson=lesson,
-                    metadata=metadata
-                )
-                for idx, lesson in zip(batch_indices, batch)
-            ]
-
-            results = await asyncio.gather(*tasks, return_exceptions=True)
-
-            for idx, result in zip(batch_indices, results):
-                if isinstance(result, Exception):
-                    logger.error(f"Failed to generate content for lesson {idx}: {result}")
-                else:
-                    logger.info(f"Generated content for lesson {idx}: {result}")
-
-        # Mark curriculum as content generated
-        await db.mark_curriculum_content_generated(curriculum_id)
+            # Update status to generating
+            await db.update_content_generation_status(
+                curriculum_id=curriculum_id,
+                status='generating'
+            )
+
+            # Get curriculum details
+            curriculum_data = await db.get_curriculum(curriculum_id)
+            if not curriculum_data:
+                logger.error(f"Curriculum not found: {curriculum_id}")
+                await db.update_content_generation_status(
+                    curriculum_id=curriculum_id,
+                    status='failed',
+                    error_message="Curriculum not found"
+                )
+                return
+
+            # Parse curriculum JSON
+            try:
+                curriculum = json.loads(curriculum_data['curriculum_json'])
+                lessons = curriculum.get('sub_topics', [])
+            except json.JSONDecodeError:
+                logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
+                await db.update_content_generation_status(
+                    curriculum_id=curriculum_id,
+                    status='failed',
+                    error_message="Failed to parse curriculum JSON"
+                )
+                return
+
+            # Prepare metadata
+            metadata = {
+                'native_language': curriculum_data['native_language'],
+                'target_language': curriculum_data['target_language'],
+                'proficiency': curriculum_data['proficiency']
+            }
+
+            logger.info(f"Starting content generation for {len(lessons)} lessons")
+
+            # Process lessons in batches to avoid overwhelming the API
+            for i in range(0, len(lessons), max_concurrent_lessons):
+                batch = lessons[i:i + max_concurrent_lessons]
+                batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
+
+                # Generate content for batch concurrently
+                tasks = [
+                    self.generate_content_for_lesson(
+                        curriculum_id=curriculum_id,
+                        lesson_index=idx,
+                        lesson=lesson,
+                        metadata=metadata
+                    )
+                    for idx, lesson in zip(batch_indices, batch)
+                ]
+
+                results = await asyncio.gather(*tasks, return_exceptions=True)
+
+                for idx, result in zip(batch_indices, results):
+                    if isinstance(result, Exception):
+                        logger.error(f"Failed to generate content for lesson {idx}: {result}")
+                    else:
+                        logger.info(f"Generated content for lesson {idx}: {result}")
+
+            # Mark curriculum as content generated
+            await db.mark_curriculum_content_generated(curriculum_id)
+            await db.update_content_generation_status(
+                curriculum_id=curriculum_id,
+                status='completed'
+            )
+            logger.info(f"Completed content generation for curriculum {curriculum_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to generate content for curriculum {curriculum_id}: {e}")
+            await db.update_content_generation_status(
+                curriculum_id=curriculum_id,
+                status='failed',
+                error_message=str(e)
+            )
 
     async def process_metadata_extraction(
         self,
@@ -228,44 +257,46 @@ class ContentGenerator:
         query: str,
         metadata: Dict[str, Any],
         user_id: Optional[int] = None,
-        generate_content: bool = True
+        generate_content: bool = True,
+        skip_curriculum_lookup: bool = False
     ) -> Dict[str, Any]:
         """Process a metadata extraction by checking for existing curriculum or generating new one"""
 
-        # Check for existing curriculum first
-        existing_curriculum = await db.find_existing_curriculum(
-            query=query,
-            native_language=metadata['native_language'],
-            target_language=metadata['target_language'],
-            proficiency=metadata['proficiency'],
-            user_id=user_id
-        )
-
-        if existing_curriculum:
-            # If we found an exact match for this user, return it
-            if existing_curriculum.get('user_id') == user_id:
-                logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
-                return {
-                    'curriculum_id': existing_curriculum['id'],
-                    'content_generation_started': False,
-                    'cached': True,
-                    'cache_type': 'user_exact_match'
-                }
-
-            # If we found a similar curriculum from another user, copy it
-            elif existing_curriculum.get('is_content_generated') == 1:
-                logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
-                curriculum_id = await db.copy_curriculum_for_user(
-                    source_curriculum_id=existing_curriculum['id'],
-                    metadata_extraction_id=extraction_id,
-                    user_id=user_id
-                )
-                return {
-                    'curriculum_id': curriculum_id,
-                    'content_generation_started': False,
-                    'cached': True,
-                    'cache_type': 'copied_from_similar'
-                }
+        if not skip_curriculum_lookup:
+            # Check for existing curriculum first
+            existing_curriculum = await db.find_existing_curriculum(
+                query=query,
+                native_language=metadata['native_language'],
+                target_language=metadata['target_language'],
+                proficiency=metadata['proficiency'],
+                user_id=user_id
+            )
+
+            if existing_curriculum:
+                # If we found an exact match for this user, return it
+                if existing_curriculum.get('user_id') == user_id:
+                    logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
+                    return {
+                        'curriculum_id': existing_curriculum['id'],
+                        'content_generation_started': False,
+                        'cached': True,
+                        'cache_type': 'user_exact_match'
+                    }
+
+                # If we found a similar curriculum from another user, copy it
+                elif existing_curriculum.get('is_content_generated') == 1:
+                    logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
+                    curriculum_id = await db.copy_curriculum_for_user(
+                        source_curriculum_id=existing_curriculum['id'],
+                        metadata_extraction_id=extraction_id,
+                        user_id=user_id
+                    )
+                    return {
+                        'curriculum_id': curriculum_id,
+                        'content_generation_started': False,
+                        'cached': True,
+                        'cache_type': 'copied_from_similar'
+                    }
 
         # No suitable existing curriculum found, generate new one
         logger.info(f"No existing curriculum found, generating new one for user {user_id}")
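A note on the batching pattern above: asyncio.gather(*tasks, return_exceptions=True) turns per-lesson failures into return values, so one bad lesson is logged without cancelling the rest of its batch, while the outer range(0, len(lessons), max_concurrent_lessons) loop caps how many API calls are in flight. A minimal, self-contained sketch of the same pattern; fake_generate is a hypothetical stand-in for generate_content_for_lesson, not part of the codebase:

import asyncio

async def fake_generate(idx: int) -> str:
    # Hypothetical stand-in for a per-lesson API call; lesson 3 fails
    # on purpose to show that its batch siblings still complete.
    if idx == 3:
        raise RuntimeError("upstream API error")
    await asyncio.sleep(0.1)
    return f"content-{idx}"

async def run_in_batches(n_lessons: int, batch_size: int = 3) -> None:
    for i in range(0, n_lessons, batch_size):
        indices = range(i, min(i + batch_size, n_lessons))
        # return_exceptions=True: failures come back as values, not raised
        results = await asyncio.gather(
            *(fake_generate(idx) for idx in indices),
            return_exceptions=True,
        )
        for idx, result in zip(indices, results):
            if isinstance(result, Exception):
                print(f"lesson {idx} failed: {result}")
            else:
                print(f"lesson {idx} ok: {result}")

asyncio.run(run_in_batches(7))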
backend/db.py
CHANGED
@@ -37,12 +37,16 @@ class Database:
         proficiency: str,
         user_id: Optional[int] = None
     ) -> Optional[Dict[str, Any]]:
-        """Find existing curriculum for
+        """Find existing curriculum for exact query and metadata match"""
+        logger.info(f"Looking for curriculum: query='{query[:50]}...', native={native_language}, target={target_language}, proficiency={proficiency}, user_id={user_id}")
+
         async with aiosqlite.connect(self.db_path) as db:
             db.row_factory = aiosqlite.Row
 
+            # Always look for exact query matches first, prioritizing user-specific matches
             if user_id is not None:
-                # User-specific search:
+                # User-specific search: Find exact query match for the user
+                logger.info(f"Searching for exact match for user {user_id}")
                 async with db.execute("""
                     SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
                     FROM curricula c
@@ -54,35 +58,27 @@ class Database:
                 """, (user_id, query, native_language, target_language, proficiency)) as cursor:
                     row = await cursor.fetchone()
                     if row:
+                        logger.info(f"Found exact user match: {dict(row)['id']}")
                         return dict(row)
-                    SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
-                    FROM curricula c
-                    JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
-                    WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
-                    ORDER BY c.created_at DESC
-                    LIMIT 1
-                """, (query, native_language, target_language, proficiency)) as cursor:
-                    row = await cursor.fetchone()
-                    if row:
-                        return dict(row)
+
+            # Look for exact query match from any user (only if the query is exactly the same)
+            logger.info("Searching for exact query match (any user)")
+            async with db.execute("""
+                SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
+                FROM curricula c
+                JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
+                WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
+                ORDER BY c.created_at DESC
+                LIMIT 1
+            """, (query, native_language, target_language, proficiency)) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    logger.info(f"Found exact query match: {dict(row)['id']}")
+                    return dict(row)
+                else:
+                    logger.info("No exact query match found")
 
+        logger.info("No existing curriculum found")
         return None
 
     async def save_metadata_extraction(
@@ -93,7 +89,17 @@ class Database:
     ) -> str:
         """Save extracted metadata and return extraction ID"""
         extraction_id = str(uuid.uuid4())
+
+        # Validate proficiency before inserting into the database
+        allowed_proficiencies = {"beginner", "intermediate", "advanced"}
+        proficiency = metadata.get('proficiency')
+        if proficiency not in allowed_proficiencies:
+            logger.warning(
+                f"Unknown proficiency '{proficiency}' received; defaulting to 'beginner'."
+            )
+            proficiency = "beginner"
+            metadata["proficiency"] = "beginner"
+
         async with aiosqlite.connect(self.db_path) as db:
             await db.execute("""
                 INSERT INTO metadata_extractions
@@ -127,8 +133,8 @@ class Database:
         async with aiosqlite.connect(self.db_path) as db:
             await db.execute("""
                 INSERT INTO curricula
-                (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json)
-                VALUES (?, ?, ?, ?, ?)
+                (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, content_generation_status)
+                VALUES (?, ?, ?, ?, ?, 'pending')
             """, (
                 curriculum_id,
                 metadata_extraction_id,
@@ -164,8 +170,8 @@ class Database:
             # Create new curriculum
             await db.execute("""
                 INSERT INTO curricula
-                (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated)
-                VALUES (?, ?, ?, ?, ?, 0)
+                (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated, content_generation_status)
+                VALUES (?, ?, ?, ?, ?, 0, 'pending')
             """, (
                 new_curriculum_id,
                 metadata_extraction_id,
@@ -192,7 +198,9 @@ class Database:
             # Mark as content generated
             await db.execute("""
                 UPDATE curricula
-                SET is_content_generated = 1
+                SET is_content_generated = 1,
+                    content_generation_status = 'completed',
+                    content_generation_completed_at = CURRENT_TIMESTAMP
                 WHERE id = ?
             """, (new_curriculum_id,))
 
@@ -235,11 +243,74 @@ class Database:
         async with aiosqlite.connect(self.db_path) as db:
            await db.execute("""
                 UPDATE curricula
-                SET is_content_generated = 1
+                SET is_content_generated = 1,
+                    content_generation_status = 'completed',
+                    content_generation_completed_at = CURRENT_TIMESTAMP
                 WHERE id = ?
             """, (curriculum_id,))
             await db.commit()
 
+    async def update_content_generation_status(
+        self,
+        curriculum_id: str,
+        status: str,
+        error_message: Optional[str] = None
+    ):
+        """Update content generation status for a curriculum"""
+        async with aiosqlite.connect(self.db_path) as db:
+            if status == 'generating':
+                await db.execute("""
+                    UPDATE curricula
+                    SET content_generation_status = ?,
+                        content_generation_started_at = CURRENT_TIMESTAMP,
+                        content_generation_error = NULL
+                    WHERE id = ?
+                """, (status, curriculum_id))
+            elif status == 'completed':
+                await db.execute("""
+                    UPDATE curricula
+                    SET content_generation_status = ?,
+                        content_generation_completed_at = CURRENT_TIMESTAMP,
+                        content_generation_error = NULL,
+                        is_content_generated = 1
+                    WHERE id = ?
+                """, (status, curriculum_id))
+            elif status == 'failed':
+                await db.execute("""
+                    UPDATE curricula
+                    SET content_generation_status = ?,
+                        content_generation_error = ?
+                    WHERE id = ?
+                """, (status, error_message, curriculum_id))
+            else:
+                await db.execute("""
+                    UPDATE curricula
+                    SET content_generation_status = ?,
+                        content_generation_error = ?
+                    WHERE id = ?
+                """, (status, error_message, curriculum_id))
+            await db.commit()
+
+    async def get_content_generation_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
+        """Get content generation status for a curriculum"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute("""
+                SELECT
+                    id,
+                    content_generation_status,
+                    content_generation_error,
+                    content_generation_started_at,
+                    content_generation_completed_at,
+                    is_content_generated
+                FROM curricula
+                WHERE id = ?
+            """, (curriculum_id,)) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    return dict(row)
+                return None
+
     async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
         """Get metadata extraction by ID"""
         async with aiosqlite.connect(self.db_path) as db:
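Taken together, the new db.py methods implement a small status state machine: rows start at 'pending' on insert, 'generating' stamps a start time and clears stale errors, 'completed' stamps completion and flips the legacy is_content_generated flag, and 'failed' records the error. A runnable sketch of those transitions against an in-memory database, assuming aiosqlite is installed and using a trimmed-down curricula table (the real one in schema.sql has more columns):

import asyncio
import aiosqlite

SCHEMA = """
CREATE TABLE curricula (
    id TEXT PRIMARY KEY,
    content_generation_status TEXT DEFAULT 'pending',
    content_generation_error TEXT,
    content_generation_started_at TIMESTAMP,
    content_generation_completed_at TIMESTAMP,
    is_content_generated INTEGER DEFAULT 0
);
"""

async def main() -> None:
    async with aiosqlite.connect(":memory:") as db:
        await db.executescript(SCHEMA)
        await db.execute("INSERT INTO curricula (id) VALUES ('c1')")
        # 'generating': stamp the start time and clear any stale error
        await db.execute(
            """UPDATE curricula
               SET content_generation_status = 'generating',
                   content_generation_started_at = CURRENT_TIMESTAMP,
                   content_generation_error = NULL
               WHERE id = ?""",
            ("c1",),
        )
        # 'completed': stamp completion and flip the legacy boolean together
        await db.execute(
            """UPDATE curricula
               SET content_generation_status = 'completed',
                   content_generation_completed_at = CURRENT_TIMESTAMP,
                   is_content_generated = 1
               WHERE id = ?""",
            ("c1",),
        )
        await db.commit()
        db.row_factory = aiosqlite.Row
        async with db.execute("SELECT * FROM curricula") as cur:
            print(dict(await cur.fetchone()))

asyncio.run(main())

Centralizing the transitions in update_content_generation_status keeps is_content_generated and the new status column from drifting apart.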
backend/db_cache.py
CHANGED
@@ -86,10 +86,10 @@ class ApiCache:
         else:
             raise TypeError("Cached content must be a JSON string, dict, or list.")
 
-        # 3. Store in cache
+        # 3. Store in cache (use INSERT OR REPLACE to handle duplicates)
         async with aiosqlite.connect(self.db_path) as db:
             await db.execute(
-                "INSERT INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
+                "INSERT OR REPLACE INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
                 (cache_key, category, content_to_cache)
             )
             await db.commit()
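One caveat on the INSERT OR REPLACE fix: SQLite resolves the conflict by deleting the old row and inserting a new one, so created_at is re-stamped and any column not listed is reset to its default. If preserving created_at mattered, an ON CONFLICT upsert would be the alternative; a runnable sketch of that variant (assumes SQLite 3.24+ for upsert syntax, and is not what this commit does):

import asyncio
import aiosqlite

async def main() -> None:
    async with aiosqlite.connect(":memory:") as db:
        await db.execute(
            "CREATE TABLE api_cache (cache_key TEXT, category TEXT, content_json TEXT NOT NULL, "
            "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (cache_key, category))"
        )
        for payload in ('{"v": 1}', '{"v": 2}'):
            # Upsert: the second write updates content_json in place,
            # keeping the created_at from the first insert.
            await db.execute(
                """INSERT INTO api_cache (cache_key, category, content_json)
                   VALUES (?, ?, ?)
                   ON CONFLICT(cache_key, category) DO UPDATE SET
                       content_json = excluded.content_json""",
                ("key1", "metadata", payload),
            )
        await db.commit()
        async with db.execute("SELECT content_json, created_at FROM api_cache") as cur:
            print(await cur.fetchone())  # ('{"v": 2}', <original timestamp>)

asyncio.run(main())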
backend/main.py
CHANGED
@@ -90,6 +90,9 @@ async def health_check():
             },
             status_code=200 if is_healthy else 503
         )
+    except ValueError as ve:
+        logging.error(f"Invalid input: {ve}")
+        raise HTTPException(status_code=400, detail=str(ve))
     except Exception as e:
         return JSONResponse(
             content={
@@ -104,7 +107,7 @@ async def health_check():
 async def repair_database():
     """Repair database issues (admin endpoint)"""
     try:
+        repair_result = await db_initializer.repair_database()
 
         return JSONResponse(
             content={
@@ -158,11 +161,14 @@ async def extract_metadata(data: MetadataRequest):
     """Extract language learning metadata from user query"""
     logging.info(f"Extracting metadata for query: {data.query[:50]}...")
     try:
-        # Generate metadata using AI, with caching
+        # Generate metadata using AI, with caching (include user context)
         metadata_dict = await api_cache.get_or_set(
             category="metadata",
             key_text=data.query,
             coro=generate_completions.get_completions,
+            context={
+                'user_id': data.user_id
+            },
             prompt=data.query,
             instructions=config.language_metadata_extraction_prompt
         )
@@ -173,7 +179,7 @@ async def extract_metadata(data: MetadataRequest):
             native_language=metadata_dict['native_language'],
             target_language=metadata_dict['target_language'],
             proficiency=metadata_dict['proficiency'],
-            user_id=
+            user_id=data.user_id  # Use the actual user_id for consistent lookup
         )
 
         if existing_curriculum:
@@ -200,25 +206,37 @@ async def extract_metadata(data: MetadataRequest):
         )
 
         # Process extraction (generate curriculum and start content generation)
+        try:
+            processing_result = await content_generator.process_metadata_extraction(
+                extraction_id=extraction_id,
+                query=data.query,
+                metadata=metadata_dict,
+                user_id=data.user_id,
+                generate_content=True,  # Automatically generate all content
+                skip_curriculum_lookup=True  # Skip lookup since we already did it above
+            )
 
+            curriculum_id = processing_result['curriculum_id']
+
+            # Update status to generating
+            await db.update_content_generation_status(curriculum_id, 'generating')
 
+            return JSONResponse(
+                content={
+                    "message": "Content generation has been initiated.",
+                    "curriculum_id": curriculum_id,
+                    "status_endpoint": f"/content/status/{curriculum_id}",
+                    "cached": False
+                },
+                status_code=202
+            )
+        except Exception as content_error:
+            # If content generation fails, update status to failed
+            if 'curriculum_id' in locals():
+                await db.update_content_generation_status(
+                    curriculum_id, 'failed', str(content_error)
+                )
+            raise content_error
     except Exception as e:
         logging.error(f"Error extracting metadata: {e}")
         raise HTTPException(status_code=500, detail=str(e))
@@ -257,6 +275,22 @@ async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum
 
     return JSONResponse(content=curriculum, status_code=200)
 
+@app.get("/content/status/{curriculum_id}")
+async def get_content_generation_status(curriculum_id: str = Path(..., description="Curriculum ID")):
+    """Get content generation status for a curriculum"""
+    status = await db.get_content_generation_status(curriculum_id)
+    if not status:
+        raise HTTPException(status_code=404, detail="Curriculum not found")
+
+    return JSONResponse(content={
+        "curriculum_id": status['id'],
+        "status": status['content_generation_status'],
+        "error": status['content_generation_error'],
+        "started_at": status['content_generation_started_at'],
+        "completed_at": status['content_generation_completed_at'],
+        "is_content_generated": bool(status['is_content_generated'])
+    }, status_code=200)
+
 
 async def _get_lesson_content_by_type(
     curriculum_id: str,
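With the new endpoint in place, the intended client flow is: submit a query, get back 202 Accepted with a status_endpoint, then poll until the status leaves 'pending'/'generating'. A hedged client sketch; the base URL, the /extract_metadata path, and the request body shape are assumptions for illustration (the commit does not show the route decorator), and httpx is not a dependency this commit adds:

import time
import httpx

BASE = "http://localhost:8000"  # assumed dev server address

# Endpoint path and body shape are assumptions based on the handler above
resp = httpx.post(
    f"{BASE}/extract_metadata",
    json={"query": "I want to learn Spanish for travel", "user_id": 1},
)
resp.raise_for_status()
curriculum_id = resp.json()["curriculum_id"]  # present in the 202 (non-cached) branch

# Poll the status endpoint added in this commit
while True:
    status = httpx.get(f"{BASE}/content/status/{curriculum_id}").json()
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(2)

print(status["status"], status.get("error"))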
backend/schema.sql
CHANGED
@@ -26,6 +26,10 @@ CREATE TABLE IF NOT EXISTS curricula (
     lesson_topic TEXT,
     curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
     is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
+    content_generation_status TEXT DEFAULT 'pending' CHECK(content_generation_status IN ('pending', 'generating', 'completed', 'failed')),
+    content_generation_error TEXT, -- Store error message if generation fails
+    content_generation_started_at TIMESTAMP,
+    content_generation_completed_at TIMESTAMP,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
 );
@@ -65,6 +69,10 @@ SELECT
     c.id as curriculum_id,
     c.lesson_topic,
     c.is_content_generated,
+    c.content_generation_status,
+    c.content_generation_error,
+    c.content_generation_started_at,
+    c.content_generation_completed_at,
     m.created_at
 FROM metadata_extractions m
 LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
@@ -76,6 +84,10 @@ SELECT
     c.id as curriculum_id,
     c.user_id,
     c.lesson_topic,
+    c.content_generation_status,
+    c.content_generation_error,
+    c.content_generation_started_at,
+    c.content_generation_completed_at,
     COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
     COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
     COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
@@ -92,7 +104,7 @@ CREATE TABLE IF NOT EXISTS api_cache (
     content_json TEXT NOT NULL,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY (cache_key, category)
-);
+) WITHOUT ROWID;
 
 -- Index for faster cache lookups
 CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
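Note that these columns arrive via CREATE TABLE IF NOT EXISTS, which never alters an existing curricula table, so an already-deployed database needs a one-off migration (the commit does not show how db_init handles this, so the repair_database endpoint may cover it). A hedged sketch of such a migration; the database filename is an assumption, and the CHECK constraint from schema.sql is left off here since it only guards freshly created databases:

import sqlite3

NEW_COLUMNS = [
    ("content_generation_status", "TEXT DEFAULT 'pending'"),
    ("content_generation_error", "TEXT"),
    ("content_generation_started_at", "TIMESTAMP"),
    ("content_generation_completed_at", "TIMESTAMP"),
]

conn = sqlite3.connect("backend.db")  # assumed database path
# PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk) per column
existing = {row[1] for row in conn.execute("PRAGMA table_info(curricula)")}
for name, decl in NEW_COLUMNS:
    if name not in existing:
        conn.execute(f"ALTER TABLE curricula ADD COLUMN {name} {decl}")
conn.commit()
conn.close()

The api_cache change to WITHOUT ROWID likewise cannot be applied with ALTER TABLE; an existing table would have to be rebuilt and its rows copied over.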
backend/utils/__pycache__/generate_completions.cpython-311.pyc
CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-311.pyc and b/backend/utils/__pycache__/generate_completions.cpython-311.pyc differ

backend/utils/__pycache__/generate_completions.cpython-312.pyc
CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ