samu committed on
Commit
f6a6a60
·
1 Parent(s): 2832da8

improved backend

Browse files
backend/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-311.pyc and b/backend/__pycache__/config.cpython-311.pyc differ
 
backend/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ
 
backend/__pycache__/content_generator.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/content_generator.cpython-311.pyc and b/backend/__pycache__/content_generator.cpython-311.pyc differ
 
backend/__pycache__/content_generator.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/content_generator.cpython-312.pyc and b/backend/__pycache__/content_generator.cpython-312.pyc differ
 
backend/__pycache__/db.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/db.cpython-311.pyc and b/backend/__pycache__/db.cpython-311.pyc differ
 
backend/__pycache__/db.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/db.cpython-312.pyc and b/backend/__pycache__/db.cpython-312.pyc differ
 
backend/__pycache__/db_cache.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/db_cache.cpython-311.pyc and b/backend/__pycache__/db_cache.cpython-311.pyc differ
 
backend/__pycache__/db_cache.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/db_cache.cpython-312.pyc and b/backend/__pycache__/db_cache.cpython-312.pyc differ
 
backend/__pycache__/db_init.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/db_init.cpython-311.pyc and b/backend/__pycache__/db_init.cpython-311.pyc differ
 
backend/__pycache__/db_init.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/db_init.cpython-312.pyc and b/backend/__pycache__/db_init.cpython-312.pyc differ
 
backend/__pycache__/main.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-311.pyc and b/backend/__pycache__/main.cpython-311.pyc differ
 
backend/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
 
backend/content_generator.py CHANGED
@@ -163,7 +163,7 @@ class ContentGenerator:
163
  except Exception as e:
164
  logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}")
165
 
166
- return content
167
 
168
  async def generate_all_content_for_curriculum(
169
  self,
@@ -171,56 +171,85 @@ class ContentGenerator:
171
  max_concurrent_lessons: int = 3
172
  ):
173
  """Generate all learning content for a curriculum"""
174
- # Get curriculum details
175
- curriculum_data = await db.get_curriculum(curriculum_id)
176
- if not curriculum_data:
177
- logger.error(f"Curriculum not found: {curriculum_id}")
178
- return
179
-
180
- # Parse curriculum JSON
181
  try:
182
- curriculum = json.loads(curriculum_data['curriculum_json'])
183
- lessons = curriculum.get('sub_topics', [])
184
- except json.JSONDecodeError:
185
- logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
186
- return
187
-
188
- # Prepare metadata
189
- metadata = {
190
- 'native_language': curriculum_data['native_language'],
191
- 'target_language': curriculum_data['target_language'],
192
- 'proficiency': curriculum_data['proficiency']
193
- }
194
-
195
- logger.info(f"Starting content generation for {len(lessons)} lessons")
196
-
197
- # Process lessons in batches to avoid overwhelming the API
198
- for i in range(0, len(lessons), max_concurrent_lessons):
199
- batch = lessons[i:i + max_concurrent_lessons]
200
- batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
201
 
202
- # Generate content for batch concurrently
203
- tasks = [
204
- self.generate_content_for_lesson(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  curriculum_id=curriculum_id,
206
- lesson_index=idx,
207
- lesson=lesson,
208
- metadata=metadata
209
  )
210
- for idx, lesson in zip(batch_indices, batch)
211
- ]
 
 
 
 
 
 
212
 
213
- results = await asyncio.gather(*tasks, return_exceptions=True)
214
 
215
- for idx, result in zip(batch_indices, results):
216
- if isinstance(result, Exception):
217
- logger.error(f"Failed to generate content for lesson {idx}: {result}")
218
- else:
219
- logger.info(f"Generated content for lesson {idx}: {result}")
220
-
221
- # Mark curriculum as content generated
222
- await db.mark_curriculum_content_generated(curriculum_id)
223
- logger.info(f"Completed content generation for curriculum {curriculum_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  async def process_metadata_extraction(
226
  self,
@@ -228,44 +257,46 @@ class ContentGenerator:
228
  query: str,
229
  metadata: Dict[str, Any],
230
  user_id: Optional[int] = None,
231
- generate_content: bool = True
 
232
  ) -> Dict[str, Any]:
233
  """Process a metadata extraction by checking for existing curriculum or generating new one"""
234
 
235
- # Check for existing curriculum first
236
- existing_curriculum = await db.find_existing_curriculum(
237
- query=query,
238
- native_language=metadata['native_language'],
239
- target_language=metadata['target_language'],
240
- proficiency=metadata['proficiency'],
241
- user_id=user_id
242
- )
243
-
244
- if existing_curriculum:
245
- # If we found an exact match for this user, return it
246
- if existing_curriculum.get('user_id') == user_id:
247
- logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
248
- return {
249
- 'curriculum_id': existing_curriculum['id'],
250
- 'content_generation_started': False,
251
- 'cached': True,
252
- 'cache_type': 'user_exact_match'
253
- }
254
 
255
- # If we found a similar curriculum from another user, copy it
256
- elif existing_curriculum.get('is_content_generated') == 1:
257
- logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
258
- curriculum_id = await db.copy_curriculum_for_user(
259
- source_curriculum_id=existing_curriculum['id'],
260
- metadata_extraction_id=extraction_id,
261
- user_id=user_id
262
- )
263
- return {
264
- 'curriculum_id': curriculum_id,
265
- 'content_generation_started': False,
266
- 'cached': True,
267
- 'cache_type': 'copied_from_similar'
268
- }
 
 
 
 
 
 
 
 
 
 
 
269
 
270
  # No suitable existing curriculum found, generate new one
271
  logger.info(f"No existing curriculum found, generating new one for user {user_id}")
 
163
  except Exception as e:
164
  logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}")
165
 
166
+ return content_ids
167
 
168
  async def generate_all_content_for_curriculum(
169
  self,
 
171
  max_concurrent_lessons: int = 3
172
  ):
173
  """Generate all learning content for a curriculum"""
 
 
 
 
 
 
 
174
  try:
175
+ # Update status to generating
176
+ await db.update_content_generation_status(
177
+ curriculum_id=curriculum_id,
178
+ status='generating'
179
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ # Get curriculum details
182
+ curriculum_data = await db.get_curriculum(curriculum_id)
183
+ if not curriculum_data:
184
+ logger.error(f"Curriculum not found: {curriculum_id}")
185
+ await db.update_content_generation_status(
186
+ curriculum_id=curriculum_id,
187
+ status='failed',
188
+ error_message="Curriculum not found"
189
+ )
190
+ return
191
+
192
+ # Parse curriculum JSON
193
+ try:
194
+ curriculum = json.loads(curriculum_data['curriculum_json'])
195
+ lessons = curriculum.get('sub_topics', [])
196
+ except json.JSONDecodeError:
197
+ logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
198
+ await db.update_content_generation_status(
199
  curriculum_id=curriculum_id,
200
+ status='failed',
201
+ error_message="Failed to parse curriculum JSON"
 
202
  )
203
+ return
204
+
205
+ # Prepare metadata
206
+ metadata = {
207
+ 'native_language': curriculum_data['native_language'],
208
+ 'target_language': curriculum_data['target_language'],
209
+ 'proficiency': curriculum_data['proficiency']
210
+ }
211
 
212
+ logger.info(f"Starting content generation for {len(lessons)} lessons")
213
 
214
+ # Process lessons in batches to avoid overwhelming the API
215
+ for i in range(0, len(lessons), max_concurrent_lessons):
216
+ batch = lessons[i:i + max_concurrent_lessons]
217
+ batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
218
+
219
+ # Generate content for batch concurrently
220
+ tasks = [
221
+ self.generate_content_for_lesson(
222
+ curriculum_id=curriculum_id,
223
+ lesson_index=idx,
224
+ lesson=lesson,
225
+ metadata=metadata
226
+ )
227
+ for idx, lesson in zip(batch_indices, batch)
228
+ ]
229
+
230
+ results = await asyncio.gather(*tasks, return_exceptions=True)
231
+
232
+ for idx, result in zip(batch_indices, results):
233
+ if isinstance(result, Exception):
234
+ logger.error(f"Failed to generate content for lesson {idx}: {result}")
235
+ else:
236
+ logger.info(f"Generated content for lesson {idx}: {result}")
237
+
238
+ # Mark curriculum as content generated
239
+ await db.mark_curriculum_content_generated(curriculum_id)
240
+ await db.update_content_generation_status(
241
+ curriculum_id=curriculum_id,
242
+ status='completed'
243
+ )
244
+ logger.info(f"Completed content generation for curriculum {curriculum_id}")
245
+
246
+ except Exception as e:
247
+ logger.error(f"Failed to generate content for curriculum {curriculum_id}: {e}")
248
+ await db.update_content_generation_status(
249
+ curriculum_id=curriculum_id,
250
+ status='failed',
251
+ error_message=str(e)
252
+ )
253
 
254
  async def process_metadata_extraction(
255
  self,
 
257
  query: str,
258
  metadata: Dict[str, Any],
259
  user_id: Optional[int] = None,
260
+ generate_content: bool = True,
261
+ skip_curriculum_lookup: bool = False
262
  ) -> Dict[str, Any]:
263
  """Process a metadata extraction by checking for existing curriculum or generating new one"""
264
 
265
+ if not skip_curriculum_lookup:
266
+ # Check for existing curriculum first
267
+ existing_curriculum = await db.find_existing_curriculum(
268
+ query=query,
269
+ native_language=metadata['native_language'],
270
+ target_language=metadata['target_language'],
271
+ proficiency=metadata['proficiency'],
272
+ user_id=user_id
273
+ )
 
 
 
 
 
 
 
 
 
 
274
 
275
+ if existing_curriculum:
276
+ # If we found an exact match for this user, return it
277
+ if existing_curriculum.get('user_id') == user_id:
278
+ logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
279
+ return {
280
+ 'curriculum_id': existing_curriculum['id'],
281
+ 'content_generation_started': False,
282
+ 'cached': True,
283
+ 'cache_type': 'user_exact_match'
284
+ }
285
+
286
+ # If we found a similar curriculum from another user, copy it
287
+ elif existing_curriculum.get('is_content_generated') == 1:
288
+ logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
289
+ curriculum_id = await db.copy_curriculum_for_user(
290
+ source_curriculum_id=existing_curriculum['id'],
291
+ metadata_extraction_id=extraction_id,
292
+ user_id=user_id
293
+ )
294
+ return {
295
+ 'curriculum_id': curriculum_id,
296
+ 'content_generation_started': False,
297
+ 'cached': True,
298
+ 'cache_type': 'copied_from_similar'
299
+ }
300
 
301
  # No suitable existing curriculum found, generate new one
302
  logger.info(f"No existing curriculum found, generating new one for user {user_id}")
backend/db.py CHANGED
@@ -37,12 +37,16 @@ class Database:
37
  proficiency: str,
38
  user_id: Optional[int] = None
39
  ) -> Optional[Dict[str, Any]]:
40
- """Find existing curriculum for similar query and metadata"""
 
 
41
  async with aiosqlite.connect(self.db_path) as db:
42
  db.row_factory = aiosqlite.Row
43
 
 
44
  if user_id is not None:
45
- # User-specific search: First try to find exact query match for the user
 
46
  async with db.execute("""
47
  SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
48
  FROM curricula c
@@ -54,35 +58,27 @@ class Database:
54
  """, (user_id, query, native_language, target_language, proficiency)) as cursor:
55
  row = await cursor.fetchone()
56
  if row:
 
57
  return dict(row)
58
-
59
- # Then try to find similar curriculum with same metadata (any user)
60
- async with db.execute("""
61
- SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
62
- FROM curricula c
63
- JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
64
- WHERE m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
65
- AND c.is_content_generated = 1
66
- ORDER BY c.created_at DESC
67
- LIMIT 1
68
- """, (native_language, target_language, proficiency)) as cursor:
69
- row = await cursor.fetchone()
70
- if row:
71
- return dict(row)
72
- else:
73
- # User-independent search: Find exact query match regardless of user
74
- async with db.execute("""
75
- SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
76
- FROM curricula c
77
- JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
78
- WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
79
- ORDER BY c.created_at DESC
80
- LIMIT 1
81
- """, (query, native_language, target_language, proficiency)) as cursor:
82
- row = await cursor.fetchone()
83
- if row:
84
- return dict(row)
85
 
 
86
  return None
87
 
88
  async def save_metadata_extraction(
@@ -93,7 +89,17 @@ class Database:
93
  ) -> str:
94
  """Save extracted metadata and return extraction ID"""
95
  extraction_id = str(uuid.uuid4())
96
-
 
 
 
 
 
 
 
 
 
 
97
  async with aiosqlite.connect(self.db_path) as db:
98
  await db.execute("""
99
  INSERT INTO metadata_extractions
@@ -127,8 +133,8 @@ class Database:
127
  async with aiosqlite.connect(self.db_path) as db:
128
  await db.execute("""
129
  INSERT INTO curricula
130
- (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json)
131
- VALUES (?, ?, ?, ?, ?)
132
  """, (
133
  curriculum_id,
134
  metadata_extraction_id,
@@ -164,8 +170,8 @@ class Database:
164
  # Create new curriculum
165
  await db.execute("""
166
  INSERT INTO curricula
167
- (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated)
168
- VALUES (?, ?, ?, ?, ?, 0)
169
  """, (
170
  new_curriculum_id,
171
  metadata_extraction_id,
@@ -192,7 +198,9 @@ class Database:
192
  # Mark as content generated
193
  await db.execute("""
194
  UPDATE curricula
195
- SET is_content_generated = 1
 
 
196
  WHERE id = ?
197
  """, (new_curriculum_id,))
198
 
@@ -235,11 +243,74 @@ class Database:
235
  async with aiosqlite.connect(self.db_path) as db:
236
  await db.execute("""
237
  UPDATE curricula
238
- SET is_content_generated = 1
 
 
239
  WHERE id = ?
240
  """, (curriculum_id,))
241
  await db.commit()
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
244
  """Get metadata extraction by ID"""
245
  async with aiosqlite.connect(self.db_path) as db:
 
37
  proficiency: str,
38
  user_id: Optional[int] = None
39
  ) -> Optional[Dict[str, Any]]:
40
+ """Find existing curriculum for exact query and metadata match"""
41
+ logger.info(f"Looking for curriculum: query='{query[:50]}...', native={native_language}, target={target_language}, proficiency={proficiency}, user_id={user_id}")
42
+
43
  async with aiosqlite.connect(self.db_path) as db:
44
  db.row_factory = aiosqlite.Row
45
 
46
+ # Always look for exact query matches first, prioritizing user-specific matches
47
  if user_id is not None:
48
+ # User-specific search: Find exact query match for the user
49
+ logger.info(f"Searching for exact match for user {user_id}")
50
  async with db.execute("""
51
  SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
52
  FROM curricula c
 
58
  """, (user_id, query, native_language, target_language, proficiency)) as cursor:
59
  row = await cursor.fetchone()
60
  if row:
61
+ logger.info(f"Found exact user match: {dict(row)['id']}")
62
  return dict(row)
63
+
64
+ # Look for exact query match from any user (only if the query is exactly the same)
65
+ logger.info("Searching for exact query match (any user)")
66
+ async with db.execute("""
67
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
68
+ FROM curricula c
69
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
70
+ WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
71
+ ORDER BY c.created_at DESC
72
+ LIMIT 1
73
+ """, (query, native_language, target_language, proficiency)) as cursor:
74
+ row = await cursor.fetchone()
75
+ if row:
76
+ logger.info(f"Found exact query match: {dict(row)['id']}")
77
+ return dict(row)
78
+ else:
79
+ logger.info("No exact query match found")
 
 
 
 
 
 
 
 
 
 
80
 
81
+ logger.info("No existing curriculum found")
82
  return None
83
 
84
  async def save_metadata_extraction(
 
89
  ) -> str:
90
  """Save extracted metadata and return extraction ID"""
91
  extraction_id = str(uuid.uuid4())
92
+
93
+ # Validate proficiency before inserting into the database
94
+ allowed_proficiencies = {"beginner", "intermediate", "advanced"}
95
+ proficiency = metadata.get('proficiency')
96
+ if proficiency not in allowed_proficiencies:
97
+ logger.warning(
98
+ f"Unknown proficiency '{proficiency}' received; defaulting to 'beginner'."
99
+ )
100
+ proficiency = "beginner"
101
+ metadata["proficiency"] = "beginner"
102
+
103
  async with aiosqlite.connect(self.db_path) as db:
104
  await db.execute("""
105
  INSERT INTO metadata_extractions
 
133
  async with aiosqlite.connect(self.db_path) as db:
134
  await db.execute("""
135
  INSERT INTO curricula
136
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, content_generation_status)
137
+ VALUES (?, ?, ?, ?, ?, 'pending')
138
  """, (
139
  curriculum_id,
140
  metadata_extraction_id,
 
170
  # Create new curriculum
171
  await db.execute("""
172
  INSERT INTO curricula
173
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated, content_generation_status)
174
+ VALUES (?, ?, ?, ?, ?, 0, 'pending')
175
  """, (
176
  new_curriculum_id,
177
  metadata_extraction_id,
 
198
  # Mark as content generated
199
  await db.execute("""
200
  UPDATE curricula
201
+ SET is_content_generated = 1,
202
+ content_generation_status = 'completed',
203
+ content_generation_completed_at = CURRENT_TIMESTAMP
204
  WHERE id = ?
205
  """, (new_curriculum_id,))
206
 
 
243
  async with aiosqlite.connect(self.db_path) as db:
244
  await db.execute("""
245
  UPDATE curricula
246
+ SET is_content_generated = 1,
247
+ content_generation_status = 'completed',
248
+ content_generation_completed_at = CURRENT_TIMESTAMP
249
  WHERE id = ?
250
  """, (curriculum_id,))
251
  await db.commit()
252
 
253
+ async def update_content_generation_status(
254
+ self,
255
+ curriculum_id: str,
256
+ status: str,
257
+ error_message: Optional[str] = None
258
+ ):
259
+ """Update content generation status for a curriculum"""
260
+ async with aiosqlite.connect(self.db_path) as db:
261
+ if status == 'generating':
262
+ await db.execute("""
263
+ UPDATE curricula
264
+ SET content_generation_status = ?,
265
+ content_generation_started_at = CURRENT_TIMESTAMP,
266
+ content_generation_error = NULL
267
+ WHERE id = ?
268
+ """, (status, curriculum_id))
269
+ elif status == 'completed':
270
+ await db.execute("""
271
+ UPDATE curricula
272
+ SET content_generation_status = ?,
273
+ content_generation_completed_at = CURRENT_TIMESTAMP,
274
+ content_generation_error = NULL,
275
+ is_content_generated = 1
276
+ WHERE id = ?
277
+ """, (status, curriculum_id))
278
+ elif status == 'failed':
279
+ await db.execute("""
280
+ UPDATE curricula
281
+ SET content_generation_status = ?,
282
+ content_generation_error = ?
283
+ WHERE id = ?
284
+ """, (status, error_message, curriculum_id))
285
+ else:
286
+ await db.execute("""
287
+ UPDATE curricula
288
+ SET content_generation_status = ?,
289
+ content_generation_error = ?
290
+ WHERE id = ?
291
+ """, (status, error_message, curriculum_id))
292
+ await db.commit()
293
+
294
+ async def get_content_generation_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
295
+ """Get content generation status for a curriculum"""
296
+ async with aiosqlite.connect(self.db_path) as db:
297
+ db.row_factory = aiosqlite.Row
298
+ async with db.execute("""
299
+ SELECT
300
+ id,
301
+ content_generation_status,
302
+ content_generation_error,
303
+ content_generation_started_at,
304
+ content_generation_completed_at,
305
+ is_content_generated
306
+ FROM curricula
307
+ WHERE id = ?
308
+ """, (curriculum_id,)) as cursor:
309
+ row = await cursor.fetchone()
310
+ if row:
311
+ return dict(row)
312
+ return None
313
+
314
  async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
315
  """Get metadata extraction by ID"""
316
  async with aiosqlite.connect(self.db_path) as db:
backend/db_cache.py CHANGED
@@ -86,10 +86,10 @@ class ApiCache:
86
  else:
87
  raise TypeError("Cached content must be a JSON string, dict, or list.")
88
 
89
- # 3. Store in cache
90
  async with aiosqlite.connect(self.db_path) as db:
91
  await db.execute(
92
- "INSERT INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
93
  (cache_key, category, content_to_cache)
94
  )
95
  await db.commit()
 
86
  else:
87
  raise TypeError("Cached content must be a JSON string, dict, or list.")
88
 
89
+ # 3. Store in cache (use INSERT OR REPLACE to handle duplicates)
90
  async with aiosqlite.connect(self.db_path) as db:
91
  await db.execute(
92
+ "INSERT OR REPLACE INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
93
  (cache_key, category, content_to_cache)
94
  )
95
  await db.commit()
backend/main.py CHANGED
@@ -90,6 +90,9 @@ async def health_check():
90
  },
91
  status_code=200 if is_healthy else 503
92
  )
 
 
 
93
  except Exception as e:
94
  return JSONResponse(
95
  content={
@@ -104,7 +107,7 @@ async def health_check():
104
  async def repair_database():
105
  """Repair database issues (admin endpoint)"""
106
  try:
107
- # repair_result = await db.repair_database() # This method doesn't exist on the Database class
108
 
109
  return JSONResponse(
110
  content={
@@ -158,11 +161,14 @@ async def extract_metadata(data: MetadataRequest):
158
  """Extract language learning metadata from user query"""
159
  logging.info(f"Extracting metadata for query: {data.query[:50]}...")
160
  try:
161
- # Generate metadata using AI, with caching
162
  metadata_dict = await api_cache.get_or_set(
163
  category="metadata",
164
  key_text=data.query,
165
  coro=generate_completions.get_completions,
 
 
 
166
  prompt=data.query,
167
  instructions=config.language_metadata_extraction_prompt
168
  )
@@ -173,7 +179,7 @@ async def extract_metadata(data: MetadataRequest):
173
  native_language=metadata_dict['native_language'],
174
  target_language=metadata_dict['target_language'],
175
  proficiency=metadata_dict['proficiency'],
176
- user_id=None # Make it user-independent
177
  )
178
 
179
  if existing_curriculum:
@@ -200,25 +206,37 @@ async def extract_metadata(data: MetadataRequest):
200
  )
201
 
202
  # Process extraction (generate curriculum and start content generation)
203
- processing_result = await content_generator.process_metadata_extraction(
204
- extraction_id=extraction_id,
205
- query=data.query,
206
- metadata=metadata_dict,
207
- user_id=data.user_id,
208
- generate_content=True # Automatically generate all content
209
- )
 
 
210
 
211
- curriculum_id = processing_result['curriculum_id']
 
 
 
212
 
213
- return JSONResponse(
214
- content={
215
- "message": "Content generation has been initiated.",
216
- "curriculum_id": curriculum_id,
217
- "status_endpoint": f"/content/status/{curriculum_id}",
218
- "cached": False
219
- },
220
- status_code=202
221
- )
 
 
 
 
 
 
 
222
  except Exception as e:
223
  logging.error(f"Error extracting metadata: {e}")
224
  raise HTTPException(status_code=500, detail=str(e))
@@ -257,6 +275,22 @@ async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum
257
 
258
  return JSONResponse(content=curriculum, status_code=200)
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  async def _get_lesson_content_by_type(
262
  curriculum_id: str,
 
90
  },
91
  status_code=200 if is_healthy else 503
92
  )
93
+ except ValueError as ve:
94
+ logging.error(f"Invalid input: {ve}")
95
+ raise HTTPException(status_code=400, detail=str(ve))
96
  except Exception as e:
97
  return JSONResponse(
98
  content={
 
107
  async def repair_database():
108
  """Repair database issues (admin endpoint)"""
109
  try:
110
+ repair_result = await db_initializer.repair_database()
111
 
112
  return JSONResponse(
113
  content={
 
161
  """Extract language learning metadata from user query"""
162
  logging.info(f"Extracting metadata for query: {data.query[:50]}...")
163
  try:
164
+ # Generate metadata using AI, with caching (include user context)
165
  metadata_dict = await api_cache.get_or_set(
166
  category="metadata",
167
  key_text=data.query,
168
  coro=generate_completions.get_completions,
169
+ context={
170
+ 'user_id': data.user_id
171
+ },
172
  prompt=data.query,
173
  instructions=config.language_metadata_extraction_prompt
174
  )
 
179
  native_language=metadata_dict['native_language'],
180
  target_language=metadata_dict['target_language'],
181
  proficiency=metadata_dict['proficiency'],
182
+ user_id=data.user_id # Use the actual user_id for consistent lookup
183
  )
184
 
185
  if existing_curriculum:
 
206
  )
207
 
208
  # Process extraction (generate curriculum and start content generation)
209
+ try:
210
+ processing_result = await content_generator.process_metadata_extraction(
211
+ extraction_id=extraction_id,
212
+ query=data.query,
213
+ metadata=metadata_dict,
214
+ user_id=data.user_id,
215
+ generate_content=True, # Automatically generate all content
216
+ skip_curriculum_lookup=True # Skip lookup since we already did it above
217
+ )
218
 
219
+ curriculum_id = processing_result['curriculum_id']
220
+
221
+ # Update status to generating
222
+ await db.update_content_generation_status(curriculum_id, 'generating')
223
 
224
+ return JSONResponse(
225
+ content={
226
+ "message": "Content generation has been initiated.",
227
+ "curriculum_id": curriculum_id,
228
+ "status_endpoint": f"/content/status/{curriculum_id}",
229
+ "cached": False
230
+ },
231
+ status_code=202
232
+ )
233
+ except Exception as content_error:
234
+ # If content generation fails, update status to failed
235
+ if 'curriculum_id' in locals():
236
+ await db.update_content_generation_status(
237
+ curriculum_id, 'failed', str(content_error)
238
+ )
239
+ raise content_error
240
  except Exception as e:
241
  logging.error(f"Error extracting metadata: {e}")
242
  raise HTTPException(status_code=500, detail=str(e))
 
275
 
276
  return JSONResponse(content=curriculum, status_code=200)
277
 
278
+ @app.get("/content/status/{curriculum_id}")
279
+ async def get_content_generation_status(curriculum_id: str = Path(..., description="Curriculum ID")):
280
+ """Get content generation status for a curriculum"""
281
+ status = await db.get_content_generation_status(curriculum_id)
282
+ if not status:
283
+ raise HTTPException(status_code=404, detail="Curriculum not found")
284
+
285
+ return JSONResponse(content={
286
+ "curriculum_id": status['id'],
287
+ "status": status['content_generation_status'],
288
+ "error": status['content_generation_error'],
289
+ "started_at": status['content_generation_started_at'],
290
+ "completed_at": status['content_generation_completed_at'],
291
+ "is_content_generated": bool(status['is_content_generated'])
292
+ }, status_code=200)
293
+
294
 
295
  async def _get_lesson_content_by_type(
296
  curriculum_id: str,
backend/schema.sql CHANGED
@@ -26,6 +26,10 @@ CREATE TABLE IF NOT EXISTS curricula (
26
  lesson_topic TEXT,
27
  curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
28
  is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
 
 
 
 
29
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
30
  FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
31
  );
@@ -65,6 +69,10 @@ SELECT
65
  c.id as curriculum_id,
66
  c.lesson_topic,
67
  c.is_content_generated,
 
 
 
 
68
  m.created_at
69
  FROM metadata_extractions m
70
  LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
@@ -76,6 +84,10 @@ SELECT
76
  c.id as curriculum_id,
77
  c.user_id,
78
  c.lesson_topic,
 
 
 
 
79
  COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
80
  COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
81
  COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
@@ -92,7 +104,7 @@ CREATE TABLE IF NOT EXISTS api_cache (
92
  content_json TEXT NOT NULL,
93
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
94
  PRIMARY KEY (cache_key, category)
95
- );
96
 
97
  -- Index for faster cache lookups
98
  CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
 
26
  lesson_topic TEXT,
27
  curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
28
  is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
29
+ content_generation_status TEXT DEFAULT 'pending' CHECK(content_generation_status IN ('pending', 'generating', 'completed', 'failed')),
30
+ content_generation_error TEXT, -- Store error message if generation fails
31
+ content_generation_started_at TIMESTAMP,
32
+ content_generation_completed_at TIMESTAMP,
33
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
34
  FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
35
  );
 
69
  c.id as curriculum_id,
70
  c.lesson_topic,
71
  c.is_content_generated,
72
+ c.content_generation_status,
73
+ c.content_generation_error,
74
+ c.content_generation_started_at,
75
+ c.content_generation_completed_at,
76
  m.created_at
77
  FROM metadata_extractions m
78
  LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
 
84
  c.id as curriculum_id,
85
  c.user_id,
86
  c.lesson_topic,
87
+ c.content_generation_status,
88
+ c.content_generation_error,
89
+ c.content_generation_started_at,
90
+ c.content_generation_completed_at,
91
  COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
92
  COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
93
  COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
 
104
  content_json TEXT NOT NULL,
105
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
106
  PRIMARY KEY (cache_key, category)
107
+ ) WITHOUT ROWID;
108
 
109
  -- Index for faster cache lookups
110
  CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
backend/utils/__pycache__/generate_completions.cpython-311.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-311.pyc and b/backend/utils/__pycache__/generate_completions.cpython-311.pyc differ
 
backend/utils/__pycache__/generate_completions.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ