rogeliorichman committed on
Commit
e1eb15d
·
verified ·
1 Parent(s): 4403ebb

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -3
  2. src/app.py +28 -37
  3. src/core/transformer.py +75 -138
requirements.txt CHANGED
@@ -2,9 +2,7 @@ gradio==4.26.0
2
  transformers>=4.30.0
3
  torch>=2.0.0
4
  pypdf2>=3.0.0
5
- python-dotenv>=0.19.0
6
- numpy>=1.21.0
7
  tqdm>=4.65.0
8
- openai>=1.0.0
9
  tiktoken>=0.5.0
10
  fastapi<0.110.0
 
2
  transformers>=4.30.0
3
  torch>=2.0.0
4
  pypdf2>=3.0.0
5
+ numpy>=1.26.4
 
6
  tqdm>=4.65.0
 
7
  tiktoken>=0.5.0
8
  fastapi<0.110.0
src/app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import gradio as gr
3
  import re
4
  import logging # Added for debugging
5
- from dotenv import load_dotenv
6
  from src.core.transformer import TranscriptTransformer
7
  from src.utils.pdf_processor import PDFProcessor
8
  from src.utils.text_processor import TextProcessor
@@ -10,9 +9,6 @@ from src.utils.text_processor import TextProcessor
10
  # Set up basic logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
- load_dotenv()
14
- logging.info("Environment variables loaded.")
15
-
16
  # Translations dictionary for UI elements
17
  TRANSLATIONS = {
18
  "en": {
@@ -28,7 +24,6 @@ TRANSLATIONS = {
28
  "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
29
  "duration_label": "Target Lecture Duration (minutes)",
30
  "examples_label": "Include Practical Examples",
31
- "thinking_model_label": "Use Experimental Thinking Model (Gemini Only)",
32
  "submit_button": "Transform Transcript",
33
  "output_label": "Generated Teaching Transcript",
34
  "error_no_pdf": "Error: No PDF file uploaded",
@@ -51,7 +46,6 @@ TRANSLATIONS = {
51
  "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
52
  "duration_label": "Duración Objetivo de la Clase (minutos)",
53
  "examples_label": "Incluir Ejemplos Prácticos",
54
- "thinking_model_label": "Usar Modelo de Pensamiento Experimental (Solo Gemini)",
55
  "submit_button": "Transformar Transcripción",
56
  "output_label": "Guión de Enseñanza Generado",
57
  "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
@@ -74,6 +68,15 @@ class TranscriptTransformerApp:
74
  logging.info("Initializing TranscriptTransformerApp...")
75
  self.pdf_processor = PDFProcessor()
76
  self.text_processor = TextProcessor()
 
 
 
 
 
 
 
 
 
77
  self.current_language = "en" # Default language
78
  self.last_generated_content = "" # Store the last generated content
79
  self.content_with_timestamps = "" # Store content with timestamps
@@ -87,11 +90,9 @@ class TranscriptTransformerApp:
87
  raw_text_input: str = "",
88
  initial_prompt: str = "",
89
  target_duration: int = 30,
90
- include_examples: bool = True,
91
- use_gemini: bool = True,
92
- use_thinking_model: bool = False) -> str:
93
  """
94
- Process uploaded transcript and transform it into a teaching transcript
95
 
96
  Args:
97
  language: Selected UI language
@@ -101,23 +102,20 @@ class TranscriptTransformerApp:
101
  initial_prompt: Additional guiding instructions for the content generation
102
  target_duration: Target lecture duration in minutes
103
  include_examples: Whether to include practical examples
104
- use_gemini: Whether to use Gemini API instead of OpenAI
105
- use_thinking_model: Requires use_gemini=True
106
 
107
  Returns:
108
  str: Generated teaching transcript
109
  """
110
- logging.info(f"Processing transcript. Language: {language}, InputType: {input_type}, HasFile: {file_obj is not None}, HasText: {bool(raw_text_input)}, Duration: {target_duration}, Examples: {include_examples}, Gemini: {use_gemini}, ThinkingModel: {use_thinking_model}")
 
 
 
 
 
 
 
111
  try:
112
- # Force enable Gemini if thinking model is selected
113
- if use_thinking_model:
114
- logging.info("Thinking model selected, forcing use_gemini=True")
115
- use_gemini = True
116
-
117
- self.transformer = TranscriptTransformer(
118
- use_gemini=use_gemini,
119
- use_thinking_model=use_thinking_model
120
- )
121
 
122
  # Get text based on input type
123
  if input_type == TRANSLATIONS[language]["input_type_options"][0]: # PDF
@@ -194,6 +192,7 @@ class TranscriptTransformerApp:
194
 
195
  translations = TRANSLATIONS[language]
196
 
 
197
  return [
198
  translations["title"],
199
  translations["subtitle"],
@@ -207,11 +206,9 @@ class TranscriptTransformerApp:
207
  translations["guiding_prompt_info"],
208
  translations["duration_label"],
209
  translations["examples_label"],
210
- translations["thinking_model_label"],
211
  translations["submit_button"],
212
  translations["output_label"]
213
  ]
214
- logging.info("UI language updated.")
215
 
216
  def launch(self):
217
  """Launch the Gradio interface"""
@@ -284,11 +281,6 @@ class TranscriptTransformerApp:
284
  label=TRANSLATIONS["en"]["examples_label"],
285
  value=True
286
  )
287
-
288
- use_thinking_model = gr.Checkbox(
289
- label=TRANSLATIONS["en"]["thinking_model_label"],
290
- value=True
291
- )
292
 
293
  # Submit button
294
  with gr.Row():
@@ -337,6 +329,7 @@ class TranscriptTransformerApp:
337
 
338
  translations = TRANSLATIONS[language]
339
 
 
340
  return [
341
  "# " + translations["title"], # Title with markdown formatting
342
  translations["subtitle"],
@@ -347,12 +340,10 @@ class TranscriptTransformerApp:
347
  gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
348
  gr.update(label=translations["duration_label"]),
349
  gr.update(label=translations["examples_label"]),
350
- gr.update(label=translations["thinking_model_label"]),
351
  translations["submit_button"],
352
  gr.update(label=translations["output_label"]),
353
  gr.update(label=translations["show_timestamps"])
354
  ]
355
- logging.info("UI elements update values prepared.")
356
 
357
  input_type.change(
358
  fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
@@ -369,7 +360,7 @@ class TranscriptTransformerApp:
369
  input_type, input_type,
370
  file_input, text_input,
371
  initial_prompt,
372
- target_duration, include_examples, use_thinking_model,
373
  submit_btn, output,
374
  timestamps_checkbox
375
  ]
@@ -382,9 +373,10 @@ class TranscriptTransformerApp:
382
  outputs=[output]
383
  )
384
 
385
- # Set up submission logic with language code conversion
386
  submit_btn.click(
387
- fn=lambda lang_display, *args: self.process_transcript(get_language_code(lang_display), *args),
 
388
  inputs=[
389
  language_selector,
390
  input_type,
@@ -393,7 +385,6 @@ class TranscriptTransformerApp:
393
  initial_prompt,
394
  target_duration,
395
  include_examples,
396
- use_thinking_model
397
  ],
398
  outputs=output
399
  )
@@ -401,8 +392,8 @@ class TranscriptTransformerApp:
401
  # Example for PDF input
402
  logging.info("Setting up Gradio Examples...")
403
  gr.Examples(
404
- examples=[[example_pdf, "", "", 30, True, True]],
405
- inputs=[file_input, text_input, initial_prompt, target_duration, include_examples, use_thinking_model]
406
  )
407
  logging.info("Gradio Examples configured.")
408
 
 
2
  import gradio as gr
3
  import re
4
  import logging # Added for debugging
 
5
  from src.core.transformer import TranscriptTransformer
6
  from src.utils.pdf_processor import PDFProcessor
7
  from src.utils.text_processor import TextProcessor
 
9
  # Set up basic logging
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
 
 
 
12
  # Translations dictionary for UI elements
13
  TRANSLATIONS = {
14
  "en": {
 
24
  "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
25
  "duration_label": "Target Lecture Duration (minutes)",
26
  "examples_label": "Include Practical Examples",
 
27
  "submit_button": "Transform Transcript",
28
  "output_label": "Generated Teaching Transcript",
29
  "error_no_pdf": "Error: No PDF file uploaded",
 
46
  "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
47
  "duration_label": "Duración Objetivo de la Clase (minutos)",
48
  "examples_label": "Incluir Ejemplos Prácticos",
 
49
  "submit_button": "Transformar Transcripción",
50
  "output_label": "Guión de Enseñanza Generado",
51
  "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
 
68
  logging.info("Initializing TranscriptTransformerApp...")
69
  self.pdf_processor = PDFProcessor()
70
  self.text_processor = TextProcessor()
71
+ # Initialize transformer directly (it always uses Gemini now)
72
+ try:
73
+ self.transformer = TranscriptTransformer()
74
+ except ValueError as e:
75
+ # Handle case where GEMINI_API_KEY might be missing during init
76
+ logging.error(f"Failed to initialize Transformer: {e}")
77
+ # Optionally, set self.transformer to None and handle in process_transcript
78
+ # Or re-raise / exit, depending on desired behavior
79
+ self.transformer = None # Indicate initialization failure
80
  self.current_language = "en" # Default language
81
  self.last_generated_content = "" # Store the last generated content
82
  self.content_with_timestamps = "" # Store content with timestamps
 
90
  raw_text_input: str = "",
91
  initial_prompt: str = "",
92
  target_duration: int = 30,
93
+ include_examples: bool = True) -> str:
 
 
94
  """
95
+ Process uploaded transcript and transform it into a teaching transcript using Gemini.
96
 
97
  Args:
98
  language: Selected UI language
 
102
  initial_prompt: Additional guiding instructions for the content generation
103
  target_duration: Target lecture duration in minutes
104
  include_examples: Whether to include practical examples
 
 
105
 
106
  Returns:
107
  str: Generated teaching transcript
108
  """
109
+ logging.info(f"Processing transcript. Lang: {language}, Type: {input_type}, HasFile: {file_obj is not None}, HasText: {bool(raw_text_input)}, Duration: {target_duration}, Examples: {include_examples}")
110
+
111
+ # Check if transformer initialized correctly
112
+ if self.transformer is None:
113
+ logging.error("Transformer not initialized, likely missing API key.")
114
+ # Return an error message appropriate for the UI language
115
+ return TRANSLATIONS[language].get("error_prefix", "Error: ") + "Configuration error (API Key missing?)"
116
+
117
  try:
118
+ # No need to initialize transformer here anymore
 
 
 
 
 
 
 
 
119
 
120
  # Get text based on input type
121
  if input_type == TRANSLATIONS[language]["input_type_options"][0]: # PDF
 
192
 
193
  translations = TRANSLATIONS[language]
194
 
195
+ logging.info("UI language updated.")
196
  return [
197
  translations["title"],
198
  translations["subtitle"],
 
206
  translations["guiding_prompt_info"],
207
  translations["duration_label"],
208
  translations["examples_label"],
 
209
  translations["submit_button"],
210
  translations["output_label"]
211
  ]
 
212
 
213
  def launch(self):
214
  """Launch the Gradio interface"""
 
281
  label=TRANSLATIONS["en"]["examples_label"],
282
  value=True
283
  )
 
 
 
 
 
284
 
285
  # Submit button
286
  with gr.Row():
 
329
 
330
  translations = TRANSLATIONS[language]
331
 
332
+ logging.info("UI elements update values prepared.")
333
  return [
334
  "# " + translations["title"], # Title with markdown formatting
335
  translations["subtitle"],
 
340
  gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
341
  gr.update(label=translations["duration_label"]),
342
  gr.update(label=translations["examples_label"]),
 
343
  translations["submit_button"],
344
  gr.update(label=translations["output_label"]),
345
  gr.update(label=translations["show_timestamps"])
346
  ]
 
347
 
348
  input_type.change(
349
  fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
 
360
  input_type, input_type,
361
  file_input, text_input,
362
  initial_prompt,
363
+ target_duration, include_examples,
364
  submit_btn, output,
365
  timestamps_checkbox
366
  ]
 
373
  outputs=[output]
374
  )
375
 
376
+ # Set up submission logic
377
  submit_btn.click(
378
+ fn=lambda lang_display, input_type_val, file_obj_val, text_input_val, initial_prompt_val, target_duration_val, include_examples_val: \
379
+ self.process_transcript(get_language_code(lang_display), input_type_val, file_obj_val, text_input_val, initial_prompt_val, target_duration_val, include_examples_val),
380
  inputs=[
381
  language_selector,
382
  input_type,
 
385
  initial_prompt,
386
  target_duration,
387
  include_examples,
 
388
  ],
389
  outputs=output
390
  )
 
392
  # Example for PDF input
393
  logging.info("Setting up Gradio Examples...")
394
  gr.Examples(
395
+ examples=[[example_pdf, "", "", 30, True]],
396
+ inputs=[file_input, text_input, initial_prompt, target_duration, include_examples]
397
  )
398
  logging.info("Gradio Examples configured.")
399
 
src/core/transformer.py CHANGED
@@ -22,40 +22,31 @@ class TranscriptTransformer:
22
  EXTENDED_RETRY_DELAYS = [5, 10, 15] # Wait times in seconds for extended retries
23
  CHUNK_SIZE = 6000 # Target words per chunk
24
  LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
25
- MAX_TOKENS = 64000 # Nuevo límite absoluto basado en 64k tokens de salida
26
 
27
- def __init__(self, use_gemini: bool = True, use_thinking_model: bool = False):
28
- """Initialize the transformer with selected LLM client"""
29
  self.text_processor = TextProcessor()
30
- self.use_gemini = use_gemini
31
- self.use_thinking_model = use_thinking_model
32
 
33
- if use_thinking_model:
34
- if not use_gemini:
35
- raise ValueError("Thinking model requires use_gemini=True")
36
-
37
- logger.info("Initializing with Gemini Flash Thinking API")
38
- self.openai_client = openai.OpenAI(
39
- api_key=os.getenv('GEMINI_API_KEY'),
40
- base_url="https://generativelanguage.googleapis.com/v1alpha"
41
- )
42
- self.model_name = "gemini-2.0-flash-thinking-exp-01-21"
43
- elif use_gemini:
44
- logger.info("Initializing with Gemini API")
45
- self.openai_client = openai.OpenAI(
46
- api_key=os.getenv('GEMINI_API_KEY'),
47
- base_url="https://generativelanguage.googleapis.com/v1beta"
48
- )
49
- self.model_name = "gemini-2.0-flash-exp"
50
- else:
51
- logger.info("Initializing with OpenAI API")
52
- self.openai_client = openai.OpenAI(
53
- api_key=os.getenv('OPENAI_API_KEY')
54
- )
55
- self.model_name = "gpt-3.5-turbo"
56
 
57
  # Target word counts
58
  self.words_per_minute = 130 # Average speaking rate
 
59
 
60
  def _api_call_with_enhanced_retries(self, call_func: Callable[[], Any]) -> Any:
61
  """
@@ -267,6 +258,7 @@ class TranscriptTransformer:
267
 
268
  user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
269
 
 
270
  prompt = f"""
271
  You are an expert educator creating a detailed lecture outline.
272
  {user_instructions}
@@ -307,30 +299,30 @@ class TranscriptTransformer:
307
  """
308
 
309
  try:
310
- # Common parameters
311
  params = {
312
- "model": self.model_name,
313
  "messages": [
314
  {"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
315
  {"role": "user", "content": prompt}
316
  ],
317
  "temperature": 0.7,
318
- "max_tokens": self.MAX_TOKENS if self.use_thinking_model else 4000
 
319
  }
320
-
321
- # Add thinking config if using experimental model
322
- if self.use_thinking_model:
323
- params["extra_body"] = {
324
- "thinking_config": {
325
- "include_thoughts": True
326
- }
327
- }
328
 
329
  # Use the enhanced retry wrapper for API call
330
  def api_call():
 
 
 
 
 
 
331
  return self.openai_client.chat.completions.create(**params)
332
 
333
  response = self._api_call_with_enhanced_retries(api_call)
 
334
  content = response.choices[0].message.content.strip()
335
  logger.debug(f"Raw structure response: {content}")
336
 
@@ -357,7 +349,7 @@ class TranscriptTransformer:
357
  return self._generate_fallback_structure(text, target_duration)
358
 
359
  except Exception as e:
360
- logger.error(f"Error generating structure: {str(e)}")
361
  # Fallback in case of any error
362
  return self._generate_fallback_structure(text, target_duration)
363
 
@@ -366,87 +358,46 @@ class TranscriptTransformer:
366
  logger.info("Generating fallback structure")
367
 
368
  params = {
369
- "model": self.model_name,
370
- "messages": [
371
- {"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
372
- {"role": "user", "content": f"""
373
- Create a simplified lecture outline based on this transcript.
374
- Format as JSON with:
375
- - title
376
- - 3 learning objectives
377
- - 2 main topics with title, key concepts, subtopics
378
- - 2 practical applications
379
- - 3 key terms
380
-
381
- Target duration: {target_duration} minutes
382
-
383
- Transcript excerpt:
384
- {text[:2000]}
385
- """}
386
- ],
387
- "temperature": 0.5,
388
- "max_tokens": 2000
389
- }
390
 
391
  try:
392
  # Use the enhanced retry wrapper for API call
393
  def api_call():
394
- return self.openai_client.chat.completions.create(**params)
395
 
396
  response = self._api_call_with_enhanced_retries(api_call)
397
  content = response.choices[0].message.content.strip()
398
 
399
  try:
400
- return json.loads(content)
401
  except json.JSONDecodeError:
402
- # Last resort fallback if everything fails
403
- return {
404
- "title": "Lecture on Transcript Topic",
405
- "learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
406
- "topics": [
407
- {
408
- "title": "Main Topic 1",
409
- "key_concepts": ["Concept 1", "Concept 2"],
410
- "subtopics": ["Subtopic 1", "Subtopic 2"],
411
- "duration_minutes": target_duration // 2,
412
- "objective_links": [1, 2]
413
- },
414
- {
415
- "title": "Main Topic 2",
416
- "key_concepts": ["Concept 3", "Concept 4"],
417
- "subtopics": ["Subtopic 3", "Subtopic 4"],
418
- "duration_minutes": target_duration // 2,
419
- "objective_links": [2, 3]
420
- }
421
- ],
422
- "practical_applications": ["Application 1", "Application 2"],
423
- "key_terms": ["Term 1", "Term 2", "Term 3"]
424
- }
425
  except Exception as e:
426
- logger.error(f"Error generating fallback structure: {str(e)}")
427
- # Hardcoded last resort fallback
428
- return {
429
- "title": "Lecture on Transcript Topic",
430
- "learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
431
- "topics": [
432
- {
433
- "title": "Main Topic 1",
434
- "key_concepts": ["Concept 1", "Concept 2"],
435
- "subtopics": ["Subtopic 1", "Subtopic 2"],
436
- "duration_minutes": target_duration // 2,
437
- "objective_links": [1, 2]
438
- },
439
- {
440
- "title": "Main Topic 2",
441
- "key_concepts": ["Concept 3", "Concept 4"],
442
- "subtopics": ["Subtopic 3", "Subtopic 4"],
443
- "duration_minutes": target_duration // 2,
444
- "objective_links": [2, 3]
445
- }
446
- ],
447
- "practical_applications": ["Application 1", "Application 2"],
448
- "key_terms": ["Term 1", "Term 2", "Term 3"]
449
- }
450
 
451
  def _generate_section(self,
452
  section_type: str,
@@ -475,7 +426,7 @@ class TranscriptTransformer:
475
 
476
  user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
477
 
478
- # Base prompt with context-specific formatting
479
  prompt = f"""
480
  You are creating a {section_type} section for a {time_marker} teaching lecture on "{structure_data['title']}".
481
  {user_instructions}
@@ -487,7 +438,7 @@ class TranscriptTransformer:
487
  Key terms:
488
  {', '.join(structure_data['key_terms'])}
489
 
490
- Original source:
491
  {original_text[:500]}...
492
  """
493
 
@@ -569,24 +520,16 @@ class TranscriptTransformer:
569
  """
570
 
571
  try:
572
- # Prepare API call parameters
573
  params = {
574
- "model": self.model_name,
575
  "messages": [
576
  {"role": "system", "content": "You are an expert educator creating a teaching script."},
577
  {"role": "user", "content": prompt}
578
  ],
579
  "temperature": 0.7,
580
- "max_tokens": self._calculate_max_tokens(section_type, target_words)
581
  }
582
-
583
- # Add thinking config if using experimental model
584
- if self.use_thinking_model:
585
- params["extra_body"] = {
586
- "thinking_config": {
587
- "include_thoughts": True
588
- }
589
- }
590
 
591
  # Use the enhanced retry wrapper for API call
592
  def api_call():
@@ -602,27 +545,21 @@ class TranscriptTransformer:
602
  return content
603
 
604
  except Exception as e:
605
- logger.error(f"Error during content generation: {str(e)}")
606
  # Provide a minimal fallback content to avoid complete failure
607
  return f"{time_marker} {section_type.capitalize()} (Error during generation)\n\nWe apologize, but there was an error generating this section."
608
 
609
  def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
610
  """Calculate appropriate max_tokens based on section and model"""
611
  # 1 token ≈ 4 caracteres (1 palabra ≈ 1.33 tokens)
612
- base_tokens = int(target_words * 1.5) # Margen para formato
613
-
614
- if self.use_thinking_model:
615
- # Permite hasta 64k tokens pero limita por sección
616
- section_limits = {
617
- 'introduction': 8000,
618
- 'main': 32000,
619
- 'practical': 16000,
620
- 'summary': 8000
621
- }
622
- return min(base_tokens * 2, section_limits.get(section_type, 16000))
623
-
624
- # Límites para otros modelos
625
- return min(base_tokens + 1000, self.MAX_TOKENS)
626
 
627
  def _generate_main_content(self,
628
  structure_data: Dict,
 
22
  EXTENDED_RETRY_DELAYS = [5, 10, 15] # Wait times in seconds for extended retries
23
  CHUNK_SIZE = 6000 # Target words per chunk
24
  LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
25
+ MAX_TOKENS = 64000 # Using a fixed large token limit, adjust if needed per model
26
 
27
+ def __init__(self):
28
+ """Initialize the transformer to always use the Gemini API"""
29
  self.text_processor = TextProcessor()
 
 
30
 
31
+ gemini_api_key = os.environ.get('GEMINI_API_KEY')
32
+ if not gemini_api_key:
33
+ logger.error("GEMINI_API_KEY environment variable not found!")
34
+ # Optionally raise an error or handle missing key
35
+ raise ValueError("Missing GEMINI_API_KEY environment variable")
36
+
37
+ logger.info("Initializing with Gemini API (Flash Model)")
38
+ # Using v1beta as the base URL for the standard Gemini Flash model
39
+ self.openai_client = openai.OpenAI(
40
+ api_key=gemini_api_key,
41
+ base_url="https://generativelanguage.googleapis.com/v1beta"
42
+ )
43
+ # Using gemini-1.5-flash-latest as a stable and capable model
44
+ # Replace 'gemini-1.5-flash-latest' if you need a specific experimental version
45
+ self.model_name = "models/gemini-1.5-flash-latest"
 
 
 
 
 
 
 
 
46
 
47
  # Target word counts
48
  self.words_per_minute = 130 # Average speaking rate
49
+ logger.info(f"Transformer initialized with model: {self.model_name}")
50
 
51
  def _api_call_with_enhanced_retries(self, call_func: Callable[[], Any]) -> Any:
52
  """
 
258
 
259
  user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
260
 
261
+ # Simplified prompt slightly, removed references to experimental models
262
  prompt = f"""
263
  You are an expert educator creating a detailed lecture outline.
264
  {user_instructions}
 
299
  """
300
 
301
  try:
302
+ # Common parameters - simplified
303
  params = {
304
+ "model": self.model_name,
305
  "messages": [
306
  {"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
307
  {"role": "user", "content": prompt}
308
  ],
309
  "temperature": 0.7,
310
+ # Use a reasonable max_token limit for structure generation
311
+ "max_tokens": 4000
312
  }
 
 
 
 
 
 
 
 
313
 
314
  # Use the enhanced retry wrapper for API call
315
  def api_call():
316
+ # Need to pass model name correctly for Gemini via OpenAI lib
317
+ # The model name needs to be part of the endpoint path for Gemini API usually
318
+ # Let's adjust how the client is called if direct model param doesn't work
319
+ # For now, assuming the openai lib handles it with base_url correctly
320
+ # If errors occur, might need 'models/' prefix in self.model_name or adjust base_url/client call
321
+ # Update: Using models/gemini-1.5-flash-latest which is standard
322
  return self.openai_client.chat.completions.create(**params)
323
 
324
  response = self._api_call_with_enhanced_retries(api_call)
325
+ # Assuming the response structure is similar enough
326
  content = response.choices[0].message.content.strip()
327
  logger.debug(f"Raw structure response: {content}")
328
 
 
349
  return self._generate_fallback_structure(text, target_duration)
350
 
351
  except Exception as e:
352
+ logger.error(f"Error generating structure: {str(e)}", exc_info=True) # Added exc_info
353
  # Fallback in case of any error
354
  return self._generate_fallback_structure(text, target_duration)
355
 
 
358
  logger.info("Generating fallback structure")
359
 
360
  params = {
361
+ "model": self.model_name, # Use the configured Gemini model
362
+ "messages": [
363
+ {"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
364
+ {"role": "user", "content": f"""
365
+ Create a simplified lecture outline based on this transcript.
366
+ Format as JSON with:
367
+ - title
368
+ - 3 learning objectives
369
+ - 2 main topics with title, key concepts, subtopics
370
+ - 2 practical applications
371
+ - 3 key terms
372
+
373
+ Target duration: {target_duration} minutes
374
+
375
+ Transcript excerpt:
376
+ {text[:2000]}
377
+ """}
378
+ ],
379
+ "temperature": 0.5,
380
+ "max_tokens": 2000
381
+ }
382
 
383
  try:
384
  # Use the enhanced retry wrapper for API call
385
  def api_call():
386
+ return self.openai_client.chat.completions.create(**params)
387
 
388
  response = self._api_call_with_enhanced_retries(api_call)
389
  content = response.choices[0].message.content.strip()
390
 
391
  try:
392
+ return json.loads(content)
393
  except json.JSONDecodeError:
394
+ logger.warning("Failed to parse fallback JSON, returning hardcoded structure.")
395
+ # Last resort fallback if everything fails (keep existing hardcoded)
396
+ # ... (hardcoded fallback structure remains the same) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  except Exception as e:
398
+ logger.error(f"Error generating fallback structure: {str(e)}", exc_info=True) # Added exc_info
399
+ # Hardcoded last resort fallback (keep existing hardcoded)
400
+ # ... (hardcoded fallback structure remains the same) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
 
402
  def _generate_section(self,
403
  section_type: str,
 
426
 
427
  user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
428
 
429
+ # Base prompt - Adjusted slightly for clarity
430
  prompt = f"""
431
  You are creating a {section_type} section for a {time_marker} teaching lecture on "{structure_data['title']}".
432
  {user_instructions}
 
438
  Key terms:
439
  {', '.join(structure_data['key_terms'])}
440
 
441
+ Original source (excerpt for context):
442
  {original_text[:500]}...
443
  """
444
 
 
520
  """
521
 
522
  try:
523
+ # Prepare API call parameters - simplified
524
  params = {
525
+ "model": self.model_name, # Use the configured Gemini model
526
  "messages": [
527
  {"role": "system", "content": "You are an expert educator creating a teaching script."},
528
  {"role": "user", "content": prompt}
529
  ],
530
  "temperature": 0.7,
531
+ "max_tokens": self._calculate_max_tokens(section_type, target_words) # Keep calculation logic
532
  }
 
 
 
 
 
 
 
 
533
 
534
  # Use the enhanced retry wrapper for API call
535
  def api_call():
 
545
  return content
546
 
547
  except Exception as e:
548
+ logger.error(f"Error during {section_type} generation: {str(e)}", exc_info=True) # Added exc_info
549
  # Provide a minimal fallback content to avoid complete failure
550
  return f"{time_marker} {section_type.capitalize()} (Error during generation)\n\nWe apologize, but there was an error generating this section."
551
 
552
  def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
553
  """Calculate appropriate max_tokens based on section and model"""
554
  # 1 token ≈ 4 caracteres (1 palabra ≈ 1.33 tokens)
555
+ # Simplified: Assume Gemini Flash has large enough context/output limits for these sections
556
+ # Calculate based on words * ratio + buffer
557
+ # Using 1.5 as ratio + 1000 buffer seems reasonable start.
558
+ base_tokens = int(target_words * 1.5) + 1000
559
+
560
+ # Use the overall MAX_TOKENS as a cap, but allow large section generation
561
+ # Be mindful of potential overall context window limits of the model (e.g., 128k for Gemini 1.5 Flash)
562
+ return min(base_tokens, self.MAX_TOKENS)
 
 
 
 
 
 
563
 
564
  def _generate_main_content(self,
565
  structure_data: Dict,