Spaces:

HardbanRecordsLab
/

diy-course-app

Runtime error

App Files Files Community

HardbanRecordsLab committed on Aug 7

Commit

5703e10

verified ·

1 Parent(s): 8561786

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -58

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ==============================================================================
-# KROK NAPRAWCZY: Wymuszona instalacja brakujących bibliotek
 # ==============================================================================
 import os
 import subprocess
@@ -25,7 +25,7 @@ if 'PACKAGES_INSTALLED' not in os.environ:
     os.environ['PACKAGES_INSTALLED'] = 'TRUE'
 # ==============================================================================
-# Reszta kodu aplikacji
 # ==============================================================================
 import gradio as gr
 from transformers import pipeline as text_pipeline
@@ -34,7 +34,6 @@ import torch
 import re
 from fpdf import FPDF
 from PIL import Image
-import io
 import time
 from datasets import Dataset, Features, Value, Sequence, Image as HFImage
 from gradio_client import Client
@@ -44,7 +43,7 @@ import numpy as np
 from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip
 from speechbrain.pretrained import EncoderClassifier
-# --- Konfiguracja Modeli i Danych ---
 LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 IMAGE_MODEL = "stabilityai/stable-diffusion-2-1-base"
 TTS_MODEL = "microsoft/speecht5_tts"
@@ -52,53 +51,46 @@ VOCODER_MODEL = "microsoft/speecht5_hifigan"
 SPEAKER_EMBEDDING_MODEL = "speechbrain/spkrec-xvect-voxceleb"
 DATASET_PATH = "saved_courses"
 TEXT_TO_3D_MODEL_SPACE = "stabilityai/TripoSR"
-# --- Ładowanie Modeli i Klientów ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
-try:
-    print("Ładowanie modelu językowego..."); text_generator = text_pipeline("text-generation", model=LLM_MODEL, torch_dtype=torch.bfloat16, device_map="auto"); LLM_LOADED = True; print("Model językowy załadowany.")
-except Exception as e: print(f"Błąd ładowania LLM: {e}"); text_generator = None; LLM_LOADED = False
-try:
-    print("Ładowanie modelu obrazkowego...")
-    image_generator = DiffusionPipeline.from_pretrained(IMAGE_MODEL)
-    image_generator.to(device)
-    IMAGE_MODEL_LOADED = True
-    print("Model obrazkowy załadowany.")
-except Exception as e:
-    print(f"Błąd ładowania Image Model: {e}"); image_generator = None; IMAGE_MODEL_LOADED = False
-try:
-    print("Inicjalizacja klienta 3D..."); client_3d = Client(TEXT_TO_3D_MODEL_SPACE); CLIENT_3D_LOADED = True; print("Klient 3D gotowy.")
-except Exception as e: print(f"Błąd inicjalizacji klienta 3D: {e}"); client_3d = None; CLIENT_3D_LOADED = False
-try:
-    print("Ładowanie modeli TTS...")
-    tts_processor = SpeechT5Processor.from_pretrained(TTS_MODEL)
-    tts_model = SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL).to(device)
-    vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_MODEL).to(device)
-    spk_model_source = SPEAKER_EMBEDDING_MODEL
-    speaker_model = EncoderClassifier.from_hparams(source=spk_model_source, savedir=os.path.join('/tmp', spk_model_source))
-    dummy_waveform = torch.randn(1, 16000).to(device)
-    speaker_embeddings = speaker_model.encode_batch(dummy_waveform).squeeze().to(device)
-    TTS_LOADED = True
-    print("Modele TTS załadowane.")
-except Exception as e:
-    print(f"Błąd ładowania modeli TTS: {e}"); TTS_LOADED = False
-# --- Funkcje Aplikacji ---
 def list_saved_projects():
-    if not os.path.exists(DATASET_PATH):
-        return []
     try:
         dataset = Dataset.load_from_disk(DATASET_PATH)
         return dataset["project_name"]
     except Exception as e:
-        print(f"Błąd odczytu zapisanych projektów: {e}")
-        return []
 def parse_course_to_structure(markdown_text):
     course_data = {'title': "Nowy Kurs", 'steps': []}
@@ -112,10 +104,11 @@ def parse_course_to_structure(markdown_text):
 def generate_course_structure_and_images(topic, progress=gr.Progress(track_tqdm=True)):
     if not LLM_LOADED or not IMAGE_MODEL_LOADED:
         return [None, gr.State([])] + [gr.update(visible=False)]*2 + [gr.update(interactive=False)]*3 + [gr.update()]*11
     progress(0, desc="Generowanie tekstu kursu...")
-    prompt = f"<|system|>\nJesteś ekspertem w tworzeniu kursów online. Twoim zadaniem jest stworzenie zwięzłego, 5-etapowego planu kursu DIY na podany temat. Wygeneruj odpowiedź w formacie Markdown, która zawiera: 1. Chwytliwy tytuł kursu (jako nagłówek H1). 2. Pięć ponumerowanych kroków kursu. Każdy krok powinien mieć tytuł (pogrubiony) i krótki, 2-3 zdaniowy opis. Nie dodawaj żadnych wstępów, podsumowań ani dodatkowych komentarzy.</s>\n<|user|>\nTemat kursu: \"{topic}\"</s>\n<|assistant|>"
     response = text_generator(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_p=0.95)
     course_text = response[0]['generated_text'].split('<|assistant|>')[-1].strip()
     course_data = parse_course_to_structure(course_text)
@@ -134,10 +127,11 @@ def generate_course_structure_and_images(topic, progress=gr.Progress(track_tqdm=
         if i < len(course_data['steps']):
             updates.extend([gr.update(value=course_data['steps'][i]['title']), gr.update(value=course_data['steps'][i]['description'])])
         else:
-            updates.extend([gr.update(value=""), gr.update(value="")])
     return generated_images, gr.State(generated_images), gr.update(visible=True), gr.update(visible=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), *updates
 def load_project(project_name):
     if not project_name:
         gr.Warning("Wybierz projekt do wczytania.")
@@ -154,7 +148,7 @@ def load_project(project_name):
             step = project_data['steps'][i]
             updates.extend([gr.update(value=step['title']), gr.update(value=step['description'])])
         else:
-            updates.extend([gr.update(value=""), gr.update(value="")])
     return loaded_images, gr.State(loaded_images), gr.update(visible=True), gr.update(visible=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), *updates
@@ -288,16 +282,7 @@ def generate_pdf_from_ui(images_state, course_title, *args):
     return pdf_output_path
 # --- Budowa Interfejsu Gradio ---
-custom_css = """
-body, #root { height: 100vh; margin: 0; padding: 0; }
-.gradio-container { max-width: 100% !important; }
-.main, .wrap { height: 100%; }
-.gap { height: 100%; }
-.app-title { text-align: center; font-size: 2.5em; color: #2c3e50; font-weight: bold; margin-bottom: 0px !important; }
-.app-subtitle { text-align: center; font-size: 1.1em; color: #576574; margin-top: 5px; margin-bottom: 20px; }
-.section-title { font-size: 1.5em; font-weight: bold; color: #2980b9; border-bottom: 2px solid #3498db; padding-bottom: 5px; margin-top: 10px; }
-footer { display: none !important; }
-"""
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue"), title="Kreator Kursów AI", css=custom_css) as demo:
@@ -408,5 +393,8 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue"), ti
         inputs=[images_state] + editor_text_fields,
         outputs=[video_output]
     )
-demo.launch(show_api=False)

 # ==============================================================================
+# Wymuszona instalacja bibliotek
 # ==============================================================================
 import os
 import subprocess
     os.environ['PACKAGES_INSTALLED'] = 'TRUE'
 # ==============================================================================
+# Główny kod aplikacji
 # ==============================================================================
 import gradio as gr
 from transformers import pipeline as text_pipeline
 import re
 from fpdf import FPDF
 from PIL import Image
 import time
 from datasets import Dataset, Features, Value, Sequence, Image as HFImage
 from gradio_client import Client
 from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip
 from speechbrain.pretrained import EncoderClassifier
+# --- Konfiguracja ---
 LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 IMAGE_MODEL = "stabilityai/stable-diffusion-2-1-base"
 TTS_MODEL = "microsoft/speecht5_tts"
 SPEAKER_EMBEDDING_MODEL = "speechbrain/spkrec-xvect-voxceleb"
 DATASET_PATH = "saved_courses"
 TEXT_TO_3D_MODEL_SPACE = "stabilityai/TripoSR"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# --- Globalne zmienne dla modeli ---
+text_generator, image_generator, client_3d, tts_processor, tts_model, vocoder, speaker_embeddings = (None,) * 7
+LLM_LOADED, IMAGE_MODEL_LOADED, CLIENT_3D_LOADED, TTS_LOADED = (False,) * 4
+# --- Funkcje ładowania modeli ---
+def load_models():
+    """Populate the module-level model globals and their ``*_LOADED`` flags.
+
+    The language model, the image model, the 3D client and the TTS stack are
+    each loaded inside their own ``try``/``except``. A failure is printed and
+    the matching flag is not set to True, so the remaining loaders still run.
+    """
+    global text_generator, image_generator, client_3d, tts_processor, tts_model, vocoder, speaker_embeddings
+    global LLM_LOADED, IMAGE_MODEL_LOADED, CLIENT_3D_LOADED, TTS_LOADED
+
+    # Text-generation pipeline.
+    try:
+        print("Ładowanie modelu językowego...")
+        text_generator = text_pipeline(
+            "text-generation",
+            model=LLM_MODEL,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+        )
+        LLM_LOADED = True
+        print("Model językowy załadowany.")
+    except Exception as err:
+        print(f"Błąd ładowania LLM: {err}")
+
+    # Image-generation pipeline, moved to the selected device.
+    try:
+        print("Ładowanie modelu obrazkowego...")
+        image_generator = DiffusionPipeline.from_pretrained(IMAGE_MODEL)
+        image_generator.to(device)
+        IMAGE_MODEL_LOADED = True
+        print("Model obrazkowy załadowany.")
+    except Exception as err:
+        print(f"Błąd ładowania Image Model: {err}")
+
+    # Client for the remote text-to-3D Space.
+    try:
+        print("Inicjalizacja klienta 3D...")
+        client_3d = Client(TEXT_TO_3D_MODEL_SPACE)
+        CLIENT_3D_LOADED = True
+        print("Klient 3D gotowy.")
+    except Exception as err:
+        print(f"Błąd inicjalizacji klienta 3D: {err}")
+
+    # Text-to-speech: processor, acoustic model, vocoder and speaker embedding.
+    try:
+        print("Ładowanie modeli TTS...")
+        tts_processor = SpeechT5Processor.from_pretrained(TTS_MODEL)
+        tts_model = SpeechT5ForTextToSpeech.from_pretrained(TTS_MODEL).to(device)
+        vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_MODEL).to(device)
+        embedding_cache_dir = os.path.join('/tmp', SPEAKER_EMBEDDING_MODEL)
+        speaker_model = EncoderClassifier.from_hparams(
+            source=SPEAKER_EMBEDDING_MODEL,
+            savedir=embedding_cache_dir,
+        )
+        # NOTE(review): the speaker embedding is computed from a random
+        # 1x16000 tensor, not from a recorded voice sample - confirm intended.
+        noise_waveform = torch.randn(1, 16000).to(device)
+        speaker_embeddings = speaker_model.encode_batch(noise_waveform).squeeze().to(device)
+        TTS_LOADED = True
+        print("Modele TTS załadowane.")
+    except Exception as err:
+        print(f"Błąd ładowania modeli TTS: {err}")
+# --- Funkcje pomocnicze i główne ---
 def list_saved_projects():
+    """Return the ``project_name`` column of the dataset saved at DATASET_PATH.
+
+    Returns an empty list when the path does not exist or when loading the
+    dataset raises; in the latter case the error is printed.
+    """
+    if not os.path.exists(DATASET_PATH):
+        return []
+    try:
+        return Dataset.load_from_disk(DATASET_PATH)["project_name"]
+    except Exception as err:
+        print(f"Błąd odczytu projektów: {err}")
+        return []
 def parse_course_to_structure(markdown_text):
     course_data = {'title': "Nowy Kurs", 'steps': []}
 def generate_course_structure_and_images(topic, progress=gr.Progress(track_tqdm=True)):
     if not LLM_LOADED or not IMAGE_MODEL_LOADED:
+        gr.Error("Kluczowe modele AI nie zostały załadowane. Sprawdź logi.")
         return [None, gr.State([])] + [gr.update(visible=False)]*2 + [gr.update(interactive=False)]*3 + [gr.update()]*11
     progress(0, desc="Generowanie tekstu kursu...")
+    prompt = f"<|system|>\nJesteś ekspertem w tworzeniu kursów online...</s>\n<|user|>\nTemat kursu: \"{topic}\"</s>\n<|assistant|>"
     response = text_generator(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_p=0.95)
     course_text = response[0]['generated_text'].split('<|assistant|>')[-1].strip()
     course_data = parse_course_to_structure(course_text)
         if i < len(course_data['steps']):
             updates.extend([gr.update(value=course_data['steps'][i]['title']), gr.update(value=course_data['steps'][i]['description'])])
         else:
+            updates.extend(["", ""])
     return generated_images, gr.State(generated_images), gr.update(visible=True), gr.update(visible=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), *updates
+# ... (pozostałe funkcje: load_project, update_preview, move_step, etc. bez zmian) ...
 def load_project(project_name):
     if not project_name:
         gr.Warning("Wybierz projekt do wczytania.")
             step = project_data['steps'][i]
             updates.extend([gr.update(value=step['title']), gr.update(value=step['description'])])
         else:
+            updates.extend(["", ""])
     return loaded_images, gr.State(loaded_images), gr.update(visible=True), gr.update(visible=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), *updates
     return pdf_output_path
 # --- Budowa Interfejsu Gradio ---
+custom_css = "body, #root { height: 100vh; margin: 0; padding: 0; } .gradio-container { max-width: 100% !important; } .main, .wrap, .gap { height: 100%; } .app-title { text-align: center; font-size: 2.5em; color: #2c3e50; font-weight: bold; margin-bottom: 0px !important; } .app-subtitle { text-align: center; font-size: 1.1em; color: #576574; margin-top: 5px; margin-bottom: 20px; } .section-title { font-size: 1.5em; font-weight: bold; color: #2980b9; border-bottom: 2px solid #3498db; padding-bottom: 5px; margin-top: 10px; } footer { display: none !important; }"
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue"), title="Kreator Kursów AI", css=custom_css) as demo:
         inputs=[images_state] + editor_text_fields,
         outputs=[video_output]
     )
+# Load the models BEFORE starting the server: launch() blocks the main thread
+# while the app is serving, so a load_models() call placed after it would not
+# run until shutdown and every *_LOADED flag would stay False.
+load_models()
+# Start the app once the models are loaded.
+demo.queue().launch(show_api=False)