Taf2023 committed on
Commit 7962c79 · verified · 1 Parent(s): 29a8579

Upload folder using huggingface_hub

Files changed (3)
  1. app.py +75 -0
  2. requirements.txt +16 -0
  3. utils.py +93 -0
app.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ import os
+ from utils import process_pipeline
+
+ # Define the UI
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+
+     # Header
+     with gr.Row():
+         gr.Markdown(
+             """
+             # 🇹🇭 Thai to AI Media Generator
+             ### แปลงข้อความไทยเป็นภาพและเสียงด้วย AI (Translate Thai -> Image & Audio)
+             [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+             """
+         )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Input Section
+             input_text = gr.Textbox(
+                 label="ใส่ข้อความภาษาไทย (Input Thai Text)",
+                 placeholder="ตัวอย่าง: แมวน่ารักใส่แว่นกันแดดนั่งอยู่บนชายหาด",
+                 lines=3
+             )
+
+             style_dropdown = gr.Dropdown(
+                 choices=["None", "Cinematic", "Anime", "3D Model", "Oil Painting", "Pixel Art"],
+                 value="None",
+                 label="สไตล์ภาพ (Image Style)"
+             )
+
+             submit_btn = gr.Button("✨ สร้างผลงาน (Generate)", variant="primary", size="lg")
+
+             gr.Markdown(
+                 """
+                 **หมายเหตุ:**
+                 - ระบบจะใช้โมเดลฟรีบน Hugging Face
+                 - การประมวลผลอาจใช้เวลา 10-30 วินาทีขึ้นอยู่กับความหนาแน่นของ Server
+                 """
+             )
+
+         with gr.Column(scale=1):
+             # Output Section
+             with gr.Group():
+                 gr.Markdown("### 1. ผลลัพธ์การแปล (Translation)")
+                 output_translation = gr.Textbox(label="English Translation", interactive=False)
+
+             with gr.Group():
+                 gr.Markdown("### 2. ภาพที่สร้างได้ (Generated Image)")
+                 output_image = gr.Image(label="AI Image", type="pil")
+
+             with gr.Group():
+                 gr.Markdown("### 3. เสียงบรรยาย (Generated Audio)")
+                 output_audio = gr.Audio(label="AI Speech", type="filepath")
+
+     # Logic Connection
+     submit_btn.click(
+         fn=process_pipeline,
+         inputs=[input_text, style_dropdown],
+         outputs=[output_translation, output_image, output_audio]
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=[
+             ["เด็กผู้หญิงใส่ชุดไทยยืนอยู่หน้าวัดอรุณ", "Cinematic"],
+             ["หุ่นยนต์แห่งโลกอนาคตกำลังทำอาหารในครัว", "3D Model"],
+             ["ป่ามหัศจรรย์ที่มีต้นไม้เรืองแสง", "Oil Painting"]
+         ],
+         inputs=[input_text, style_dropdown]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
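
The submit_btn.click wiring means process_pipeline must return exactly three values, in the order of the outputs list: the English translation (string), the generated image (PIL), and the audio file path. A minimal sketch of checking that contract outside the UI, assuming the Hugging Face Inference API is reachable from the current environment (the example input is purely illustrative):

from utils import process_pipeline

# Call the pipeline directly, bypassing Gradio (illustrative Thai input).
translation, image, audio_path = process_pipeline("แมวน่ารักใส่แว่นกันแดด", "Cinematic")
print(translation)        # English string shown in the Textbox
print(type(image))        # PIL image on success, None on failure
print(audio_path)         # path to a .flac file on success, None on failure
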
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ gradio
+ requests
+ Pillow
+ numpy
+ pandas
+ matplotlib
+ seaborn
+ scikit-learn
+ torch
+ torchvision
+ torchaudio
+ openpyxl
+ python-docx
+ PyPDF2
+ tqdm
+ joblib
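
Note that utils.py imports huggingface_hub directly; it is not listed here, but it is installed as a dependency of gradio, so the Space still builds. A sketch of the requirements trimmed to what the two committed files actually import (an assumption about intent, not part of the commit):

gradio
huggingface_hub
Pillow
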
utils.py ADDED
@@ -0,0 +1,93 @@
+ import os
+ from huggingface_hub import InferenceClient
+ import tempfile
+ import uuid
+
+ # Initialize the client.
+ # We rely on the free tier, which works for these specific models without a token when run locally;
+ # in production / on Spaces, the client picks up the environment's token automatically.
+ client = InferenceClient()
+
+ # Define Models
+ TRANSLATION_MODEL = "Helsinki-NLP/opus-mt-th-en"
+ IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
+ AUDIO_MODEL = "facebook/mms-tts-eng"
+
+ def translate_text(text):
+     """Translates Thai text to English."""
+     try:
+         if not text.strip():
+             return ""
+
+         # Using the translation API
+         result = client.translation(text, model=TRANSLATION_MODEL)
+         # The API returns either an object with a `translation_text` attribute or a list like [{'translation_text': '...'}]
+         if hasattr(result, 'translation_text'):
+             return result.translation_text
+         return result[0]['translation_text']
+     except Exception as e:
+         print(f"Translation Error: {e}")
+         return f"Error translating: {text}"
+
+ def generate_image(prompt, style):
+     """Generates an image from text."""
+     try:
+         # Enhance prompt based on style
+         enhanced_prompt = prompt
+         if style == "Cinematic":
+             enhanced_prompt += ", cinematic lighting, highly detailed, photorealistic, 8k"
+         elif style == "Anime":
+             enhanced_prompt += ", anime style, japanese animation, vibrant colors"
+         elif style == "3D Model":
+             enhanced_prompt += ", 3d render, blender, unreal engine 5, isometric"
+         elif style == "Oil Painting":
+             enhanced_prompt += ", oil painting, textured, artistic, van gogh style"
+         elif style == "Pixel Art":
+             enhanced_prompt += ", pixel art, 16-bit, retro game style"
+
+         image = client.text_to_image(
+             enhanced_prompt,
+             model=IMAGE_MODEL
+         )
+         return image
+     except Exception as e:
+         print(f"Image Generation Error: {e}")
+         return None
+
+ def generate_audio(text):
+     """Generates audio from English text."""
+     try:
+         # Generate audio bytes
+         audio_bytes = client.text_to_speech(
+             text,
+             model=AUDIO_MODEL
+         )
+
+         # Save to a temporary file
+         temp_dir = tempfile.gettempdir()
+         filename = f"{uuid.uuid4()}.flac"
+         filepath = os.path.join(temp_dir, filename)
+
+         with open(filepath, "wb") as f:
+             f.write(audio_bytes)
+
+         return filepath
+     except Exception as e:
+         print(f"Audio Generation Error: {e}")
+         return None
+
+ def process_pipeline(thai_text, style):
+     """Main function to orchestrate the flow."""
+     if not thai_text:
+         return "Please enter text.", None, None
+
+     print(f"Processing: {thai_text}")
+
+     # Step 1: Translate
+     eng_text = translate_text(thai_text)
+
+     # Steps 2 & 3: Generate the image and the audio (ideally in parallel; sequential here for simplicity)
+     image = generate_image(eng_text, style)
+     audio_path = generate_audio(eng_text)
+
+     return eng_text, image, audio_path
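
InferenceClient() is constructed with no arguments here, so authentication depends entirely on whatever token the runtime environment exposes. A sketch of passing a token explicitly instead, assuming it is stored in an HF_TOKEN environment variable (the variable name and this setup are assumptions, not part of the commit):

import os
from huggingface_hub import InferenceClient

# Use an explicit token if one is set; falls back to anonymous access otherwise (assumed setup).
client = InferenceClient(token=os.environ.get("HF_TOKEN"))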