Taf2023 committed on
Commit 7962c79 · verified · 1 Parent(s): 29a8579

Upload folder using huggingface_hub

Files changed (3)
  1. app.py +75 -0
  2. requirements.txt +16 -0
  3. utils.py +93 -0
app.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ import os
+ from utils import process_pipeline
+
+ # Define the UI
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+
+     # Header
+     with gr.Row():
+         gr.Markdown(
+             """
+             # 🇹🇭 Thai to AI Media Generator
+             ### แปลงข้อความไทยเป็นภาพและเสียงด้วย AI (Translate Thai -> Image & Audio)
+             [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+             """
+         )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Input Section
+             input_text = gr.Textbox(
+                 label="ใส่ข้อความภาษาไทย (Input Thai Text)",
+                 placeholder="ตัวอย่าง: แมวน่ารักใส่แว่นกันแดดนั่งอยู่บนชายหาด",
+                 lines=3
+             )
+
+             style_dropdown = gr.Dropdown(
+                 choices=["None", "Cinematic", "Anime", "3D Model", "Oil Painting", "Pixel Art"],
+                 value="None",
+                 label="สไตล์ภาพ (Image Style)"
+             )
+
+             submit_btn = gr.Button("✨ สร้างผลงาน (Generate)", variant="primary", size="lg")
+
+             gr.Markdown(
+                 """
+                 **หมายเหตุ:**
+                 - ระบบจะใช้โมเดลฟรีบน Hugging Face
+                 - การประมวลผลอาจใช้เวลา 10-30 วินาทีขึ้นอยู่กับความหนาแน่นของ Server
+                 """
+             )
+
+         with gr.Column(scale=1):
+             # Output Section
+             with gr.Group():
+                 gr.Markdown("### 1. ผลลัพธ์การแปล (Translation)")
+                 output_translation = gr.Textbox(label="English Translation", interactive=False)
+
+             with gr.Group():
+                 gr.Markdown("### 2. ภาพที่สร้างได้ (Generated Image)")
+                 output_image = gr.Image(label="AI Image", type="pil")
+
+             with gr.Group():
+                 gr.Markdown("### 3. เสียงบรรยาย (Generated Audio)")
+                 output_audio = gr.Audio(label="AI Speech", type="filepath")
+
+     # Logic Connection
+     submit_btn.click(
+         fn=process_pipeline,
+         inputs=[input_text, style_dropdown],
+         outputs=[output_translation, output_image, output_audio]
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=[
+             ["เด็กผู้หญิงใส่ชุดไทยยืนอยู่หน้าวัดอรุณ", "Cinematic"],
+             ["หุ่นยนต์แห่งโลกอนาคตกำลังทำอาหารในครัว", "3D Model"],
+             ["ป่ามหัศจรรย์ที่มีต้นไม้เรืองแสง", "Oil Painting"]
+         ],
+         inputs=[input_text, style_dropdown]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
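
The submit_btn.click wiring means process_pipeline must return exactly three values, in the order of the outputs list: the English translation (string), the generated image (PIL), and the audio file path. A minimal sketch of checking that contract outside the UI, assuming the Hugging Face Inference API is reachable from the current environment (the example input is purely illustrative):

from utils import process_pipeline

# Call the pipeline directly, bypassing Gradio (illustrative Thai input).
translation, image, audio_path = process_pipeline("แมวน่ารักใส่แว่นกันแดด", "Cinematic")
print(translation)        # English string shown in the Textbox
print(type(image))        # PIL image on success, None on failure
print(audio_path)         # path to a .flac file on success, None on failure
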
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ gradio
+ requests
+ Pillow
+ numpy
+ pandas
+ matplotlib
+ seaborn
+ scikit-learn
+ torch
+ torchvision
+ torchaudio
+ openpyxl
+ python-docx
+ PyPDF2
+ tqdm
+ joblib
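
Note that utils.py imports huggingface_hub directly; it is not listed here, but it is installed as a dependency of gradio, so the Space still builds. A sketch of the requirements trimmed to what the two committed files actually import (an assumption about intent, not part of the commit):

gradio
huggingface_hub
Pillow
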
utils.py ADDED
@@ -0,0 +1,93 @@
+ import os
+ from huggingface_hub import InferenceClient
+ import tempfile
+ import uuid
+
+ # Initialize the client.
+ # We rely on the free tier, which works for these specific models without a token when run locally;
+ # in production / on Spaces, the client picks up the environment's token automatically.
+ client = InferenceClient()
+
+ # Define Models
+ TRANSLATION_MODEL = "Helsinki-NLP/opus-mt-th-en"
+ IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
+ AUDIO_MODEL = "facebook/mms-tts-eng"
+
+ def translate_text(text):
+     """Translates Thai text to English."""
+     try:
+         if not text.strip():
+             return ""
+
+         # Using the translation API
+         result = client.translation(text, model=TRANSLATION_MODEL)
+         # The API returns either an object with a `translation_text` attribute or a list like [{'translation_text': '...'}]
+         if hasattr(result, 'translation_text'):
+             return result.translation_text
+         return result[0]['translation_text']
+     except Exception as e:
+         print(f"Translation Error: {e}")
+         return f"Error translating: {text}"
+
+ def generate_image(prompt, style):
+     """Generates an image from text."""
+     try:
+         # Enhance prompt based on style
+         enhanced_prompt = prompt
+         if style == "Cinematic":
+             enhanced_prompt += ", cinematic lighting, highly detailed, photorealistic, 8k"
+         elif style == "Anime":
+             enhanced_prompt += ", anime style, japanese animation, vibrant colors"
+         elif style == "3D Model":
+             enhanced_prompt += ", 3d render, blender, unreal engine 5, isometric"
+         elif style == "Oil Painting":
+             enhanced_prompt += ", oil painting, textured, artistic, van gogh style"
+         elif style == "Pixel Art":
+             enhanced_prompt += ", pixel art, 16-bit, retro game style"
+
+         image = client.text_to_image(
+             enhanced_prompt,
+             model=IMAGE_MODEL
+         )
+         return image
+     except Exception as e:
+         print(f"Image Generation Error: {e}")
+         return None
+
+ def generate_audio(text):
+     """Generates audio from English text."""
+     try:
+         # Generate audio bytes
+         audio_bytes = client.text_to_speech(
+             text,
+             model=AUDIO_MODEL
+         )
+
+         # Save to a temporary file
+         temp_dir = tempfile.gettempdir()
+         filename = f"{uuid.uuid4()}.flac"
+         filepath = os.path.join(temp_dir, filename)
+
+         with open(filepath, "wb") as f:
+             f.write(audio_bytes)
+
+         return filepath
+     except Exception as e:
+         print(f"Audio Generation Error: {e}")
+         return None
+
+ def process_pipeline(thai_text, style):
+     """Main function to orchestrate the flow."""
+     if not thai_text:
+         return "Please enter text.", None, None
+
+     print(f"Processing: {thai_text}")
+
+     # Step 1: Translate
+     eng_text = translate_text(thai_text)
+
+     # Steps 2 & 3: Generate the image and the audio (ideally in parallel; sequential here for simplicity)
+     image = generate_image(eng_text, style)
+     audio_path = generate_audio(eng_text)
+
+     return eng_text, image, audio_path
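
InferenceClient() is constructed with no arguments here, so authentication depends entirely on whatever token the runtime environment exposes. A sketch of passing a token explicitly instead, assuming it is stored in an HF_TOKEN environment variable (the variable name and this setup are assumptions, not part of the commit):

import os
from huggingface_hub import InferenceClient

# Use an explicit token if one is set; falls back to anonymous access otherwise (assumed setup).
client = InferenceClient(token=os.environ.get("HF_TOKEN"))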