Spaces:

txh17
/

work

Runtime error

App Files Files Community

txh17 committed on Apr 21, 2025

Commit

3320745

verified ·

1 Parent(s): 5957b86

Create app.py

Browse files

Files changed (1) hide show

app.py +211 -0

app.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import gradio as gr
+from transformers import pipeline
+import torch
+from diffusers import StableDiffusionPipeline
+import soundfile as sf
+import speech_recognition as sr
+import numpy as np
+import os
+# 初始化组件
+# 使用较小的开源LLM进行提示增强
+llm_pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")
+# 初始化Stable Diffusion
+sd_pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    torch_dtype=torch.float16
+).to("cuda" if torch.cuda.is_available() else "cpu")
+# 语音识别初始化
+recognizer = sr.Recognizer()
+def enhance_prompt(basic_prompt, style, detail_level, artist_style):
+    """使用LLM增强提示词"""
+    prompt_template = f"""
+    根据以下简短描述创建一个详细的Stable Diffusion提示:
+    原始描述: {basic_prompt}
+    风格: {style}
+    细节级别: {detail_level}
+    艺术家风格: {artist_style}
+    请生成一个包含以下元素的详细提示:
+    - 主体描述
+    - 环境/背景
+    - 光照条件
+    - 色彩风格
+    - 艺术媒介(如数字绘画、油画等)
+    - 质量描述(如4K、超详细等)
+    生成的提示:
+    """
+    enhanced_prompt = llm_pipe(
+        prompt_template,
+        max_length=200,
+        num_return_sequences=1,
+        temperature=0.7
+    )[0]['generated_text']
+    # 清理生成的文本
+    enhanced_prompt = enhanced_prompt.replace(prompt_template, "").strip()
+    return enhanced_prompt
+def generate_image(enhanced_prompt, steps, guidance_scale, seed):
+    """使用Stable Diffusion生成图像"""
+    if seed == -1:
+        seed = torch.randint(0, 2**32, (1,)).item()
+    generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed)
+    image = sd_pipe(
+        enhanced_prompt,
+        num_inference_steps=steps,
+        guidance_scale=guidance_scale,
+        generator=generator
+    ).images[0]
+    return image, seed
+def process_audio(audio):
+    """处理语音输入"""
+    sr, audio_data = audio
+    audio_array = np.array(audio_data, dtype=np.float32)
+    # 保存临时文件供语音识别使用
+    temp_file = "temp_audio.wav"
+    sf.write(temp_file, audio_array, sr)
+    with sr.AudioFile(temp_file) as source:
+        audio_data = recognizer.record(source)
+        try:
+            text = recognizer.recognize_google(audio_data, language='en-US')
+            os.remove(temp_file)
+            return text
+        except Exception as e:
+            os.remove(temp_file)
+            return f"语音识别错误: {str(e)}"
+def full_process(basic_prompt, style, detail_level, artist_style, steps, guidance_scale, seed, use_audio, audio_input):
+    """完整处理流程"""
+    # 处理语音输入
+    if use_audio and audio_input is not None:
+        basic_prompt = process_audio(audio_input)
+    # 生成增强提示
+    enhanced_prompt = enhance_prompt(basic_prompt, style, detail_level, artist_style)
+    # 生成图像
+    image, used_seed = generate_image(enhanced_prompt, steps, guidance_scale, seed)
+    return enhanced_prompt, image, used_seed
+# Gradio界面
+with gr.Blocks(title="魔法树屋图像生成器") as demo:
+    gr.Markdown("# 🎨 魔法树屋图像生成器")
+    gr.Markdown("输入简短描述或使用语音输入，生成精美图像！")
+    with gr.Row():
+        with gr.Column():
+            # 输入部分
+            use_audio = gr.Checkbox(label="使用语音输入")
+            audio_input = gr.Audio(label="录音", visible=False)
+            basic_prompt = gr.Textbox(
+                label="简短描述",
+                placeholder="例如: 天空中的魔法树屋",
+                visible=True
+            )
+            # 当复选框变化时切换输入方式
+            def toggle_input(use_audio):
+                return {
+                    basic_prompt: gr.update(visible=not use_audio),
+                    audio_input: gr.update(visible=use_audio)
+                }
+            use_audio.change(
+                toggle_input,
+                inputs=use_audio,
+                outputs=[basic_prompt, audio_input]
+            )
+            # 风格选项
+            style = gr.Dropdown(
+                label="风格",
+                choices=["现实主义", "幻想艺术", "赛博朋克", "水墨画", "卡通", "极简主义"],
+                value="幻想艺术"
+            )
+            detail_level = gr.Slider(
+                label="细节级别",
+                minimum=1,
+                maximum=5,
+                step=1,
+                value=3
+            )
+            artist_style = gr.Dropdown(
+                label="艺术家风格",
+                choices=["无", "梵高", "毕加索", "莫奈", "达利", "宫崎骏"],
+                value="无"
+            )
+            # 高级选项
+            with gr.Accordion("高级选项", open=False):
+                steps = gr.Slider(
+                    label="生成步数",
+                    minimum=20,
+                    maximum=100,
+                    step=5,
+                    value=50
+                )
+                guidance_scale = gr.Slider(
+                    label="引导尺度",
+                    minimum=1.0,
+                    maximum=20.0,
+                    step=0.5,
+                    value=7.5
+                )
+                seed = gr.Number(
+                    label="随机种子 (-1 表示随机)",
+                    value=-1
+                )
+            submit_btn = gr.Button("生成图像", variant="primary")
+        with gr.Column():
+            # 输出部分
+            enhanced_prompt = gr.Textbox(
+                label="生成的提示",
+                interactive=False
+            )
+            image_output = gr.Image(
+                label="生成的图像",
+                height=512
+            )
+            used_seed = gr.Number(
+                label="使用的种子",
+                interactive=False
+            )
+    # 连接按钮
+    submit_btn.click(
+        fn=full_process,
+        inputs=[
+            basic_prompt, style, detail_level, artist_style,
+            steps, guidance_scale, seed, use_audio, audio_input
+        ],
+        outputs=[enhanced_prompt, image_output, used_seed]
+    )
+# For Hugging Face Spaces, we need to set up the queue
+demo.queue()
+# Launch the app (only when this file is run as a script)
+if __name__ == "__main__":
+    demo.launch()