# Hugging Face Space (status shown on the page when captured: "Runtime error").
import gradio as gr
import sounddevice as sd  # For microphone input
from diffusers import DiffusionPipeline
from transformers import pipeline
# Load the Stable Diffusion pipeline and apply the fine-tuned LoRA weights.
# NOTE: this object must not be named `pipeline`. That name is the
# `transformers.pipeline` factory imported at the top of the file, and it is
# needed below to build the speech recogniser; rebinding it would make the
# speech-recognition call invoke the diffusion model instead.
image_pipeline = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
image_pipeline.load_lora_weights("MdEndan/tinysketch-fine-tuned")

# Speech-to-text pipeline; built lazily on first use and then reused, so the
# model is not reloaded for every request.
_speech_pipe = None


def _get_speech_pipe():
    """Return the shared speech-recognition pipeline, creating it once."""
    global _speech_pipe
    if _speech_pipe is None:
        _speech_pipe = pipeline("automatic-speech-recognition")
    return _speech_pipe


def _transcribe(audio):
    """Return the text spoken in *audio*.

    Args:
        audio: Either a path to an audio file (``str``) or a
            ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
            array, as delivered by ``gr.Audio(type="numpy")``.

    Returns:
        The transcription with surrounding whitespace removed.
    """
    speech_pipe = _get_speech_pipe()

    if isinstance(audio, str):
        # A file path: the pipeline decodes the file itself.
        return speech_pipe(audio)["text"].strip()

    sample_rate, samples = audio
    if samples.ndim > 1:
        # Multi-channel recordings arrive as (n_samples, n_channels);
        # the recogniser expects a single channel.
        samples = samples.mean(axis=1)

    # Integer PCM must be rescaled to floats in [-1.0, 1.0]. The full-scale
    # value is derived from the sample width before the dtype is converted.
    is_integer_pcm = samples.dtype.kind == "i"
    full_scale = float(2 ** (8 * samples.dtype.itemsize - 1))
    samples = samples.astype("float32")
    if is_integer_pcm:
        samples /= full_scale

    result = speech_pipe({"sampling_rate": sample_rate, "raw": samples})
    return result["text"].strip()


def generate_image(text):
    """Transcribe a spoken description and generate an image from it.

    Args:
        text: The value supplied by the Gradio audio input. The parameter
            name is historical; the value is a recording (a
            ``(sample_rate, samples)`` pair or an audio file path), not a
            text prompt. ``None`` means nothing was recorded.

    Returns:
        The first image produced by the diffusion pipeline, or ``None`` when
        no audio was supplied, the transcription failed or was empty, or
        image generation failed. Failures are reported on stdout.
    """
    if text is None:
        print("Error during speech recognition: no audio was provided")
        return None

    try:
        prompt = _transcribe(text)
    except Exception as e:
        # Do not fall through with an error message as the prompt: that would
        # generate an image of the error text.
        print(f"Error during speech recognition: {e}")
        return None

    if not prompt:
        print("Error during speech recognition: no speech was detected")
        return None

    # Generate image using the diffusion pipeline
    try:
        return image_pipeline(prompt).images[0]
    except Exception as e:
        print(f"Error during image generation: {e}")
        return None
# Gradio interface: a microphone recording goes in, the generated image comes
# out. The recording is made in the visitor's browser, which asks for
# microphone permission itself; no launch option is needed for that.
interface = gr.Interface(
    fn=generate_image,
    # type="numpy" hands the callback a (sample_rate, samples) pair.
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    # gr.Image has no `thumbnail` argument; passing it raises TypeError.
    outputs=gr.Image(),
    title="Speak & Create: Text-to-Image with Microphone Input (LORA)",
    description="Speak your description and see an image generated using a fine-tuned model!",
)

# Handle potential errors during Gradio launch
try:
    # `launch` has no `capture_audio` argument; passing it raised TypeError,
    # so the server never started.
    interface.launch(share=True)
except Exception as e:
    print(f"Error launching Gradio interface: {e}")