|
import json
import os
import random
import tempfile

import gradio as gr
import requests
from together import Together
|
|
|
|
|
# API credentials come from the environment so they never live in the source.
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY')

# Build the Together client only when a key is available. The handler
# functions in this file test `if not client` to report a missing key, so a
# missing key has to yield None here rather than an exception raised by the
# SDK constructor while the module is being imported.
client = Together(api_key=TOGETHER_API_KEY) if TOGETHER_API_KEY else None

PIXABAY_API_KEY = os.environ.get('PIXABAY_API_KEY')
IMAGE_API_URL = 'https://pixabay.com/api/'
# Number of hits requested per Pixabay search.
PER_PAGE = 5
|
|
|
def image_to_url(image_path):
    """
    Uploads a file to the uguu.se hosting service and returns its URL.

    Args:
        image_path (str): The path to the file to be uploaded.

    Returns:
        str: The URL of the uploaded file. On failure no exception is raised;
            instead a message is returned that starts with
            "Error: File not found" (missing file) or "An error occurred:"
            (any other problem, including HTTP errors and timeouts).
    """
    upload_endpoint = 'https://uguu.se/upload'
    # Upper bound (seconds) on the upload so a stalled host cannot hang the caller.
    request_timeout = 60

    try:
        with open(image_path, 'rb') as f:
            # Send only the base name: the remote host has no need to see the
            # local directory layout.
            files = {'files[]': (os.path.basename(image_path), f)}
            response = requests.post(upload_endpoint, files=files, timeout=request_timeout)

        # Report HTTP-level failures as such instead of a confusing
        # JSON/KeyError from an error page.
        response.raise_for_status()
        response_json = response.json()
        return response_json['files'][0]['url']
    except FileNotFoundError:
        return "Error: File not found. Please check the image path."
    except Exception as e:
        return f"An error occurred: {e}"
|
|
|
def search_pixabay_images(query: str = "", image_type: str = "all", orientation: str = "all"):
    """
    Searches the Pixabay API for royalty-free stock images based on user's query and filters.

    One of the returned hits is chosen at random.

    Args:
        query (str): The search term for finding stock images.
        image_type (str): Filter results by image type. Accepted values: "all", "photo", "illustration", "vector".
        orientation (str): Filter results by image orientation. Accepted values: "all", "horizontal", "vertical".

    Returns:
        str: URL of the found image or status/error message
    """
    if not query:
        return "Please enter a search query."

    if not PIXABAY_API_KEY:
        return "Pixabay API Key not found. Please set the PIXABAY_API_KEY environment variable."

    # Upper bound (seconds) on the API call so a stalled connection cannot
    # block the handler indefinitely.
    request_timeout = 15

    def redact(error):
        # Exception text from the HTTP layer can contain the request URL,
        # which carries the API key as a query parameter. Never show it.
        return str(error).replace(PIXABAY_API_KEY, "[REDACTED]")

    params = {
        'key': PIXABAY_API_KEY,
        'q': query,
        'per_page': PER_PAGE,
        'page': 1,
        'safesearch': 'true',
        'image_type': image_type,
        'orientation': orientation,
    }

    try:
        response = requests.get(IMAGE_API_URL, params=params, timeout=request_timeout)
        response.raise_for_status()

        # Decode separately: the decoding error raised by requests is also a
        # RequestException, so a shared handler chain would report it as a
        # request error instead of a decoding error.
        try:
            data = response.json()
        except ValueError:
            return "Error decoding API response."

        if data.get('totalHits', 0) == 0:
            return f"No image results found for '{query}'."

        hits = data.get('hits', [])
        if not hits:
            return f"No image results found for '{query}'."

        image_url = random.choice(hits).get('largeImageURL')
        if image_url:
            return image_url
        return "Could not retrieve large image URL."
    except requests.exceptions.RequestException as e:
        return f"API request error: {redact(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {redact(e)}"
|
|
|
def _fit_dimensions(width, height, min_size, max_size):
    """Return (width, height) as ints inside [min_size, max_size], keeping the aspect ratio where possible."""
    in_range = min_size <= width <= max_size and min_size <= height <= max_size

    # A non-positive side has no usable aspect ratio (and a zero height would
    # divide by zero), so such input skips the scaling and is only clamped.
    if not in_range and width > 0 and height > 0:
        aspect_ratio = width / height

        if width < min_size or height < min_size:
            # Too small: raise the shorter side to the minimum.
            if width < height:
                width = min_size
                height = int(round(width / aspect_ratio))
            else:
                height = min_size
                width = int(round(height * aspect_ratio))
        else:
            # Too large: lower the longer side to the maximum.
            if width > height:
                width = max_size
                height = int(round(width / aspect_ratio))
            else:
                height = max_size
                width = int(round(height * aspect_ratio))

    # Extreme aspect ratios can still leave one side out of range after
    # scaling; the final clamp guarantees both bounds. Rounding first turns
    # float input (e.g. 1024.0) into the integers passed to the API.
    width = max(min_size, min(int(round(width)), max_size))
    height = max(min_size, min(int(round(height)), max_size))
    return width, height


def together_text_to_image(prompt: str = "", width: int = 1024, height: int = 1024):
    """
    Generates an image from a text prompt using the Together AI API and the FLUX.1.1-pro model.

    Args:
        prompt (str): The text prompt to generate the image from.
        width (int): The width of the generated image in pixels. Values outside 512-1440 are adjusted into that range. Defaults to 1024.
        height (int): The height of the generated image in pixels. Values outside 512-1440 are adjusted into that range. Defaults to 1024.

    Returns:
        str: The URL of the generated image if successful, or an error message if not.
    """
    if not client:
        return "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
    if not prompt:
        return "Please enter a prompt for text-to-image generation."

    min_size, max_size = 512, 1440
    width, height = _fit_dimensions(width, height, min_size, max_size)

    try:
        image_completion = client.images.generate(
            model="black-forest-labs/FLUX.1.1-pro",
            width=width,
            height=height,
            steps=40,
            prompt=prompt,
        )
        return image_completion.data[0].url
    except Exception as e:
        return f"Error generating image from text: {e}"
|
|
|
def together_image_to_image(image_path: str = None, prompt: str = ""):
    """
    Transforms an image based on a text prompt using the Together AI API.

    The local image is first uploaded with image_to_url(), and the resulting
    URL is passed to the FLUX.1-kontext-max model.

    Args:
        image_path (str): The path to the input image file.
        prompt (str): The text prompt for image transformation.

    Returns:
        str: The URL of the transformed image, or an error message.
    """
    if not client:
        return "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
    if image_path is None:
        return "Please upload or paste an image for image-to-image transformation."
    if not prompt:
        return "Please enter a prompt for image transformation."

    try:
        image_url = image_to_url(image_path)

        # image_to_url() reports failures as plain strings with more than one
        # prefix ("Error: ..." and "An error occurred: ..."). Anything that is
        # not an http(s) URL is treated as a failed upload and returned as the
        # message instead of being sent to the model as an image URL.
        if not isinstance(image_url, str):
            return "Error: image upload did not return a URL."
        if not image_url.startswith(("http://", "https://")):
            return image_url

        image_completion = client.images.generate(
            model="black-forest-labs/FLUX.1-kontext-max",
            steps=40,
            prompt=prompt,
            image_url=image_url,
        )
        return image_completion.data[0].url
    except Exception as e:
        return f"Error transforming image: {e}"
|
|
|
def text_to_speech(text: str = "", voice: str = ""):
    """
    Converts text to speech using Together AI's audio API.

    The audio is written to a temporary MP3 file, uploaded with
    image_to_url(), and the temporary file is removed afterwards.

    Args:
        text (str): The text to convert to speech
        voice (str): The voice to use for speech synthesis.

    Returns:
        url (str): Give url to the generated audio file or error message
    """
    if not client:
        return "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
    if not text:
        return "Please enter text to convert to speech."

    try:
        # A unique temp file per call: a fixed file name would let concurrent
        # requests overwrite each other's audio before it is uploaded.
        fd, speech_file_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            response = client.audio.speech.create(
                model="cartesia/sonic",
                input=text,
                voice=voice,
            )
            response.stream_to_file(speech_file_path)
            return image_to_url(speech_file_path)
        finally:
            # Best-effort cleanup; a failure to delete must not hide the result.
            try:
                os.remove(speech_file_path)
            except OSError:
                pass
    except Exception as e:
        return f"Error generating speech: {e}"
|
|
|
|
|
# UI definition: one tab per handler function. Every output is a read-only
# Textbox holding a URL or a status/error message.
# API-key requirements are shown as Markdown notes so they are actually
# rendered in the page; gr.Warning is an alert helper intended to be called
# from event handlers and displays nothing when called during layout.
with gr.Blocks(title="Media Generation and Search Explorer") as demo:
    gr.Markdown("## Media Generation and Search Explorer")
    gr.Markdown("Explore royalty-free images from Pixabay and generate/transform images using Together AI.")

    with gr.Tab("Pixabay Image Search"):
        gr.Markdown("Search for royalty-free images on Pixabay.")
        gr.Markdown("**Note:** This requires setting the PIXABAY_API_KEY environment variable.")

        with gr.Row():
            pixabay_query_input = gr.Textbox(label="Search Query", placeholder="e.g., yellow flowers")
            pixabay_search_button = gr.Button("Search Images")

        with gr.Row():
            pixabay_image_type_input = gr.Radio(
                ["all", "photo", "illustration", "vector"],
                label="Image Type",
                value="all",
            )
            pixabay_orientation_input = gr.Radio(
                ["all", "horizontal", "vertical"],
                label="Orientation",
                value="all",
            )

        pixabay_output = gr.Textbox(label="Result Image URL", interactive=False)

        pixabay_search_button.click(
            fn=search_pixabay_images,
            inputs=[
                pixabay_query_input,
                pixabay_image_type_input,
                pixabay_orientation_input,
            ],
            outputs=pixabay_output,
        )

    with gr.Tab("Together AI - Text to Image"):
        gr.Markdown("Generate an image from a text prompt using Together AI.")
        gr.Markdown("**Note:** This requires setting the TOGETHER_API_KEY environment variable.")
        with gr.Row():
            together_text_to_image_prompt = gr.Textbox(label="Enter your prompt", scale=2)
            together_text_to_image_width = gr.Slider(label="Width", value=1024, minimum=512, maximum=1440)
            together_text_to_image_height = gr.Slider(label="Height", value=1024, minimum=512, maximum=1440)
            together_text_to_image_button = gr.Button("Generate Image", scale=1)
        together_text_to_image_output = gr.Textbox(label="Generated Image (URL)", interactive=False)

        together_text_to_image_button.click(
            fn=together_text_to_image,
            inputs=[together_text_to_image_prompt, together_text_to_image_width, together_text_to_image_height],
            outputs=together_text_to_image_output,
        )

    with gr.Tab("Together AI - Image to Image"):
        gr.Markdown("Transform an uploaded image based on a text prompt using Together AI.")
        gr.Markdown("**Note:** This requires setting the TOGETHER_API_KEY environment variable.")
        with gr.Row():
            together_image_input = gr.Image(label="Upload or paste an image", type="filepath", scale=2)
            together_image_to_image_prompt = gr.Textbox(label="Enter your transformation prompt", scale=2)
            together_image_to_image_button = gr.Button("Transform Image", scale=1)
        together_image_to_image_output = gr.Textbox(label="Transformed Image (URL)", interactive=False)

        together_image_to_image_button.click(
            fn=together_image_to_image,
            inputs=[together_image_input, together_image_to_image_prompt],
            outputs=together_image_to_image_output,
        )

    with gr.Tab("Together AI - Text to Audio"):
        gr.Markdown("Generate audio from text using Together AI's text-to-speech models.")
        gr.Markdown("**Note:** This requires setting the TOGETHER_API_KEY environment variable.")

        with gr.Row():
            tts_input_text = gr.Textbox(label="Enter text to convert to speech", lines=3)
            tts_voice_selection = gr.Dropdown(
                label="Select Voice",
                choices=[
                    'calm lady', 'meditation lady', 'storyteller lady', 'wise lady', 'teacher lady',
                    'wise man', 'customer support man', 'tutorial man', 'helpful woman',
                    'customer support lady', 'asmr lady', 'pleasant man', 'professional woman',
                    'reading lady', 'reading man',
                ],
                value="helpful woman",
            )
            tts_generate_button = gr.Button("Generate Audio")

        tts_audio_output = gr.Textbox(label="Generated Audio (url)", interactive=False)

        tts_generate_button.click(
            fn=text_to_speech,
            inputs=[tts_input_text, tts_voice_selection],
            outputs=tts_audio_output,
        )
|
|
|
|
|
if __name__ == "__main__":
    # Start the app only when executed as a script, not when imported.
    # mcp_server=True is forwarded to Gradio's launch().
    demo.launch(mcp_server=True)