import gradio as gr
import io
import os
import time

import requests
from deep_translator import GoogleTranslator
from groq import Groq
from PIL import Image

# Hugging Face Inference API token, read from the 'hugging' environment variable.
H_key = os.getenv('hugging')
API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
headers = {"Authorization": f"Bearer {H_key}"}

# Groq client for transcription and chat, keyed by the 'groq' environment variable.
api_key = os.getenv('groq')
client = Groq(api_key=api_key)


def query_image_generation(payload, max_retries=5):
    """POST a prompt to the Hugging Face Inference API and return the raw image bytes."""
    for attempt in range(max_retries):
        response = requests.post(API_URL, headers=headers, json=payload)

        # A 503 means the model is still loading; wait out the estimated time
        # (capped at 60 seconds) and retry.
        if response.status_code == 503:
            print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
            estimated_time = min(response.json().get("estimated_time", 60), 60)
            time.sleep(estimated_time)
            continue

        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            print(f"Response: {response.text}")
            return None

        return response.content

    print(f"Failed to generate image after {max_retries} attempts.")
    return None

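# Example call (hypothetical prompt; returns PNG/JPEG bytes on success, None on failure):
#   raw_bytes = query_image_generation({"inputs": "a watercolor of a Chennai street at dusk"})
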
def generate_image(prompt):
    """Generate an image from an English prompt and decode it into a PIL Image."""
    image_bytes = query_image_generation({"inputs": prompt})
    if image_bytes is None:
        return None

    try:
        return Image.open(io.BytesIO(image_bytes))
    except Exception as e:
        print(f"Error decoding image: {e}")
        return None


def process_audio_or_text(input_text, audio_path, generate_image_flag):
    """Transcribe Tamil audio (or accept Tamil text), translate it to English,
    get a chatbot reply, and optionally generate an image from the translation."""
    tamil_text, translation, image = None, None, None

    if audio_path:
        # Transcribe the uploaded Tamil audio with Whisper via the Groq API.
        try:
            with open(audio_path, "rb") as file:
                transcription = client.audio.transcriptions.create(
                    file=(os.path.basename(audio_path), file.read()),
                    model="whisper-large-v3",
                    language="ta",
                    response_format="verbose_json",
                )
            tamil_text = transcription.text
        except Exception as e:
            return f"An error occurred during transcription: {str(e)}", None, None
    elif input_text:
        tamil_text = input_text
    else:
        return "Please enter text or upload an audio file.", None, None

    # Translate the Tamil text to English.
    try:
        translator = GoogleTranslator(source='ta', target='en')
        translation = translator.translate(tamil_text)
    except Exception as e:
        return tamil_text, f"An error occurred during translation: {str(e)}", None

    # Send the English translation to the chatbot.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": translation}],
            model="llama-3.2-90b-text-preview"
        )
        chatbot_response = chat_completion.choices[0].message.content
    except Exception as e:
        return translation, f"An error occurred during chatbot interaction: {str(e)}", None

    # Optionally turn the translated prompt into an image.
    if generate_image_flag:
        image = generate_image(translation)

    return translation, chatbot_response, image

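# Example (hypothetical Tamil input, no audio, with image generation enabled):
#   translation, reply, image = process_audio_or_text("வணக்கம்", None, True)
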
css = """ |
|
.gradio-container { |
|
font-family: 'Georgia', serif; |
|
background-color: #f5f5f5; |
|
padding: 20px; |
|
color: #000000; |
|
} |
|
.gr-row { |
|
box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1); |
|
background-color: #ffffff; |
|
border-radius: 10px; |
|
padding: 20px; |
|
margin: 10px 0; |
|
} |
|
.gr-button { |
|
background-color: #8b4513; |
|
color: white; |
|
font-size: 16px; |
|
border-radius: 5px; |
|
} |
|
.gr-button:hover { |
|
background-color: #6a3511; |
|
} |
|
.gr-checkbox-label { |
|
font-weight: bold; |
|
} |
|
.gr-image { |
|
border-radius: 10px; |
|
box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1); |
|
} |
|
#main-title { |
|
text-align: center; |
|
font-size: 28px; |
|
font-weight: bold; |
|
color: #8b4513; |
|
} |
|
""" |
|
|
|
with gr.Blocks(css=css) as iface:
    gr.Markdown("<h1 id='main-title'>🖼️ AI Chatbot and Image Generation App</h1>")

    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(label="Enter Tamil text", placeholder="Type your message here...")
            audio_input = gr.Audio(type="filepath", label="Or upload Tamil audio")
            image_generation_checkbox = gr.Checkbox(label="Generate Image", value=True)

            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")

        with gr.Column(scale=1):
            text_output_1 = gr.Textbox(label="English Translation", interactive=False)
            text_output_2 = gr.Textbox(label="Chatbot Response", interactive=False)
            image_output = gr.Image(label="Generated Image")

    submit_btn.click(fn=process_audio_or_text,
                     inputs=[user_input, audio_input, image_generation_checkbox],
                     outputs=[text_output_1, text_output_2, image_output])

    # Reset every input and output to its initial state.
    clear_btn.click(lambda: ("", None, True, "", "", None),
                    inputs=[],
                    outputs=[user_input, audio_input, image_generation_checkbox, text_output_1, text_output_2, image_output])

iface.launch()
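
# To run locally, set the 'hugging' (Hugging Face) and 'groq' API keys as
# environment variables before launching; the filename below is hypothetical:
#   export hugging=<hf-token> groq=<groq-key>
#   python app.py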