LiKenun committed
Commit 55d79e2 · 1 Parent(s): 1c1b97a

Move environment variable querying code out of the Gradio UI-construction functions all the way to the root of the application, `app.py`

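Since all six model IDs are now read once at application start-up, the whole configuration surface is visible in one place. A hypothetical `.env` consumed by `load_dotenv()` might look like the sketch below; the variable names match the `getenv()` calls in `app.py`, but the model IDs (apart from `prithivMLmods/Trash-Net`, which image_classification.py references) are placeholders, not the repository's actual defaults:

# Hypothetical .env — variable names taken from app.py; model IDs are placeholders.
TEXT_TO_IMAGE_MODEL=stabilityai/stable-diffusion-xl-base-1.0
IMAGE_TO_TEXT_MODEL=Salesforce/blip-image-captioning-base
IMAGE_CLASSIFICATION_MODEL=prithivMLmods/Trash-Net
TEXT_TO_SPEECH_MODEL=facebook/mms-tts-eng
AUDIO_TRANSCRIPTION_MODEL=openai/whisper-small
CHAT_MODEL=google/flan-t5-small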
app.py CHANGED
@@ -1,4 +1,5 @@
 from dotenv import load_dotenv
+from os import getenv
 import gradio as gr
 from huggingface_hub import InferenceClient
 from automatic_speech_recognition import create_asr_tab
@@ -16,14 +17,35 @@ class App:
     and integrating all the individual building block tabs.
     """
 
-    def __init__(self, client: InferenceClient):
-        """Initialize the App with an InferenceClient instance.
+    def __init__(
+        self,
+        client: InferenceClient,
+        text_to_image_model: str,
+        image_to_text_model: str,
+        image_classification_model: str,
+        text_to_speech_model: str,
+        audio_transcription_model: str,
+        chat_model: str
+    ):
+        """Initialize the App with an InferenceClient instance and model IDs.
 
         Args:
             client: Hugging Face InferenceClient instance for making API calls
                 to Hugging Face's inference endpoints.
+            text_to_image_model: Model ID for text-to-image generation.
+            image_to_text_model: Model ID for image captioning.
+            image_classification_model: Model ID for image classification.
+            text_to_speech_model: Model ID for text-to-speech.
+            audio_transcription_model: Model ID for automatic speech recognition.
+            chat_model: Model ID for chatbot.
         """
         self.client = client
+        self.text_to_image_model = text_to_image_model
+        self.image_to_text_model = image_to_text_model
+        self.image_classification_model = image_classification_model
+        self.text_to_speech_model = text_to_speech_model
+        self.audio_transcription_model = audio_transcription_model
+        self.chat_model = chat_model
 
     def run(self):
         """Launch the Gradio application with all building block tabs.
@@ -37,22 +59,30 @@ class App:
             gr.Markdown("A gallery of building blocks for building AI applications")
             with gr.Tabs():
                 with gr.Tab("Text-to-image Generation"):
-                    create_text_to_image_tab(self.client)
+                    create_text_to_image_tab(self.client, self.text_to_image_model)
                 with gr.Tab("Image-to-text or Image Captioning"):
-                    create_image_to_text_tab()
+                    create_image_to_text_tab(self.image_to_text_model)
                 with gr.Tab("Image Classification"):
-                    create_image_classification_tab(self.client)
+                    create_image_classification_tab(self.client, self.image_classification_model)
                 with gr.Tab("Text-to-speech (TTS)"):
-                    create_text_to_speech_tab()
+                    create_text_to_speech_tab(self.text_to_speech_model)
                 with gr.Tab("Automatic Speech Recognition (ASR)"):
-                    create_asr_tab(self.client)
+                    create_asr_tab(self.client, self.audio_transcription_model)
                 with gr.Tab("Chat"):
-                    create_chatbot_tab()
+                    create_chatbot_tab(self.chat_model)
 
         demo.launch()
 
 
 if __name__ == "__main__":
     load_dotenv()
-    app = App(InferenceClient())
+    app = App(
+        client=InferenceClient(),
+        text_to_image_model=getenv("TEXT_TO_IMAGE_MODEL"),
+        image_to_text_model=getenv("IMAGE_TO_TEXT_MODEL"),
+        image_classification_model=getenv("IMAGE_CLASSIFICATION_MODEL"),
+        text_to_speech_model=getenv("TEXT_TO_SPEECH_MODEL"),
+        audio_transcription_model=getenv("AUDIO_TRANSCRIPTION_MODEL"),
+        chat_model=getenv("CHAT_MODEL")
+    )
     app.run()
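One review note on the new root-level lookups: `getenv` returns `None` when a variable is unset, yet each `*_model` parameter is annotated `str`, so a missing variable would now surface only when its tab is first used. A minimal fail-fast sketch, assuming a hypothetical `require_env` helper that is not part of this commit, could catch that at start-up instead:

# Sketch only — require_env is a hypothetical helper, not in this repository.
from os import getenv

def require_env(name: str) -> str:
    """Return the named environment variable's value or raise immediately if unset."""
    value = getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value

# Usage would mirror the __main__ block above, e.g.:
#     text_to_image_model=require_env("TEXT_TO_IMAGE_MODEL")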
automatic_speech_recognition.py CHANGED
@@ -1,6 +1,6 @@
 from functools import partial
 from huggingface_hub import InferenceClient
-from os import getenv, path, unlink
+from os import path, unlink
 import gradio as gr
 from utils import save_audio_to_temp_file, get_model_sample_rate, request_audio
 
@@ -40,7 +40,7 @@ def automatic_speech_recognition(client: InferenceClient, model: str, audio: tup
         pass  # Ignore clean-up errors.
 
 
-def create_asr_tab(client: InferenceClient):
+def create_asr_tab(client: InferenceClient, model: str):
     """Create the automatic speech recognition tab in the Gradio interface.
 
     This function sets up all UI components for automatic speech recognition, including:
@@ -51,8 +51,8 @@ def create_asr_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the automatic_speech_recognition function.
+        model: Hugging Face model ID to use for automatic speech recognition.
     """
-    model_id = getenv("AUDIO_TRANSCRIPTION_MODEL")
     gr.Markdown("Transcribe audio to text.")
     audio_transcription_url_input = gr.Textbox(label="Audio URL")
     audio_transcription_audio_request_button = gr.Button("Get Audio")
@@ -65,7 +65,7 @@ def create_asr_tab(client: InferenceClient):
     audio_transcription_generate_button = gr.Button("Transcribe")
     audio_transcription_output = gr.Textbox(label="Text")
     audio_transcription_generate_button.click(
-        fn=partial(automatic_speech_recognition, client, model_id),
+        fn=partial(automatic_speech_recognition, client, model),
         inputs=audio_transcription_audio_input,
         outputs=audio_transcription_output
     )
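For readers unfamiliar with the pattern in the `click` wiring above: `gr.Button.click` passes the `inputs` components to `fn` as positional arguments, so pre-binding the leading `client` and `model` parameters with `functools.partial` leaves a callable that takes only the audio value. A minimal sketch of the mechanism, with hypothetical names:

# Sketch of the partial-binding pattern (hypothetical function and model name).
from functools import partial

def transcribe(client: object, model: str, audio: tuple[int, bytes]) -> str:
    return f"transcribed {len(audio[1])} bytes with {model}"

fn = partial(transcribe, None, "some-asr-model")  # client and model pre-bound
print(fn((16_000, b"\x00\x01")))  # Gradio calls fn(audio) in exactly this shape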
chatbot.py CHANGED
@@ -1,4 +1,3 @@
-from os import getenv
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from utils import get_pytorch_device, spaces_gpu
@@ -176,7 +175,7 @@ def chat(model: str, message: str, conversation_history: list[dict] | None) -> t
     return response, conversation_history
 
 
-def create_chatbot_tab():
+def create_chatbot_tab(model: str):
     """Create the chatbot tab in the Gradio interface.
 
     This function sets up all UI components for the conversational chatbot,
@@ -189,8 +188,10 @@ def create_chatbot_tab():
     It also wires up event handlers for both button clicks and Enter key presses,
     and manages the conversion between Gradio's chat format and the internal
     conversation history format.
+
+    Args:
+        model: Hugging Face model ID to use for the chatbot.
     """
-    model_id = getenv("CHAT_MODEL")
     gr.Markdown("Have a conversation with an AI chatbot.")
     chatbot_history = gr.State(value=None)  # Store the conversation history.
     chatbot_output = gr.Chatbot(label="Conversation")
@@ -217,7 +218,7 @@ def create_chatbot_tab():
         """
         if not message.strip():
            return history, conversation_state, ""
-        response, updated_conversation = chat(model_id, message, conversation_state)  # Get response from chatbot.
+        response, updated_conversation = chat(model, message, conversation_state)  # Get response from chatbot.
        if history is None:  # Update Gradio chat history format: list of [user_message, bot_message] pairs.
            history = []
        history.append([message, response])
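Unlike the other tabs, the chatbot does not use `partial`: the nested handler simply closes over the new `model` parameter of `create_chatbot_tab`. Both styles produce a callable whose remaining parameters line up with Gradio's `inputs`; a minimal illustration with hypothetical names:

# Closure capture vs. partial binding — equivalent results (hypothetical names).
from functools import partial

def chat_fn(model: str, message: str) -> str:
    return f"[{model}] {message}"

bound = partial(chat_fn, "some-chat-model")   # model pre-bound up front

def handler(message: str) -> str:             # model captured from enclosing scope
    return chat_fn("some-chat-model", message)

assert bound("hello") == handler("hello")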
image_classification.py CHANGED
@@ -1,6 +1,6 @@
 from functools import partial
 from huggingface_hub import InferenceClient
-from os import path, unlink, getenv
+from os import path, unlink
 import gradio as gr
 from PIL.Image import Image
 import pandas as pd
@@ -47,7 +47,7 @@ def image_classification(client: InferenceClient, model: str, image: Image) -> D
         pass  # Ignore clean-up errors.
 
 
-def create_image_classification_tab(client: InferenceClient):
+def create_image_classification_tab(client: InferenceClient, model: str):
     """Create the image classification tab in the Gradio interface.
 
     This function sets up all UI components for image classification, including:
@@ -58,8 +58,8 @@ def create_image_classification_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the image_classification function.
+        model: Hugging Face model ID to use for image classification.
     """
-    model_id = getenv("IMAGE_CLASSIFICATION_MODEL")
     gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
     image_classification_url_input = gr.Textbox(label="Image URL")
     image_classification_image_request_button = gr.Button("Get Image")
@@ -72,7 +72,7 @@ def create_image_classification_tab(client: InferenceClient):
     image_classification_button = gr.Button("Classify")
     image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
     image_classification_button.click(
-        fn=partial(image_classification, client, model_id),
+        fn=partial(image_classification, client, model),
         inputs=image_classification_image_input,
         outputs=image_classification_output
     )
image_to_text.py CHANGED
@@ -1,6 +1,5 @@
 import gc
 from functools import partial
-from os import getenv
 import gradio as gr
 from PIL.Image import Image
 from transformers import AutoProcessor, BlipForConditionalGeneration
@@ -42,7 +41,7 @@ def image_to_text(model: str, image: Image) -> list[str]:
     return results
 
 
-def create_image_to_text_tab():
+def create_image_to_text_tab(model: str):
     """Create the image-to-text captioning tab in the Gradio interface.
 
     This function sets up all UI components for image captioning, including:
@@ -50,8 +49,10 @@ def create_image_to_text_tab():
     - Button to retrieve image from URL
     - Image preview component
     - Caption button and output list
+
+    Args:
+        model: Hugging Face model ID to use for image captioning.
     """
-    model_id = getenv("IMAGE_TO_TEXT_MODEL")
     gr.Markdown("Generate a text description of an image.")
     image_to_text_url_input = gr.Textbox(label="Image URL")
     image_to_text_image_request_button = gr.Button("Get Image")
@@ -64,7 +65,7 @@ def create_image_to_text_tab():
     image_to_text_button = gr.Button("Caption")
     image_to_text_output = gr.List(label="Captions", headers=["Caption"])
     image_to_text_button.click(
-        fn=partial(image_to_text, model_id),
+        fn=partial(image_to_text, model),
         inputs=image_to_text_image_input,
         outputs=image_to_text_output
     )
text_to_image.py CHANGED
@@ -1,5 +1,4 @@
 from functools import partial
-from os import getenv
 import gradio as gr
 from PIL.Image import Image
 from huggingface_hub import InferenceClient
@@ -19,7 +18,7 @@ def text_to_image(client: InferenceClient, model: str, prompt: str) -> Image:
     return client.text_to_image(prompt, model=model)
 
 
-def create_text_to_image_tab(client: InferenceClient):
+def create_text_to_image_tab(client: InferenceClient, model: str):
     """Create the text-to-image generation tab in the Gradio interface.
 
     This function sets up all UI components for text-to-image generation,
@@ -27,14 +26,14 @@ def create_text_to_image_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the text_to_image function.
+        model: Hugging Face model ID to use for text-to-image generation.
     """
-    model_id = getenv("TEXT_TO_IMAGE_MODEL")
     gr.Markdown("Generate an image from a text prompt.")
     text_to_image_prompt = gr.Textbox(label="Prompt")
     text_to_image_generate_button = gr.Button("Generate")
     text_to_image_output = gr.Image(label="Image", type="pil")
     text_to_image_generate_button.click(
-        fn=partial(text_to_image, client, model_id),
+        fn=partial(text_to_image, client, model),
         inputs=text_to_image_prompt,
         outputs=text_to_image_output
     )
text_to_speech.py CHANGED
@@ -1,6 +1,5 @@
 import gc
 from functools import partial
-from os import getenv
 import gradio as gr
 from transformers import pipeline
 from utils import spaces_gpu
@@ -40,19 +39,21 @@ def text_to_speech(model: str, text: str) -> tuple[int, bytes]:
     return (result["sampling_rate"], result["audio"][0])
 
 
-def create_text_to_speech_tab():
+def create_text_to_speech_tab(model: str):
     """Create the text-to-speech tab in the Gradio interface.
 
     This function sets up all UI components for text-to-speech generation,
     including input textbox, generate button, and output audio player.
+
+    Args:
+        model: Hugging Face model ID to use for text-to-speech.
     """
-    model_id = getenv("TEXT_TO_SPEECH_MODEL")
     gr.Markdown("Generate speech from text.")
     text_to_speech_text = gr.Textbox(label="Text")
     text_to_speech_generate_button = gr.Button("Generate")
     text_to_speech_output = gr.Audio(label="Speech")
     text_to_speech_generate_button.click(
-        fn=partial(text_to_speech, model_id),
+        fn=partial(text_to_speech, model),
         inputs=text_to_speech_text,
         outputs=text_to_speech_output
     )