LiKenun committed
Commit 55d79e2 · 1 Parent(s): 1c1b97a

Move environment variable querying code out of the Gradio UI-construction functions all the way to the root of the application, `app.py`

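Since all six model IDs are now read once at application start-up, the whole configuration surface is visible in one place. A hypothetical `.env` consumed by `load_dotenv()` might look like the sketch below; the variable names match the `getenv()` calls in `app.py`, but the model IDs (apart from `prithivMLmods/Trash-Net`, which image_classification.py references) are placeholders, not the repository's actual defaults:

# Hypothetical .env — variable names taken from app.py; model IDs are placeholders.
TEXT_TO_IMAGE_MODEL=stabilityai/stable-diffusion-xl-base-1.0
IMAGE_TO_TEXT_MODEL=Salesforce/blip-image-captioning-base
IMAGE_CLASSIFICATION_MODEL=prithivMLmods/Trash-Net
TEXT_TO_SPEECH_MODEL=facebook/mms-tts-eng
AUDIO_TRANSCRIPTION_MODEL=openai/whisper-small
CHAT_MODEL=google/flan-t5-small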
app.py CHANGED
@@ -1,4 +1,5 @@
 from dotenv import load_dotenv
+from os import getenv
 import gradio as gr
 from huggingface_hub import InferenceClient
 from automatic_speech_recognition import create_asr_tab
@@ -16,14 +17,35 @@ class App:
     and integrating all the individual building block tabs.
     """
 
-    def __init__(self, client: InferenceClient):
-        """Initialize the App with an InferenceClient instance.
+    def __init__(
+        self,
+        client: InferenceClient,
+        text_to_image_model: str,
+        image_to_text_model: str,
+        image_classification_model: str,
+        text_to_speech_model: str,
+        audio_transcription_model: str,
+        chat_model: str
+    ):
+        """Initialize the App with an InferenceClient instance and model IDs.
 
         Args:
             client: Hugging Face InferenceClient instance for making API calls
                 to Hugging Face's inference endpoints.
+            text_to_image_model: Model ID for text-to-image generation.
+            image_to_text_model: Model ID for image captioning.
+            image_classification_model: Model ID for image classification.
+            text_to_speech_model: Model ID for text-to-speech.
+            audio_transcription_model: Model ID for automatic speech recognition.
+            chat_model: Model ID for chatbot.
         """
         self.client = client
+        self.text_to_image_model = text_to_image_model
+        self.image_to_text_model = image_to_text_model
+        self.image_classification_model = image_classification_model
+        self.text_to_speech_model = text_to_speech_model
+        self.audio_transcription_model = audio_transcription_model
+        self.chat_model = chat_model
 
     def run(self):
         """Launch the Gradio application with all building block tabs.
@@ -37,22 +59,30 @@ class App:
             gr.Markdown("A gallery of building blocks for building AI applications")
             with gr.Tabs():
                 with gr.Tab("Text-to-image Generation"):
-                    create_text_to_image_tab(self.client)
+                    create_text_to_image_tab(self.client, self.text_to_image_model)
                 with gr.Tab("Image-to-text or Image Captioning"):
-                    create_image_to_text_tab()
+                    create_image_to_text_tab(self.image_to_text_model)
                 with gr.Tab("Image Classification"):
-                    create_image_classification_tab(self.client)
+                    create_image_classification_tab(self.client, self.image_classification_model)
                 with gr.Tab("Text-to-speech (TTS)"):
-                    create_text_to_speech_tab()
+                    create_text_to_speech_tab(self.text_to_speech_model)
                 with gr.Tab("Automatic Speech Recognition (ASR)"):
-                    create_asr_tab(self.client)
+                    create_asr_tab(self.client, self.audio_transcription_model)
                 with gr.Tab("Chat"):
-                    create_chatbot_tab()
+                    create_chatbot_tab(self.chat_model)
 
         demo.launch()
 
 
 if __name__ == "__main__":
     load_dotenv()
-    app = App(InferenceClient())
+    app = App(
+        client=InferenceClient(),
+        text_to_image_model=getenv("TEXT_TO_IMAGE_MODEL"),
+        image_to_text_model=getenv("IMAGE_TO_TEXT_MODEL"),
+        image_classification_model=getenv("IMAGE_CLASSIFICATION_MODEL"),
+        text_to_speech_model=getenv("TEXT_TO_SPEECH_MODEL"),
+        audio_transcription_model=getenv("AUDIO_TRANSCRIPTION_MODEL"),
+        chat_model=getenv("CHAT_MODEL")
+    )
     app.run()
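One review note on the new root-level lookups: `getenv` returns `None` when a variable is unset, yet each `*_model` parameter is annotated `str`, so a missing variable would now surface only when its tab is first used. A minimal fail-fast sketch, assuming a hypothetical `require_env` helper that is not part of this commit, could catch that at start-up instead:

# Sketch only — require_env is a hypothetical helper, not in this repository.
from os import getenv

def require_env(name: str) -> str:
    """Return the named environment variable's value or raise immediately if unset."""
    value = getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value

# Usage would mirror the __main__ block above, e.g.:
#     text_to_image_model=require_env("TEXT_TO_IMAGE_MODEL")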
automatic_speech_recognition.py CHANGED
@@ -1,6 +1,6 @@
 from functools import partial
 from huggingface_hub import InferenceClient
-from os import getenv, path, unlink
+from os import path, unlink
 import gradio as gr
 from utils import save_audio_to_temp_file, get_model_sample_rate, request_audio
 
@@ -40,7 +40,7 @@ def automatic_speech_recognition(client: InferenceClient, model: str, audio: tup
         pass  # Ignore clean-up errors.
 
 
-def create_asr_tab(client: InferenceClient):
+def create_asr_tab(client: InferenceClient, model: str):
     """Create the automatic speech recognition tab in the Gradio interface.
 
     This function sets up all UI components for automatic speech recognition, including:
@@ -51,8 +51,8 @@ def create_asr_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the automatic_speech_recognition function.
+        model: Hugging Face model ID to use for automatic speech recognition.
     """
-    model_id = getenv("AUDIO_TRANSCRIPTION_MODEL")
     gr.Markdown("Transcribe audio to text.")
     audio_transcription_url_input = gr.Textbox(label="Audio URL")
     audio_transcription_audio_request_button = gr.Button("Get Audio")
@@ -65,7 +65,7 @@ def create_asr_tab(client: InferenceClient):
     audio_transcription_generate_button = gr.Button("Transcribe")
     audio_transcription_output = gr.Textbox(label="Text")
     audio_transcription_generate_button.click(
-        fn=partial(automatic_speech_recognition, client, model_id),
+        fn=partial(automatic_speech_recognition, client, model),
         inputs=audio_transcription_audio_input,
         outputs=audio_transcription_output
     )
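For readers unfamiliar with the pattern in the `click` wiring above: `gr.Button.click` passes the `inputs` components to `fn` as positional arguments, so pre-binding the leading `client` and `model` parameters with `functools.partial` leaves a callable that takes only the audio value. A minimal sketch of the mechanism, with hypothetical names:

# Sketch of the partial-binding pattern (hypothetical function and model name).
from functools import partial

def transcribe(client: object, model: str, audio: tuple[int, bytes]) -> str:
    return f"transcribed {len(audio[1])} bytes with {model}"

fn = partial(transcribe, None, "some-asr-model")  # client and model pre-bound
print(fn((16_000, b"\x00\x01")))  # Gradio calls fn(audio) in exactly this shape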
chatbot.py CHANGED
@@ -1,4 +1,3 @@
-from os import getenv
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from utils import get_pytorch_device, spaces_gpu
@@ -176,7 +175,7 @@ def chat(model: str, message: str, conversation_history: list[dict] | None) -> t
     return response, conversation_history
 
 
-def create_chatbot_tab():
+def create_chatbot_tab(model: str):
     """Create the chatbot tab in the Gradio interface.
 
     This function sets up all UI components for the conversational chatbot,
@@ -189,8 +188,10 @@ def create_chatbot_tab():
     It also wires up event handlers for both button clicks and Enter key presses,
     and manages the conversion between Gradio's chat format and the internal
     conversation history format.
+
+    Args:
+        model: Hugging Face model ID to use for the chatbot.
     """
-    model_id = getenv("CHAT_MODEL")
     gr.Markdown("Have a conversation with an AI chatbot.")
     chatbot_history = gr.State(value=None)  # Store the conversation history.
     chatbot_output = gr.Chatbot(label="Conversation")
@@ -217,7 +218,7 @@ def create_chatbot_tab():
         """
         if not message.strip():
            return history, conversation_state, ""
-        response, updated_conversation = chat(model_id, message, conversation_state)  # Get response from chatbot.
+        response, updated_conversation = chat(model, message, conversation_state)  # Get response from chatbot.
        if history is None:  # Update Gradio chat history format: list of [user_message, bot_message] pairs.
            history = []
        history.append([message, response])
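Unlike the other tabs, the chatbot does not use `partial`: the nested handler simply closes over the new `model` parameter of `create_chatbot_tab`. Both styles produce a callable whose remaining parameters line up with Gradio's `inputs`; a minimal illustration with hypothetical names:

# Closure capture vs. partial binding — equivalent results (hypothetical names).
from functools import partial

def chat_fn(model: str, message: str) -> str:
    return f"[{model}] {message}"

bound = partial(chat_fn, "some-chat-model")   # model pre-bound up front

def handler(message: str) -> str:             # model captured from enclosing scope
    return chat_fn("some-chat-model", message)

assert bound("hello") == handler("hello")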
image_classification.py CHANGED
@@ -1,6 +1,6 @@
 from functools import partial
 from huggingface_hub import InferenceClient
-from os import path, unlink, getenv
+from os import path, unlink
 import gradio as gr
 from PIL.Image import Image
 import pandas as pd
@@ -47,7 +47,7 @@ def image_classification(client: InferenceClient, model: str, image: Image) -> D
         pass  # Ignore clean-up errors.
 
 
-def create_image_classification_tab(client: InferenceClient):
+def create_image_classification_tab(client: InferenceClient, model: str):
     """Create the image classification tab in the Gradio interface.
 
     This function sets up all UI components for image classification, including:
@@ -58,8 +58,8 @@ def create_image_classification_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the image_classification function.
+        model: Hugging Face model ID to use for image classification.
     """
-    model_id = getenv("IMAGE_CLASSIFICATION_MODEL")
     gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
     image_classification_url_input = gr.Textbox(label="Image URL")
     image_classification_image_request_button = gr.Button("Get Image")
@@ -72,7 +72,7 @@ def create_image_classification_tab(client: InferenceClient):
     image_classification_button = gr.Button("Classify")
     image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
     image_classification_button.click(
-        fn=partial(image_classification, client, model_id),
+        fn=partial(image_classification, client, model),
         inputs=image_classification_image_input,
         outputs=image_classification_output
     )
image_to_text.py CHANGED
@@ -1,6 +1,5 @@
 import gc
 from functools import partial
-from os import getenv
 import gradio as gr
 from PIL.Image import Image
 from transformers import AutoProcessor, BlipForConditionalGeneration
@@ -42,7 +41,7 @@ def image_to_text(model: str, image: Image) -> list[str]:
     return results
 
 
-def create_image_to_text_tab():
+def create_image_to_text_tab(model: str):
     """Create the image-to-text captioning tab in the Gradio interface.
 
     This function sets up all UI components for image captioning, including:
@@ -50,8 +49,10 @@ def create_image_to_text_tab():
     - Button to retrieve image from URL
     - Image preview component
     - Caption button and output list
+
+    Args:
+        model: Hugging Face model ID to use for image captioning.
     """
-    model_id = getenv("IMAGE_TO_TEXT_MODEL")
     gr.Markdown("Generate a text description of an image.")
     image_to_text_url_input = gr.Textbox(label="Image URL")
     image_to_text_image_request_button = gr.Button("Get Image")
@@ -64,7 +65,7 @@ def create_image_to_text_tab():
     image_to_text_button = gr.Button("Caption")
     image_to_text_output = gr.List(label="Captions", headers=["Caption"])
     image_to_text_button.click(
-        fn=partial(image_to_text, model_id),
+        fn=partial(image_to_text, model),
         inputs=image_to_text_image_input,
         outputs=image_to_text_output
     )
text_to_image.py CHANGED
@@ -1,5 +1,4 @@
 from functools import partial
-from os import getenv
 import gradio as gr
 from PIL.Image import Image
 from huggingface_hub import InferenceClient
@@ -19,7 +18,7 @@ def text_to_image(client: InferenceClient, model: str, prompt: str) -> Image:
     return client.text_to_image(prompt, model=model)
 
 
-def create_text_to_image_tab(client: InferenceClient):
+def create_text_to_image_tab(client: InferenceClient, model: str):
     """Create the text-to-image generation tab in the Gradio interface.
 
     This function sets up all UI components for text-to-image generation,
@@ -27,14 +26,14 @@ def create_text_to_image_tab(client: InferenceClient):
 
     Args:
         client: Hugging Face InferenceClient instance to pass to the text_to_image function.
+        model: Hugging Face model ID to use for text-to-image generation.
     """
-    model_id = getenv("TEXT_TO_IMAGE_MODEL")
     gr.Markdown("Generate an image from a text prompt.")
     text_to_image_prompt = gr.Textbox(label="Prompt")
     text_to_image_generate_button = gr.Button("Generate")
     text_to_image_output = gr.Image(label="Image", type="pil")
     text_to_image_generate_button.click(
-        fn=partial(text_to_image, client, model_id),
+        fn=partial(text_to_image, client, model),
         inputs=text_to_image_prompt,
         outputs=text_to_image_output
     )
text_to_speech.py CHANGED
@@ -1,6 +1,5 @@
 import gc
 from functools import partial
-from os import getenv
 import gradio as gr
 from transformers import pipeline
 from utils import spaces_gpu
@@ -40,19 +39,21 @@ def text_to_speech(model: str, text: str) -> tuple[int, bytes]:
     return (result["sampling_rate"], result["audio"][0])
 
 
-def create_text_to_speech_tab():
+def create_text_to_speech_tab(model: str):
     """Create the text-to-speech tab in the Gradio interface.
 
     This function sets up all UI components for text-to-speech generation,
     including input textbox, generate button, and output audio player.
+
+    Args:
+        model: Hugging Face model ID to use for text-to-speech.
     """
-    model_id = getenv("TEXT_TO_SPEECH_MODEL")
     gr.Markdown("Generate speech from text.")
     text_to_speech_text = gr.Textbox(label="Text")
     text_to_speech_generate_button = gr.Button("Generate")
     text_to_speech_output = gr.Audio(label="Speech")
     text_to_speech_generate_button.click(
-        fn=partial(text_to_speech, model_id),
+        fn=partial(text_to_speech, model),
         inputs=text_to_speech_text,
         outputs=text_to_speech_output
     )