dron3flyv3r committed on
Commit
3a47b34
1 Parent(s): 5e8f5fe

Add audio transcription and text summarization features

Browse files
Files changed (2) hide show
  1. README.md +3 -0
  2. app.py +33 -3
README.md CHANGED
@@ -8,6 +8,9 @@ sdk_version: 4.22.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ models:
12
+ - openai/whisper-large-v3
13
+ - google/gemma-7b-it
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,12 +1,42 @@
1
  import gradio as gr
2
  import os
 
3
 
4
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
5
 
6
 
7
def greet(name):
    """Return a friendly greeting for *name*.

    SECURITY FIX: the original implementation appended HUGGINGFACE_API_KEY
    to the returned string, leaking the secret token to every user of the
    public interface. Never echo credentials in user-facing output.
    """
    return "Hello " + name + "!!"
 
 
 
9
 
10
 
11
# Legacy UI: a single text box in, a single text box out, wired to greet().
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()
 
1
  import gradio as gr
2
  import os
3
+ from huggingface_hub import InferenceClient
4
 
5
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
6
 
7
 
8
def transcript_audio(audio_file) -> str:
    """Transcribe an audio file with Whisper via the HF Inference API.

    Parameters
    ----------
    audio_file :
        Path to (or raw bytes of) the audio to transcribe.
        # assumes the Gradio Audio component delivers a file path — TODO confirm

    Returns
    -------
    str
        The recognized text.
    """
    model = "openai/whisper-large-v3"
    api = InferenceClient(model, token=HUGGINGFACE_API_KEY)
    result = api.automatic_speech_recognition(audio_file)
    # BUG FIX: recent huggingface_hub returns an AutomaticSpeechRecognitionOutput
    # object, not a str, so the original `return text` violated the `-> str`
    # contract and broke downstream string handling. Older client versions
    # returned a plain string; support both.
    return result.text if hasattr(result, "text") else str(result)
13
 
14
 
15
def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize *text* into bullet points with Gemma via the HF Inference API.

    Parameters
    ----------
    text : str
        The text to summarize.
    bullet_points : int
        Number of bullet points requested in the prompt.
    conclusion : bool
        Whether to also ask the model for a concluding sentence.

    Returns
    -------
    str
        The generated summary.
    """
    llm_model = "google/gemma-7b-it"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    if conclusion:
        prompt = f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
    else:
        prompt = f"Summarize the following text into {bullet_points} bullet points: {text}"
    # BUG FIX: text_generation() returns the generated string directly when
    # `details` is not requested, so the original `summary["generated_text"]`
    # raised TypeError (string indices must be integers). Return it as-is.
    # NOTE: do_sample=True makes the summary non-deterministic by design.
    return api.text_generation(prompt, max_new_tokens=250, do_sample=True)
24
+
25
def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Dispatch: transcribe the audio file if one was supplied, then summarize.

    BUG FIX: the original annotated `audio_file: gr.AudioFile`, but Gradio 4.x
    has no `AudioFile` attribute — evaluating the annotation raised
    AttributeError at import time, crashing the app before launch. The
    parameter is left un-annotated (it receives whatever the Audio component
    produces, or None when no audio was uploaded).

    Parameters
    ----------
    audio_file :
        Optional audio input; when truthy it overrides *text* via transcription.
    text : str
        Text to summarize when no audio is given.
    bullet_points : int
        Number of bullet points requested.
    conclusion : bool
        Whether to append a conclusion.

    Returns
    -------
    str
        The generated summary.
    """
    if audio_file:
        text = transcript_audio(audio_file)
    return summarize_text(text, bullet_points, conclusion)
30
# Simple interface: the user enters text (or uploads an audio file, which is
# transcribed first) and gets back a bullet-point summary.
iface = gr.Interface(
    # BUG FIX: was fn=summarize_text, which takes only 3 parameters while 4
    # input components are declared — Gradio would call it with 4 arguments
    # and raise TypeError. `control` (audio, text, bullets, conclusion) is
    # the function this UI was built for.
    fn=control,
    inputs=[
        # type="filepath" so the ASR client receives a file path rather than
        # the default (sample_rate, numpy array) tuple it cannot consume.
        gr.components.Audio(label="Audio file", type="filepath"),
        gr.components.Textbox(lines=5, label="Text"),
        gr.components.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of bullet points"),
        gr.components.Checkbox(label="Add conclusion"),
    ],
    outputs=gr.components.Textbox(label="Summary"),
)

iface.launch()