aurelben committed on
Commit
23ed361
1 Parent(s): f349c91

change whisper version

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +23 -8
  3. requirements.txt +1 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Parlons Nous
3
- python_version: 3.9.0
4
  emoji: 🏢
5
  colorFrom: purple
6
  colorTo: indigo
 
1
  ---
2
  title: Parlons Nous
3
+ python_version: 3.10
4
  emoji: 🏢
5
  colorFrom: purple
6
  colorTo: indigo
app.py CHANGED
@@ -5,15 +5,30 @@ import numpy as np
5
  import torch
6
  from groq import Groq
7
  from transformers import pipeline
8
- from transformers.utils import is_flash_attn_2_available
9
  from TTS.api import TTS
10
 
11
- transcriber = pipeline("automatic-speech-recognition",
12
- model="openai/whisper-large-v3",
13
- torch_dtype=torch.float16,
14
- device="cuda:0",
15
- model_kwargs={"attn_implementation": "flash_attention_2"} if is_flash_attn_2_available() else {"attn_implementation": "sdpa"},
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
19
 
@@ -36,7 +51,7 @@ def transcribe(stream, new_chunk):
36
  stream = np.concatenate([stream, y])
37
  else:
38
  stream = y
39
- return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
40
 
41
  def autocomplete(text):
42
  """
 
5
  import torch
6
  from groq import Groq
7
  from transformers import pipeline
 
8
  from TTS.api import TTS
9
 
10
+ MODEL_NAME = "openai/whisper-large-v3"
11
+ BATCH_SIZE = 8
12
+ FILE_LIMIT_MB = 1000
13
+ YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
14
+
15
+ device = 0 if torch.cuda.is_available() else "cpu"
16
+
17
+ pipe = pipeline(
18
+ task="automatic-speech-recognition",
19
+ model=MODEL_NAME,
20
+ chunk_length_s=30,
21
+ device=device,
22
+ )
23
+
24
+
25
+ def use_pipe(inputs):
26
+ if inputs is None:
27
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
28
+
29
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
30
+ return text
31
+
32
 
33
  groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
34
 
 
51
  stream = np.concatenate([stream, y])
52
  else:
53
  stream = y
54
+ return stream, use_pipe(stream)
55
 
56
  def autocomplete(text):
57
  """
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  gradio
2
  groq
3
  numpy
 
4
  torchaudio
5
  transformers
6
  tts
 
1
  gradio
2
  groq
3
  numpy
4
+ torch
5
  torchaudio
6
  transformers
7
  tts