beeguy committed on
Commit
a75d0bf
1 Parent(s): b3ae8aa

speech recognition

Browse files
Files changed (3) hide show
  1. .gitignore +7 -0
  2. app.py +23 -4
  3. requirements.txt +69 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # .gitignore file for python projects
2
+ .venv
3
+ .env
4
+ __pycache__
5
+ *.pyc
6
+ *.pyo
7
+ *.pyd
app.py CHANGED
@@ -1,7 +1,26 @@
1
  import gradio as gr
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
 
5
# Build the speech-recognition pipeline once, at import time, so every call to
# transcribe() reuses the same loaded "openai/whisper-base.en" checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
 
6
 
7
def transcribe(stream, new_chunk):
    """Append a new audio chunk to the running stream and transcribe it all.

    Args:
        stream: Audio accumulated by previous calls (a float32 NumPy array),
            or ``None`` on the first call of a session.
        new_chunk: ``(sampling_rate, samples)`` tuple, as delivered by the
            streaming ``gr.Audio`` input this function is wired to.

    Returns:
        A ``(stream, text)`` tuple: the updated audio buffer, which is fed
        back in through the "state" slot on the next call, and the
        transcription of the whole buffer so far.
    """
    sr, y = new_chunk
    y = y.astype(np.float32)

    # A multi-channel recording is presumably shaped (samples, channels);
    # average the channels so the buffer stays 1-D.
    # NOTE(review): confirm the channel axis against the Gradio Audio docs.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise to [-1, 1]. A silent (all-zero) or empty chunk has no
    # usable peak: dividing by it would poison the buffer with NaN (or raise
    # on an empty array), so such chunks are appended unchanged.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # The full buffer is re-transcribed on every chunk, so the returned text
    # always covers everything recorded so far.
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
17
+
18
+
19
# Live microphone demo. The "state" entry in both inputs and outputs carries
# the accumulated audio buffer from one transcribe() call to the next; the
# second input is the streamed microphone chunk and the second output is the
# transcription text.
demo = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)

# Start the Gradio server.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.3.0
3
+ annotated-types==0.6.0
4
+ anyio==4.3.0
5
+ attrs==23.2.0
6
+ certifi==2024.2.2
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ contourpy==1.2.1
10
+ cycler==0.12.1
11
+ exceptiongroup==1.2.0
12
+ fastapi==0.110.1
13
+ ffmpy==0.3.2
14
+ filelock==3.13.4
15
+ fonttools==4.51.0
16
+ fsspec==2024.3.1
17
+ gradio==4.26.0
18
+ gradio_client==0.15.1
19
+ h11==0.14.0
20
+ httpcore==1.0.5
21
+ httpx==0.27.0
22
+ huggingface-hub==0.22.2
23
+ idna==3.7
24
+ importlib_resources==6.4.0
25
+ Jinja2==3.1.3
26
+ jsonschema==4.21.1
27
+ jsonschema-specifications==2023.12.1
28
+ kiwisolver==1.4.5
29
+ markdown-it-py==3.0.0
30
+ MarkupSafe==2.1.5
31
+ matplotlib==3.8.4
32
+ mdurl==0.1.2
33
+ numpy==1.26.4
34
+ orjson==3.10.0
35
+ packaging==24.0
36
+ pandas==2.2.2
37
+ pillow==10.3.0
38
+ pydantic==2.7.0
39
+ pydantic_core==2.18.1
40
+ pydub==0.25.1
41
+ Pygments==2.17.2
42
+ pyparsing==3.1.2
43
+ python-dateutil==2.9.0.post0
44
+ python-multipart==0.0.9
45
+ pytz==2024.1
46
+ PyYAML==6.0.1
47
+ referencing==0.34.0
48
+ regex==2023.12.25
49
+ requests==2.31.0
50
+ rich==13.7.1
51
+ rpds-py==0.18.0
52
+ ruff==0.3.7
53
+ safetensors==0.4.2
54
+ semantic-version==2.10.0
55
+ shellingham==1.5.4
56
+ six==1.16.0
57
+ sniffio==1.3.1
58
+ starlette==0.37.2
59
+ tokenizers==0.15.2
60
+ tomlkit==0.12.0
61
+ toolz==0.12.1
62
+ tqdm==4.66.2
63
+ transformers==4.39.3
64
+ typer==0.12.3
65
+ typing_extensions==4.11.0
66
+ tzdata==2024.1
67
+ urllib3==2.2.1
68
+ uvicorn==0.29.0
69
+ websockets==11.0.3