aleger committed
Commit d8d26b1
Parent(s): 2745c87

add bentoml files

apis/openapi.yaml ADDED
@@ -0,0 +1,219 @@
+ components:
+   schemas:
+     InternalServerError:
+       description: Internal Server Error
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: InternalServerError
+       type: object
+     InvalidArgument:
+       description: Bad Request
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: InvalidArgument
+       type: object
+     NotFound:
+       description: Not Found
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: NotFound
+       type: object
+ info:
+   contact:
+     email: contact@bentoml.com
+     name: BentoML Team
+   description: "# speech_to_text_pipeline:None\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.0.20-informational)](https://pypi.org/project/BentoML)\n\
+     [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.org/)\n\
+     [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
+     [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
+     [![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai)\n\
+     \nThis is a Machine Learning Service created with BentoML.\n| InferenceAPI | Input\
+     \ | Output |\n| ------------ | ----- | ------ |\n| POST [`/process_uploaded_file`](#operations-Service_APIs-speech_to_text_pipeline__process_uploaded_file)\
+     \ | BytesIOFile | JSON |\n| POST [`/zip_transcription`](#operations-Service_APIs-speech_to_text_pipeline__zip_transcription)\
+     \ | JSON | BytesIOFile |\n\n\n\n\n## Help\n\n* [\U0001F4D6 Documentation](https://docs.bentoml.org/en/latest/):\
+     \ Learn how to use BentoML.\n* [\U0001F4AC Community](https://l.bentoml.com/join-slack-swagger):\
+     \ Join the BentoML Slack community.\n* [\U0001F41B GitHub Issues](https://github.com/bentoml/BentoML/issues):\
+     \ Report bugs and feature requests.\n* Tip: you can also [customize this README](https://docs.bentoml.org/en/latest/concepts/bento.html#description).\n"
+   title: speech_to_text_pipeline
+   version: None
+ openapi: 3.0.2
+ paths:
+   /healthz:
+     get:
+       description: Health check endpoint. Expecting an empty response with status
+         code <code>200</code> when the service is in a healthy state. The <code>/healthz</code>
+         endpoint is <b>deprecated</b> (since Kubernetes v1.16).
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /livez:
+     get:
+       description: Health check endpoint for Kubernetes. A healthy endpoint responds
+         with a <code>200</code> OK status.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /metrics:
+     get:
+       description: Prometheus metrics endpoint. The <code>/metrics</code> endpoint
+         responds with a <code>200</code>. The output can then be used by a Prometheus
+         sidecar to scrape the metrics of the service.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /process_uploaded_file:
+     post:
+       consumes:
+       - null
+       description: ''
+       operationId: speech_to_text_pipeline__process_uploaded_file
+       produces:
+       - application/json
+       requestBody:
+         content:
+           '*/*':
+             schema:
+               format: binary
+               type: string
+         required: true
+         x-bentoml-io-descriptor:
+           args:
+             kind: binaryio
+             mime_type: null
+           id: bentoml.io.File
+       responses:
+         200:
+           content:
+             application/json:
+               schema:
+                 type: object
+           description: Successful Response
+           x-bentoml-io-descriptor:
+             args:
+               has_json_encoder: true
+               has_pydantic_model: false
+             id: bentoml.io.JSON
+         400:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InvalidArgument'
+           description: Bad Request
+         404:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/NotFound'
+           description: Not Found
+         500:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InternalServerError'
+           description: Internal Server Error
+       summary: "InferenceAPI(BytesIOFile \u2192 JSON)"
+       tags:
+       - Service APIs
+       x-bentoml-name: process_uploaded_file
+   /readyz:
+     get:
+       description: A <code>200</code> OK status from the <code>/readyz</code> endpoint
+         indicates the service is ready to accept traffic. From that point onward,
+         Kubernetes will use the <code>/livez</code> endpoint to perform periodic
+         health checks.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /zip_transcription:
+     post:
+       consumes:
+       - application/json
+       description: ''
+       operationId: speech_to_text_pipeline__zip_transcription
+       produces:
+       - null
+       requestBody:
+         content:
+           application/json:
+             schema:
+               type: object
+         required: true
+         x-bentoml-io-descriptor:
+           args:
+             has_json_encoder: true
+             has_pydantic_model: false
+           id: bentoml.io.JSON
+       responses:
+         200:
+           content:
+             '*/*':
+               schema:
+                 format: binary
+                 type: string
+           description: Successful Response
+           x-bentoml-io-descriptor:
+             args:
+               kind: binaryio
+               mime_type: null
+             id: bentoml.io.File
+         400:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InvalidArgument'
+           description: Bad Request
+         404:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/NotFound'
+           description: Not Found
+         500:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InternalServerError'
+           description: Internal Server Error
+       summary: "InferenceAPI(JSON \u2192 BytesIOFile)"
+       tags:
+       - Service APIs
+       x-bentoml-name: zip_transcription
+ servers:
+ - url: .
+ tags:
+ - description: BentoML Service API endpoints for inference.
+   name: Service APIs
+ - description: Common infrastructure endpoints for observability.
+   name: Infrastructure
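
The two Service APIs above map one-to-one onto the functions in `src/service.py`. A minimal client sketch (assuming the service is reachable on port 7860, the port set in `src/configuration.yaml`; `sample.mp3` and the payload shape are illustrative):

```python
# Client sketch for the two inference routes. Assumes the service is running at
# http://localhost:7860 (port from src/configuration.yaml); "sample.mp3" is a
# placeholder file name.
import requests

BASE = "http://localhost:7860"

# POST /process_uploaded_file: raw file bytes in (content type '*/*'), JSON out.
with open("sample.mp3", "rb") as f:
    resp = requests.post(f"{BASE}/process_uploaded_file", data=f.read())
resp.raise_for_status()
result = resp.json()
print(result["language"], result["transcript"])

# POST /zip_transcription: JSON in, binary zip out. The zipper expects a list of
# [filename, original_text, edited_text] triples (see src/runners/transcription_zipper.py).
payload = [["sample.mp3", "original transcript", "edited transcript"]]
resp = requests.post(f"{BASE}/zip_transcription", json=payload)
resp.raise_for_status()
with open("transcripts.zip", "wb") as out:
    out.write(resp.content)
```
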
bento.yaml ADDED
@@ -0,0 +1,64 @@
+ service: service:svc
+ name: speech_to_text_pipeline
+ version: 4246aqrsa265utka
+ bentoml_version: 1.0.20
+ creation_time: '2023-08-03T14:06:11.828884+00:00'
+ labels:
+   owner: modern-ai-team
+   stage: dev
+ models: []
+ runners:
+ - name: audio_transcriber
+   runnable_type: AudioTranscriber
+   embedded: false
+   models: []
+   resource_config: null
+ - name: audio_amplitude
+   runnable_type: AudioAmplitude
+   embedded: false
+   models: []
+   resource_config: null
+ - name: keyword_extractor
+   runnable_type: KeywordExtractor
+   embedded: false
+   models: []
+   resource_config: null
+ - name: transcription_zipper
+   runnable_type: TranscriptionZipper
+   embedded: false
+   models: []
+   resource_config: null
+ apis:
+ - name: process_uploaded_file
+   input_type: BytesIOFile
+   output_type: JSON
+ - name: zip_transcription
+   input_type: JSON
+   output_type: BytesIOFile
+ docker:
+   distro: debian
+   python_version: '3.10'
+   cuda_version: null
+   env:
+     BENTOML_CONFIG: src/configuration.yaml
+   system_packages:
+   - ffmpeg
+   setup_script: null
+   base_image: null
+   dockerfile_template: null
+ python:
+   requirements_txt: ../requirements.txt
+   packages: null
+   lock_packages: null
+   index_url: null
+   no_index: null
+   trusted_host: null
+   find_links: null
+   extra_index_url: null
+   pip_args: null
+   wheels: null
+ conda:
+   environment_yml: null
+   channels: null
+   dependencies: null
+   pip: null
env/docker/entrypoint.sh ADDED
@@ -0,0 +1,56 @@
+ #!/usr/bin/env bash
+ set -Eeuo pipefail
+
+ # check to see if this file is being run or sourced from another script
+ _is_sourced() {
+   # https://unix.stackexchange.com/a/215279
+   [ "${#FUNCNAME[@]}" -ge 2 ] &&
+     [ "${FUNCNAME[0]}" = '_is_sourced' ] &&
+     [ "${FUNCNAME[1]}" = 'source' ]
+ }
+
+ _main() {
+   # For backwards compatibility with yatai<1.0.0, adapt the old "yatai" command to the new "start" command.
+   if [ "${#}" -gt 0 ] && [ "${1}" = 'python' ] && [ "${2}" = '-m' ] && { [ "${3}" = 'bentoml._internal.server.cli.runner' ] || [ "${3}" = "bentoml._internal.server.cli.api_server" ]; }; then # SC2235, use { } to avoid subshell overhead
+     if [ "${3}" = 'bentoml._internal.server.cli.runner' ]; then
+       set -- bentoml start-runner-server "${@:4}"
+     elif [ "${3}" = 'bentoml._internal.server.cli.api_server' ]; then
+       set -- bentoml start-http-server "${@:4}"
+     fi
+   # If no arg or first arg looks like a flag.
+   elif [[ "$#" -eq 0 ]] || [[ "${1:0:1}" =~ '-' ]]; then
+     # This is provided for backwards compatibility with places where users may have
+     # discovered this easter egg and used it in their scripts to run the container.
+     if [[ -v BENTOML_SERVE_COMPONENT ]]; then
+       echo "\$BENTOML_SERVE_COMPONENT is set! Calling 'bentoml start-*' instead"
+       if [ "${BENTOML_SERVE_COMPONENT}" = 'http_server' ]; then
+         set -- bentoml start-http-server "$@" "$BENTO_PATH"
+       elif [ "${BENTOML_SERVE_COMPONENT}" = 'grpc_server' ]; then
+         set -- bentoml start-grpc-server "$@" "$BENTO_PATH"
+       elif [ "${BENTOML_SERVE_COMPONENT}" = 'runner' ]; then
+         set -- bentoml start-runner-server "$@" "$BENTO_PATH"
+       fi
+     else
+       set -- bentoml serve "$@" "$BENTO_PATH"
+     fi
+   fi
+   # Override BENTOML_PORT if the PORT env var is present. Used for Heroku and Yatai.
+   if [[ -v PORT ]]; then
+     echo "\$PORT is set! Overriding \$BENTOML_PORT with \$PORT ($PORT)"
+     export BENTOML_PORT=$PORT
+   fi
+   # Handle serve and start commands that are passed to the container.
+   # Assuming that serve and start commands are the first arguments.
+   # Note that this is the recommended way going forward to run all bentoml containers.
+   if [ "${#}" -gt 0 ] && { [ "${1}" = 'serve' ] || [ "${1}" = 'serve-http' ] || [ "${1}" = 'serve-grpc' ] || [ "${1}" = 'start-http-server' ] || [ "${1}" = 'start-grpc-server' ] || [ "${1}" = 'start-runner-server' ]; }; then
+     exec bentoml "$@" "$BENTO_PATH"
+   else
+     # otherwise, default to running whatever command was given.
+     # This should allow running bash, sh, python, etc.
+     exec "$@"
+   fi
+ }
+
+ if ! _is_sourced; then
+   _main "$@"
+ fi
env/python/install.sh ADDED
@@ -0,0 +1,41 @@
+ #!/usr/bin/env bash
+ set -exuo pipefail
+
+ # Parent directory https://stackoverflow.com/a/246128/8643197
+ BASEDIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )" &> /dev/null && pwd 2> /dev/null; )"
+
+ PIP_ARGS=(--no-warn-script-location)
+
+ # BentoML by default generates two requirement files:
+ #  - ./env/python/requirements.lock.txt: all dependencies locked to the versions present during `build`
+ #  - ./env/python/requirements.txt: all dependencies as specified by the user in code or a requirements.txt file
+ REQUIREMENTS_TXT="$BASEDIR/requirements.txt"
+ REQUIREMENTS_LOCK="$BASEDIR/requirements.lock.txt"
+ WHEELS_DIR="$BASEDIR/wheels"
+ BENTOML_VERSION=${BENTOML_VERSION:-1.0.20}
+ # Install python packages, preferring the requirements.lock.txt file if it exists
+ if [ -f "$REQUIREMENTS_LOCK" ]; then
+   echo "Installing pip packages from 'requirements.lock.txt'.."
+   pip3 install -r "$REQUIREMENTS_LOCK" "${PIP_ARGS[@]}"
+ else
+   if [ -f "$REQUIREMENTS_TXT" ]; then
+     echo "Installing pip packages from 'requirements.txt'.."
+     pip3 install -r "$REQUIREMENTS_TXT" "${PIP_ARGS[@]}"
+   fi
+ fi
+
+ # Install user-provided wheels
+ if [ -d "$WHEELS_DIR" ]; then
+   echo "Installing wheels packaged in Bento.."
+   pip3 install "$WHEELS_DIR"/*.whl "${PIP_ARGS[@]}"
+ fi
+
+ # Install BentoML from PyPI if it's not already installed
+ if python3 -c "import bentoml" &> /dev/null; then
+   existing_bentoml_version=$(python3 -c "import bentoml; print(bentoml.__version__)")
+   if [ "$existing_bentoml_version" != "$BENTOML_VERSION" ]; then
+     echo "WARNING: using BentoML version ${existing_bentoml_version}"
+   fi
+ else
+   pip3 install bentoml=="$BENTOML_VERSION"
+ fi
env/python/requirements.txt ADDED
@@ -0,0 +1,12 @@
+ bentoml==1.0.20
+ openai-whisper
+ faster-whisper
+ pydub==0.25.1
+ torch==2.0.1
+ torchvision==0.15.2
+ torchaudio==2.0.2
+ transformers==4.29.2
+ yake==0.4.8
+ fastapi
+ python-docx
+ ffmpeg-python  # needed for `import ffmpeg` in src/runners/audio_transcriber.py
env/python/version.txt ADDED
@@ -0,0 +1 @@
+ 3.10.12
src/configuration.yaml ADDED
@@ -0,0 +1,15 @@
+ runners:
+   timeout: 900
+
+ api_server:
+   http:
+     port: 7860
+     cors:
+       enabled: True
+       access_control_allow_origins: [ "*" ]
+       access_control_allow_methods: ["GET", "OPTIONS", "POST", "HEAD", "PUT"]
+       access_control_allow_credentials: True
+       access_control_allow_headers: [ "*" ]
+       access_control_max_age: Null
+       # access_control_expose_headers: ["Content-Length"]
+       # default_max_request_size: 104857600
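
With CORS enabled as above, a front end served from another origin can call the API directly. A quick way to eyeball the preflight response (a sketch; assumes the service is running locally on the configured port 7860):

```python
# Sketch: inspect the CORS preflight response for an inference route.
import requests

resp = requests.options(
    "http://localhost:7860/process_uploaded_file",
    headers={
        "Origin": "http://example.com",           # any origin; the config allows "*"
        "Access-Control-Request-Method": "POST",  # matches the allowed methods list
    },
)
for key, value in resp.headers.items():
    if key.lower().startswith("access-control-"):
        print(f"{key}: {value}")
```
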
src/runners/__init__.py ADDED
File without changes
src/runners/audio_amplitude.py ADDED
@@ -0,0 +1,41 @@
+ import bentoml
+ import numpy as np
+ from pydub import AudioSegment
+ from pydub.utils import mediainfo
+
+
+ class AudioAmplitude(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     SAMPLE_RATE = 16000
+
+     def __init__(self):
+         pass
+
+     @bentoml.Runnable.method(batchable=False)
+     def get_audio_amplitude(self, temp_file_path):
+         # bit_rate = int(int(mediainfo(temp_file_path)["bit_rate"])/1000)
+         audio = AudioSegment.from_file(temp_file_path)
+
+         # get raw audio data as a bytestring
+         raw_data = audio.raw_data
+         # get the frame rate
+         sample_rate = audio.frame_rate
+         # get amount of bytes contained in one sample
+         sample_size = audio.sample_width
+         # get channels
+         channels = audio.channels
+
+         print("INFO: ", sample_rate, sample_size, channels)
+
+         audio_array = np.array(audio.get_array_of_samples())
+
+         # Normalize the audio array to values between -1 and 1
+         normalized_audio = audio_array / (2 ** 15)  # assuming 16-bit signed samples (2 ** 15 = 32768)
+
+         # Convert stereo to mono (average the channels)
+         if audio.channels == 2:
+             normalized_audio = (normalized_audio[::2] + normalized_audio[1::2]) / 2
+
+         return normalized_audio
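
The runnable can be exercised in-process with BentoML's local debugging mode for runners; a sketch (run from `src/`, with `sample.wav` as a placeholder path):

```python
# Sketch: debug the AudioAmplitude runner in-process, without a runner server.
import bentoml

from runners.audio_amplitude import AudioAmplitude

runner = bentoml.Runner(AudioAmplitude, name="audio_amplitude_debug")
runner.init_local()  # run the runnable in the current process (debugging only)

normalized = runner.get_audio_amplitude.run("sample.wav")
# For 16-bit sources the values should lie within [-1, 1].
print(normalized.min(), normalized.max(), normalized.shape)
```
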
src/runners/audio_transcriber.py ADDED
@@ -0,0 +1,74 @@
+ import tempfile
+
+ import bentoml
+ import ffmpeg
+ import numpy as np
+ import torch
+ from faster_whisper import WhisperModel
+ from transformers import pipeline
+
+
+ class AudioTranscriber(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     SAMPLE_RATE = 16000
+
+     def __init__(self):
+         self.faster_model = WhisperModel("base")
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     @bentoml.Runnable.method(batchable=False)
+     def transcribe_audio_faster(self, temp_file_path):
+         segments, info = self.faster_model.transcribe(temp_file_path)
+         transcription = []
+         segment_info = []
+         for segment in segments:
+             print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+             transcription.append(segment.text)
+             segment_info.append((segment.start, segment.end))
+
+         return transcription, info, segment_info
+
+     @bentoml.Runnable.method(batchable=False)
+     def transcribe_audio(self, file):
+         with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+             temp_file.write(file.read())
+             temp_file_path = temp_file.name
+
+         # Use the temporary file path as input for ffmpeg.input()
+         try:
+             # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+             # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+             out, _ = (
+                 ffmpeg.input(temp_file_path, threads=0)
+                 .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=self.SAMPLE_RATE)
+                 .run(cmd="ffmpeg", capture_stdout=True, capture_stderr=True)
+             )
+         except ffmpeg.Error as e:
+             raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}")
+
+         input_features = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+
+         print("Input_features", type(input_features))
+
+         # The original code referenced undefined attributes (self.model, self.tokenizer,
+         # self.extractor). Loading a named checkpoint instead; "openai/whisper-base" is
+         # an assumption, chosen to match the faster-whisper "base" model used above.
+         pipe = pipeline("automatic-speech-recognition",
+                         model="openai/whisper-base",
+                         device=self.device)
+
+         result = self.get_long_transcription_whisper(input_features, pipe)
+
+         return result
+
+     @staticmethod
+     def get_long_transcription_whisper(input_features, pipe, return_timestamps=True,
+                                        chunk_length_s=10, stride_length_s=2):
+         """Get the transcription of a long audio file using the Whisper model.
+
+         input_features: numpy.ndarray
+         """
+         return pipe(input_features, return_timestamps=return_timestamps,
+                     chunk_length_s=chunk_length_s, stride_length_s=stride_length_s)
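
As with the other runnables, the transcriber can be smoke-tested in-process; a sketch (`sample.mp3` is a placeholder, and the first call downloads the Whisper "base" weights):

```python
# Sketch: run the faster-whisper path of AudioTranscriber locally.
import bentoml

from runners.audio_transcriber import AudioTranscriber

runner = bentoml.Runner(AudioTranscriber, name="audio_transcriber_debug")
runner.init_local()  # loads WhisperModel("base") in this process

transcription, info, segment_info = runner.transcribe_audio_faster.run("sample.mp3")
print(info.language, info.language_probability)
for (start, end), text in zip(segment_info, transcription):
    print(f"[{start:.2f}s -> {end:.2f}s]{text}")
```
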
src/runners/keyword_extractor.py ADDED
@@ -0,0 +1,18 @@
+ import bentoml
+ import yake
+
+
+ class KeywordExtractor(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("cpu",)
+     SUPPORTS_CPU_MULTI_THREADING = False
+
+     def __init__(self, **kwargs):
+         self.keyword_extractor = yake.KeywordExtractor(**kwargs)
+
+     @bentoml.Runnable.method(batchable=False)
+     def extract_keywords(self, transcript, lang, key_count):
+         self.keyword_extractor.lan = lang
+         self.keyword_extractor.top = key_count
+         keywords = self.keyword_extractor.extract_keywords(transcript)
+         print("keywords successfully extracted")
+         return keywords
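
Same pattern for the extractor; note that `extract_keywords` mutates the yake extractor's language and keyword count on every call, so one instance serves all requests. A sketch:

```python
# Sketch: exercise KeywordExtractor in-process.
import bentoml

from runners.keyword_extractor import KeywordExtractor

runner = bentoml.Runner(KeywordExtractor, name="keyword_extractor_debug")
runner.init_local()

keywords = runner.extract_keywords.run(
    "BentoML packages this speech to text pipeline as a deployable service.",
    "en",  # yake language code
    5,     # number of keywords to return
)
for phrase, score in keywords:
    print(f"{score:.4f}  {phrase}")  # lower yake scores mean more relevant phrases
```
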
src/runners/transcription_zipper.py ADDED
@@ -0,0 +1,103 @@
+ import io
+ import zipfile
+
+ import bentoml
+
+ from docx.enum.text import WD_COLOR_INDEX
+
+
+ class TranscriptionZipper(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     @bentoml.Runnable.method(batchable=False)
+     def zip_transcription(self, transcription_list):
+         zip_buffer = io.BytesIO()
+
+         for t_list in transcription_list:
+             orig_filename = t_list[0]
+             if ".mp3" in orig_filename:
+                 orig_filename = orig_filename.removesuffix(".mp3")
+             else:
+                 orig_filename = orig_filename.removesuffix(".wav")
+
+             new_content = create_word_content(orig_filename, t_list[1], t_list[2])
+             new_content.save(orig_filename + '.docx')
+             # new_content = create_content(t_list[1], t_list[2])  # html string
+
+             with zipfile.ZipFile(zip_buffer, "a") as zip_file:
+                 # zip_file.writestr(file_name + ".html", new_content)
+                 zip_file.write(orig_filename + '.docx')
+
+         # Return the zip file as bytes
+         return zip_buffer.getvalue()
+
+
+ def create_word_content(filename, old_content, new_content):
+     from docx import Document
+
+     document = Document()
+
+     document.add_heading(filename, 1)
+
+     p = document.add_paragraph()
+     run = p.add_run()
+     run.add_break()
+
+     old_content = old_content.split(" ")
+     changed_content = new_content.split(" ")
+     both = [word for word in changed_content if word in old_content]
+     i = 0
+
+     # `i` stays 0: the loop advances by popping or deleting words from the
+     # front of both lists until one of them runs out.
+     while i < len(changed_content):
+         try:
+             if changed_content[i] == old_content[i]:
+                 p.add_run(" " + changed_content[i])
+                 both.pop(0)
+                 old_content.pop(0)
+                 changed_content.pop(0)
+             else:
+                 old_pos = old_content.index(both[0])
+                 new_pos = changed_content.index(both[0])
+                 p.add_run(" " + " ".join(old_content[0:old_pos])).font.strike = True
+                 p.add_run(" " + " ".join(changed_content[0:new_pos])).font.highlight_color = WD_COLOR_INDEX.YELLOW
+                 del old_content[0:old_pos]
+                 del changed_content[0:new_pos]
+         except (IndexError, ValueError):
+             # No common anchor word left: emit the remainders and stop.
+             p.add_run(" ".join(old_content[i:])).font.strike = True
+             p.add_run(" ".join(changed_content[i:])).font.highlight_color = WD_COLOR_INDEX.YELLOW
+             break
+
+     return document
+
+
+ def create_content(old_content, new_content):
+     old_content = old_content.split(" ")
+     changed_content = new_content.split(" ")
+     both = [word for word in changed_content if word in old_content]
+     new_content = ""
+     i = 0
+
+     while i < len(changed_content):
+         try:
+             if changed_content[i] == old_content[i]:
+                 new_content += " " + changed_content[i]
+                 both.pop(0)
+                 old_content.pop(0)
+                 changed_content.pop(0)
+             else:
+                 old_pos = old_content.index(both[0])
+                 new_pos = changed_content.index(both[0])
+                 new_content += " <s>" + " ".join(old_content[0:old_pos]) + "</s> "
+                 new_content += " ".join(changed_content[0:new_pos])
+                 del old_content[0:old_pos]
+                 del changed_content[0:new_pos]
+         except (IndexError, ValueError):
+             new_content += " <s>" + " ".join(old_content[i:]) + "</s> "
+             new_content += " " + " ".join(changed_content[i:])
+             break
+
+     return new_content
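
The word-alignment logic is easiest to see in the HTML variant, since it returns a plain string: matching words pass through, deleted words are wrapped in `<s>`, and insertions are spliced in from the edited text. A small sketch:

```python
# Sketch: word-level diff produced by create_content.
from runners.transcription_zipper import create_content

old = "the quick brown fox jumps over the dog"
new = "the quick red fox jumps over the lazy dog"
print(create_content(old, new))
# -> " the quick <s>brown</s> red fox jumps over the <s></s> lazy dog"
# (pure insertions like "lazy" leave an empty <s></s> marker behind)
```
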
src/service.py ADDED
@@ -0,0 +1,56 @@
+ import os
+ import tempfile
+
+ import bentoml
+ from bentoml.io import JSON, File
+
+ from runners.audio_transcriber import AudioTranscriber
+ from runners.keyword_extractor import KeywordExtractor
+ from runners.transcription_zipper import TranscriptionZipper
+ from runners.audio_amplitude import AudioAmplitude
+
+ runner_audio_transcriber = bentoml.Runner(
+     AudioTranscriber,
+     name="audio_transcriber",
+ )
+ runner_audio_amplitude = bentoml.Runner(
+     AudioAmplitude,
+     name="audio_amplitude",
+ )
+ runner_keyword_extractor = bentoml.Runner(
+     KeywordExtractor,
+     name="keyword_extractor",
+ )
+
+ runner_transcription_zipper = bentoml.Runner(
+     TranscriptionZipper,
+     name="transcription_zipper"
+ )
+
+ svc = bentoml.Service(
+     "speech_to_text_pipeline",
+     runners=[
+         runner_audio_transcriber,
+         runner_audio_amplitude,
+         runner_keyword_extractor,
+         runner_transcription_zipper,
+     ],
+ )
+
+
+ @svc.api(input=File(), output=JSON())
+ async def process_uploaded_file(file):
+     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+         temp_file.write(file.read())
+         temp_file_path = temp_file.name
+     transcript, info, segment_info = await runner_audio_transcriber.transcribe_audio_faster.async_run(temp_file_path)
+     # amplitudes = await runner_audio_amplitude.get_audio_amplitude.async_run(temp_file_path)
+     output = {"file_name": file.name, "transcript": transcript, "language": info.language,
+               "file_size": os.stat(temp_file_path).st_size, "segments": segment_info}
+     return output
+
+
+ @svc.api(input=JSON(), output=File())
+ async def zip_transcription(transcription):
+     zip_file = await runner_transcription_zipper.zip_transcription.async_run(transcription)
+     return zip_file
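
For a quick end-to-end check without an HTTP server, the runners can be initialized in-process and the API coroutine awaited directly; a sketch (run from `src/`; `sample.mp3` is a placeholder, and `bentoml serve` remains the normal entry point):

```python
# Sketch: call the service API in-process, for debugging only.
import asyncio

import service


async def main():
    for runner in service.svc.runners:
        runner.init_local(quiet=True)  # run each runner in this process
    with open("sample.mp3", "rb") as f:
        result = await service.process_uploaded_file(f)
    print(result["language"], result["file_size"])


asyncio.run(main())
```
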