katospiegel commited on
Commit
c72eec6
·
1 Parent(s): 736b9fb

feat: v0.1.0

Browse files
.env.dist CHANGED
@@ -5,6 +5,19 @@ TASK=
5
  LANGUAGE=
6
  INPUT_FILE=
7
  OUTPUT_FILE=
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # ODTP ENV VARIABLES TO CONNECT
10
  ODTP_MONGO_SERVER=
@@ -19,8 +32,4 @@ ODTP_DIGITAL_TWIN=
19
  ODTP_EXCUTION=
20
  ODTP_STEP=
21
  ODTP_COMPONENT=
22
- ODTP_COMPONENT_VERSION=
23
-
24
- #ODTP_API_MODE=TRUE
25
- #ODTP_GRADIO_SHARE=TRUE
26
- TODO: User and password
 
5
  LANGUAGE=
6
  INPUT_FILE=
7
  OUTPUT_FILE=
8
+ QUANTIZE=
9
+
10
+ # VARIABLES RELATED TO THE FULL PIPELINE
11
+ FULL_PIPELINE=
12
+ INPUT_METADATA_FILE=
13
+ S3_MEDIA_BUCKET=
14
+ S3_MEDIA_REGION=
15
+ S3_MEDIA_SECRET=
16
+ S3_MEDIA_KEY=
17
+
18
+ # ODTP ENV VARIABLES FOR API MODE
19
+ ODTP_API_MODE=
20
+ ODTP_GRADIO_SHARE=
21
 
22
  # ODTP ENV VARIABLES TO CONNECT
23
  ODTP_MONGO_SERVER=
 
32
  ODTP_EXCUTION=
33
  ODTP_STEP=
34
  ODTP_COMPONENT=
35
+ ODTP_COMPONENT_VERSION=
 
 
 
 
.github/workflows/multiplatform_docker_build.yml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Multi-Platform Docker Build
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish:
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ # Step 1: Check out the repository and submodules
12
+ - name: Check out code
13
+ uses: actions/checkout@v3
14
+ with:
15
+ submodules: true # Fetch submodules
16
+ fetch-depth: 0 # Ensure the full history is fetched
17
+
18
+ # Step 2: Set up Docker Buildx
19
+ - name: Set up Docker Buildx
20
+ uses: docker/setup-buildx-action@v2
21
+
22
+ # Step 3: Install yq
23
+ - name: Install yq
24
+ run: |
25
+ sudo apt-get update && sudo apt-get install -y wget
26
+ sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
27
+ sudo chmod +x /usr/bin/yq
28
+
29
+ # Step 4: Extract component-version and component-name from odtp.yml
30
+ - name: Extract component-version and component-name
31
+ id: extract_info
32
+ run: |
33
+ VERSION=$(yq e '.component-version' odtp.yml)
34
+ NAME=$(yq e '.component-name' odtp.yml)
35
+ echo "VERSION=${VERSION}"
36
+ echo "NAME=${NAME}"
37
+ echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
38
+ echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
39
+
40
+ # Step 5: Log in to GitHub Container Registry
41
+ - name: Log in to GitHub Container Registry
42
+ uses: docker/login-action@v2
43
+ with:
44
+ registry: ghcr.io
45
+ username: ${{ github.actor }}
46
+ password: ${{ secrets.GITHUB_TOKEN }}
47
+
48
+ # Step 6: Build and push Docker image for multiple platforms
49
+ - name: Build and push Docker image
50
+ run: |
51
+ IMAGE_NAME=ghcr.io/${{ github.repository }}/${{ env.COMPONENT_NAME }}
52
+ docker buildx build \
53
+ --platform linux/amd64,linux/arm64 \
54
+ --build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
55
+ -t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
56
+ -t $IMAGE_NAME:latest \
57
+ --push .
.github/workflows/multiplatform_docker_build_dockerhub.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Multi-Platform Docker Build for Dockerhub
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish:
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ # Step 1: Check out the repository and submodules
12
+ - name: Check out code
13
+ uses: actions/checkout@v3
14
+ with:
15
+ submodules: true # Fetch submodules
16
+ fetch-depth: 0 # Ensure the full history is fetched
17
+
18
+ # Step 2: Set up Docker Buildx
19
+ - name: Set up Docker Buildx
20
+ uses: docker/setup-buildx-action@v2
21
+
22
+ # Step 3: Install yq
23
+ - name: Install yq
24
+ run: |
25
+ sudo apt-get update && sudo apt-get install -y wget
26
+ sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
27
+ sudo chmod +x /usr/bin/yq
28
+
29
+ # Step 4: Extract component-version and component-name from odtp.yml
30
+ - name: Extract component-version and component-name
31
+ id: extract_info
32
+ run: |
33
+ VERSION=$(yq e '.component-version' odtp.yml)
34
+ NAME=$(yq e '.component-name' odtp.yml)
35
+ echo "VERSION=${VERSION}"
36
+ echo "NAME=${NAME}"
37
+ echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
38
+ echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
39
+
40
+ # Step 5: Log in to Docker Hub
41
+ - name: Log in to Docker Hub
42
+ uses: docker/login-action@v2
43
+ with:
44
+ username: ${{ secrets.DOCKER_USERNAME }}
45
+ password: ${{ secrets.DOCKER_PASSWORD }}
46
+
47
+ # Step 6: Build and push Docker image for multiple platforms
48
+ - name: Build and push Docker image
49
+ run: |
50
+ IMAGE_NAME=${{ secrets.DOCKER_USERNAME }}/${{ env.COMPONENT_NAME }}
51
+ docker buildx build \
52
+ --platform linux/amd64,linux/arm64 \
53
+ --build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
54
+ -t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
55
+ -t $IMAGE_NAME:latest \
56
+ --push .
.github/workflows/push_to_hf.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ # to run this workflow manually from the Actions tab
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ sync-to-hub:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v3
13
+ with:
14
+ fetch-depth: 0
15
+ lfs: true
16
+ - name: Push to hub
17
+ env:
18
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
+ HF_USERNAME: ${{secrets.HF_USERNAME}}
20
+ run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/katospiegel/odtp-pyannote-whisper main
Dockerfile CHANGED
@@ -1,16 +1,25 @@
1
  FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
2
 
3
- # Set environment variable to avoid interactive prompts
4
- ENV DEBIAN_FRONTEND=noninteractive
5
-
6
- # Weasyprint is necessary for pdf printing
7
- RUN apt-get update && apt-get install -y apt-utils weasyprint
8
 
9
  RUN apt-get install -y python3.11 python3.11-venv python3-pip
10
 
11
- COPY odtp-component-client/requirements.txt /tmp/odtp.requirements.txt
12
- RUN pip install -r /tmp/odtp.requirements.txt
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
14
 
15
  #######################################################################
16
  # PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
@@ -23,7 +32,7 @@ RUN pip install -r /tmp/requirements.txt
23
  # Dependencies
24
 
25
  RUN apt-get update && \
26
- apt-get install -y zip git && \
27
  apt-get clean && \
28
  rm -rf /var/lib/apt/lists/*
29
 
@@ -31,6 +40,8 @@ RUN apt-get update && \
31
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
32
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
33
 
 
 
34
 
35
  ######################################################################
36
  # ODTP COMPONENT CONFIGURATION.
@@ -41,18 +52,12 @@ COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
41
  # ODTP Preparation
42
  ##################################################
43
 
44
- RUN mkdir /odtp \
45
- /odtp/odtp-config \
46
- /odtp/odtp-app \
47
- /odtp/odtp-component-client \
48
- /odtp/odtp-logs \
49
- /odtp/odtp-input \
50
- /odtp/odtp-workdir \
51
- /odtp/odtp-output
52
 
53
- # This last 2 folders are specific from odtp-eqasim
54
- RUN mkdir /odtp/odtp-workdir/cache \
55
- /odtp/odtp-workdir/output
56
 
57
  # This copy all the information for running the ODTP component
58
  COPY odtp.yml /odtp/odtp-config/odtp.yml
@@ -66,8 +71,15 @@ WORKDIR /odtp
66
  # Fix for end of the line issue on Windows
67
  ##################################################
68
 
 
 
 
 
 
 
69
  # Fix for end of the line issue on Windows. Avoid error when building on windows
70
  RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
71
 
 
72
 
73
  ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
 
1
  FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
2
 
3
+ RUN apt-get update && apt-get install -y apt-utils
 
 
 
 
4
 
5
  RUN apt-get install -y python3.11 python3.11-venv python3-pip
6
 
7
+ # Create directories and set permissions before switching to the non-root user
8
+ RUN mkdir -p /odtp/odtp-tmp \
9
+ /odtp \
10
+ /odtp/odtp-config \
11
+ /odtp/odtp-app \
12
+ /odtp/odtp-component-client \
13
+ /odtp/odtp-logs \
14
+ /odtp/odtp-input \
15
+ /odtp/odtp-workdir \
16
+ /odtp/odtp-output \
17
+ /home/user && \
18
+ chown -R 1000:1000 /odtp /home/user
19
+
20
 
21
+ COPY odtp-component-client/requirements.txt /odtp/odtp-tmp/odtp.requirements.txt
22
+ RUN pip install -r /odtp/odtp-tmp/odtp.requirements.txt
23
 
24
  #######################################################################
25
  # PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
 
32
  # Dependencies
33
 
34
  RUN apt-get update && \
35
+ apt-get install -y zip git libglib2.0-0 libpango1.0-0 && \
36
  apt-get clean && \
37
  rm -rf /var/lib/apt/lists/*
38
 
 
40
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
41
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
42
 
43
+ # Adjust permissions so user 1000 can access /usr/local/bin
44
+ RUN chown -R 1000:1000 /usr/local/bin/
45
 
46
  ######################################################################
47
  # ODTP COMPONENT CONFIGURATION.
 
52
  # ODTP Preparation
53
  ##################################################
54
 
55
+ # Switch to the "user" user
56
+ USER 1000
 
 
 
 
 
 
57
 
58
+ # Set home to the user's home directory
59
+ ENV HOME=/home/user \
60
+ PATH=/home/user/.local/bin:$PATH
61
 
62
  # This copy all the information for running the ODTP component
63
  COPY odtp.yml /odtp/odtp-config/odtp.yml
 
71
  # Fix for end of the line issue on Windows
72
  ##################################################
73
 
74
+ # Switch back to root user to run sed command
75
+ USER root
76
+ RUN chown -R 1000:1000 /odtp
77
+
78
+ # Switch back to the "user" user
79
+ USER 1000
80
  # Fix for end of the line issue on Windows. Avoid error when building on windows
81
  RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
82
 
83
+ EXPOSE 7860
84
 
85
  ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
LICENSE CHANGED
@@ -1,15 +1,202 @@
1
- BSD 3-Clause "New" or "Revised" License
2
- Licence ID
3
- BSD-3-Clause
4
- Licence text
5
- Copyright (c) 2023-2024 Swiss Data Science Center. All rights reserved.
6
 
7
- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 
 
8
 
9
- 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10
 
11
- 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
12
 
13
- 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 
14
 
15
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
 
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
 
8
+ 1. Definitions.
9
 
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
 
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [2025] [SDSC]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
README.md CHANGED
@@ -1,20 +1,28 @@
1
  # odtp-pyannote-whisper
2
 
3
- This component is still under development.
4
-
5
- Add here your badges:
6
- [![Launch in your ODTP](https://img.shields.io/badge/Launch%20in%20your-ODTP-blue?logo=launch)](http://localhost:8501/launch-component)
7
- [![Compatible with ODTP v0.5.x](https://img.shields.io/badge/Compatible%20with-ODTP%20v0.5.0-green)]("")
8
 
9
  > [!NOTE]
10
  > This repository makes use of submodules. Therefore, when cloning it you need to include them.
11
  >
12
  > `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
13
 
14
- This pipeline processes a `.wav` audio file by detecting the number of speakers present in the recording using `pyannote.audio`. For each detected speaker segment, it employs `OpenAI's Whisper model` to transcribe or translate the speech individually. This approach ensures accurate and speaker-specific transcriptions or translations, providing a clear understanding of who said what throughout the audio.
15
 
16
  Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
17
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ## Table of Contents
19
 
20
  - [Tools Information](#tools-information)
@@ -42,12 +50,12 @@ Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You
42
 
43
  ## How to add this component to your ODTP instance
44
 
45
- In order to add this component to your ODTP CLI, you can use. If you want to use the component directly, please refer to the docker section.
46
 
47
  ``` bash
48
  odtp new odtp-component-entry \
49
  --name odtp-pyannote-whisper \
50
- --component-version v0.0.1 \
51
  --repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
52
  ```
53
 
@@ -92,14 +100,32 @@ Build the dockerfile.
92
  docker build -t odtp-pyannote-whisper .
93
  ```
94
 
95
- Run the following command. Mount the correct volumes for input/output/logs folders.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  ``` bash
98
  docker run -it --rm \
99
  -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
100
  -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
101
  -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
102
- --env-file .env odtp-pyannote-whisper
 
103
  ```
104
 
105
  ### Development Mode
@@ -128,24 +154,42 @@ docker run -it --rm \
128
  --env-file .env odtp-pyannote-whisper
129
  ```
130
 
 
 
 
 
 
 
 
 
 
 
 
131
  ### Running in API Mode
132
 
133
- To run the component in API mode and expose a port, use the following command:
 
 
 
 
 
 
 
134
 
135
  ``` bash
136
  docker run -it --rm \
137
- -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
138
- -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
139
- -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
140
- -p {HOST_PORT}:7860 \
141
  --env-file .env \
142
- --entrypoing python3 \
143
- odtp-pyannote-whisper \
144
- /odtp/odtp-app/gradio_app.py
145
  ```
146
 
147
- ## Credits and references
 
 
148
 
149
- SDSC
 
150
 
151
  This component has been created using the `odtp-component-template` `v0.5.0`.
 
 
 
1
  # odtp-pyannote-whisper
2
 
3
+ [![Compatible with ODTP v0.5.x](https://img.shields.io/badge/Compatible%20with-ODTP%20v0.5.0-green)]("") [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)
 
 
 
 
4
 
5
  > [!NOTE]
6
  > This repository makes use of submodules. Therefore, when cloning it you need to include them.
7
  >
8
  > `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
9
 
10
+ This pipeline processes a `.wav` or `mp4` media file by detecting the number of speakers present in the recording using `pyannote.audio`. For each detected speaker segment, it employs `OpenAI's Whisper model` to transcribe or translate the speech individually. This approach ensures accurate and speaker-specific transcriptions or translations, providing a clear understanding of who said what throughout the audio.
11
 
12
  Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
13
 
14
+ - [Speaker-Diarization](https://huggingface.co/pyannote/speaker-diarization-3.1)
15
+ - [Speaker-Segmentation](https://huggingface.co/pyannote/segmentation-3.0)
16
+
17
+ After accepting the terms and conditions for those models, you can obtain your Hugging Face API key to allow access to these models:
18
+
19
+ - [Hugging Face Access Keys](https://huggingface.co/settings/tokens)
20
+
21
+ This token should be provided to the component via the `ENV` variables or by the corresponding text field in the web app interface ([Here](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)).
22
+
23
+ ![](assets/screenshot.png)
24
+
25
+
26
  ## Table of Contents
27
 
28
  - [Tools Information](#tools-information)
 
50
 
51
  ## How to add this component to your ODTP instance
52
 
53
+ This component can be run directly with Docker; however, it is designed to be run with [ODTP](https://odtp-org.github.io/odtp-manuals/). In order to add this component to your ODTP CLI, you can use the command below. If you want to use the component directly, please refer to the Docker section.
54
 
55
  ``` bash
56
  odtp new odtp-component-entry \
57
  --name odtp-pyannote-whisper \
58
+ --component-version v0.1.0 \
59
  --repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
60
  ```
61
 
 
100
  docker build -t odtp-pyannote-whisper .
101
  ```
102
 
103
+ Then create a `.env` file similar to `.env.dist` and fill in the variable values, as in this example:
104
+
105
+ ```
106
+ MODEL=base
107
+ HF_TOKEN=hf_xxxxxxxxxxx
108
+ TASK=transcribe
109
+ INPUT_FILE=HRC_20220328T0000.mp4
110
+ OUTPUT_FILE=HRC_20220328T0000
111
+ VERBOSE=TRUE
112
+ ```
113
+
114
+ Then create 3 folders:
115
+
116
+ - `odtp-input`, where your input data should be located.
117
+ - `odtp-output`, where your output data will be stored.
118
+ - `odtp-logs`, where the logs will be shared.
119
+
120
+ After this, you can run the following command and the pipeline will execute.
121
 
122
  ``` bash
123
  docker run -it --rm \
124
  -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
125
  -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
126
  -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
127
+ --env-file .env \
128
+ odtp-pyannote-whisper
129
  ```
130
 
131
  ### Development Mode
 
154
  --env-file .env odtp-pyannote-whisper
155
  ```
156
 
157
+ On Windows, this is the command to execute.
158
+
159
+ ``` powershell
160
+ docker run -it --rm `
161
+ --gpus all `
162
+ -v ${PWD}/odtp-input:/odtp/odtp-input `
163
+ -v ${PWD}/odtp-output:/odtp/odtp-output `
164
+ -v ${PWD}/odtp-logs:/odtp/odtp-logs `
165
+ --env-file .env odtp-pyannote-whisper
166
+ ```
167
+
168
  ### Running in API Mode
169
 
170
+ To run the component in API mode and expose a port, you need to use the following environment variables:
171
+
172
+ ```
173
+ ODTP_API_MODE=TRUE
174
+ ODTP_GRADIO_SHARE=FALSE #Only if you want to share the app via the gradio tunneling
175
+ ```
176
+
177
+ After the configuration, you can run:
178
 
179
  ``` bash
180
  docker run -it --rm \
181
+ -p 7860:7860 \
 
 
 
182
  --env-file .env \
183
+ odtp-pyannote-whisper
 
 
184
  ```
185
 
186
+ Then access the web interface at `localhost:7860` in your browser.
187
+
188
+ ![](assets/screenshot.png)
189
 
190
+
191
+ ## Credits and references
192
 
193
  This component has been created using the `odtp-component-template` `v0.5.0`.
194
+
195
+ The development of this repository has been carried out by SDSC.
app/add_annotation.py CHANGED
@@ -1,17 +1,3 @@
1
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-transcription_original.json /odtp/odtp-input/HRC_20160622T0000-initial.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_transcription --origin_channel original --id transcription_original
2
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-translation_original_english.json /odtp/odtp-output/HRC_20160622T0000.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_translation --origin_channel original --id translation_original_english
3
-
4
-
5
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-transcription_original.json /odtp/odtp-input/HRC_20220328T0000-initial.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_transcription --origin_channel original --id transcription_original
6
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-translation_original_english.json /odtp/odtp-output/HRC_20220328T0000.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_translation --origin_channel original --id translation_original_english
7
-
8
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-transcription_original.json /odtp/odtp-input/HRC_20220929T0000-initial.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_transcription --origin_channel original --id transcription_original
9
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-translation_original_english.json /odtp/odtp-output/HRC_20220929T0000.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_translation --origin_channel original --id translation_original_english
10
-
11
- # python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-transcription_original.json /odtp/odtp-input/HRC_20221010T1000-initial.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_transcription --origin_channel original --id transcription_original
12
- # python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-translation_original_english.json /odtp/odtp-output/HRC_20221010T1000.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_translation --origin_channel original --id translation_original_english
13
-
14
-
15
  import json
16
  import argparse
17
  from datetime import timedelta
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  import argparse
3
  from datetime import timedelta
app/app.py CHANGED
@@ -22,14 +22,15 @@ import json
22
  from dataclasses import dataclass, asdict
23
  from jsonschema import validate, ValidationError
24
 
25
- import createpdf
26
- import paragraphsCreator
27
 
28
  from pydub import AudioSegment
29
  import yt_dlp
30
 
31
  from slugify import slugify
32
  import uuid
 
33
 
34
 
35
 
@@ -470,7 +471,7 @@ def clip_audio(audio_file_path, sample_rate, start, end, output_path):
470
  # Write the audio segment to the output path
471
  sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
472
 
473
- def convert_mpx_to_wav(file_path):
474
  if file_path.lower().endswith('.mp3'):
475
  # Load the MP3 file
476
  audio = AudioSegment.from_mp3(file_path)
@@ -482,12 +483,12 @@ def convert_mpx_to_wav(file_path):
482
  raise ValueError("Input file must be an MP3 or MP4 file")
483
 
484
  # Define the output path
485
- wav_file_path = os.path.splitext(file_path)[0] + '.wav'
486
 
487
  # Export as WAV
488
- audio.export(wav_file_path, format='wav')
489
 
490
- return wav_file_path
491
 
492
 
493
  def download_youtube_video(url, filename, output_path='/tmp'):
@@ -502,6 +503,8 @@ def download_youtube_video(url, filename, output_path='/tmp'):
502
  }],
503
  }
504
 
 
 
505
  if not os.path.exists(output_path):
506
  os.makedirs(output_path)
507
 
@@ -511,10 +514,11 @@ def download_youtube_video(url, filename, output_path='/tmp'):
511
  print(output_file)
512
  base, ext = os.path.splitext(output_file)
513
 
514
- new_file = base + '.wav'
515
  return new_file
516
 
517
  import subprocess
 
518
 
519
  def convert_video_to_wav(input_file, output_file):
520
  """
@@ -551,73 +555,31 @@ def convert_video_to_wav(input_file, output_file):
551
  print(f"Error during conversion: {e}")
552
 
553
 
554
- ######################## Parallel
555
- # import multiprocessing
556
- # import tempfile
557
-
558
- # def process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
559
- # start, end, speaker = segment
560
- # clip_path = f"/tmp/speaker_{speaker}_start_{start:.1f}_end_{end:.1f}.wav"
561
- # clip_audio(file_path, sample_rate, start, end, clip_path)
562
-
563
- # result = asr_model.transcribe(start=start, end=end, options=whisper_options)
564
- # language = result.get('language', args.language or 'unknown')
565
-
566
- # if args.verbose:
567
- # print(f"start={start:.1f}s stop={end:.1f}s lang={language} {speaker}")
568
-
569
- # return {
570
- # 'result': result,
571
- # 'speaker': speaker,
572
- # 'start': start,
573
- # 'language': language
574
- # }
575
-
576
- # def chunkify(lst, n):
577
- # for i in range(0, len(lst), n):
578
- # yield lst[i:i + n]
579
-
580
- # def process_chunk(chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
581
- # results = []
582
- # for segment in chunk:
583
- # result = process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)
584
- # results.append(result)
585
-
586
- # temp_file = tempfile.mktemp(suffix='.json')
587
- # with open(temp_file, 'w') as f:
588
- # json.dump(results, f)
589
-
590
- # return temp_file
591
-
592
- ########################
593
-
594
-
595
  def main(args):
596
  # TODO: Take out the file_path from ODTP here
597
- if args.input_file.startswith('http://') or args.input_file.startswith('https://'):
598
- file_path = download_youtube_video(args.input_file, filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
599
  base_slug = slugify(file_path, separator='_')
600
- #file_path = convert_mpx_to_wav(file_path)
601
  elif args.input_file.lower().endswith('.mp3'):
602
  file_path = convert_mpx_to_wav(args.input_file)
603
- #file_path = "/odtp/odtp-input/" + file_path
604
  elif args.input_file.lower().endswith('.wav'):
605
  file_path = args.input_file
606
- #file_path = "/odtp/odtp-input/" + file_path
607
  elif args.input_file.lower().endswith('.mp4'):
608
- file_path = convert_mpx_to_wav(args.input_file)
609
- #file_path = "/odtp/odtp-input/" + file_path
610
  elif args.input_file.lower().endswith('.rm'):
611
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '.wav')
612
  convert_video_to_wav(args.input_file, file_path)
613
  elif args.input_file.lower().endswith('.f4v'):
614
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '.wav')
615
  convert_video_to_wav(args.input_file, file_path)
616
  elif args.input_file.lower().endswith('.mkv'):
617
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '.wav')
618
  convert_video_to_wav(args.input_file, file_path)
619
  else:
620
- raise ValueError("Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file")
621
 
622
 
623
  diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
@@ -691,39 +653,21 @@ def main(args):
691
  writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
692
 
693
  writer_json.finalize()
694
- # Parallel testing
695
- # chunk_size = 2 #args.chunk_size # Assume chunk_size is passed as an argument
696
- # temp_files = []
697
-
698
- # with multiprocessing.Pool() as pool:
699
- # chunks = list(chunkify(grouped_segments, chunk_size))
700
- # results = [pool.apply_async(process_chunk, (chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)) for chunk in chunks]
701
-
702
- # for result in results:
703
- # temp_file = result.get()
704
- # temp_files.append(temp_file)
705
-
706
- # for temp_file in temp_files:
707
- # with open(temp_file, 'r') as f:
708
- # results = json.load(f)
709
- # for result in results:
710
- # writer(result['result'], args.output_file, result['speaker'], result['start'], writer_options)
711
- # writer_json(generate_segments(result['result']['segments'], result['speaker'], result['language']), args.output_json_file)
712
- # os.remove(temp_file)
713
 
714
  # If you want to validate JSON, paragraphs, PDF creation, etc.
715
- paragraphsCreator.process_paragraphs(
716
  args.output_json_file,
717
  args.output_paragraphs_json_file,
718
  3
719
  )
720
- createpdf.convert_json_to_pdf(
721
  args.output_paragraphs_json_file,
722
  args.output_md_file,
723
  args.output_pdf_file
724
  )
725
 
726
 
 
727
  if __name__ == '__main__':
728
  # Multiprocessing requires spawn when working with CUDA
729
  #multiprocessing.set_start_method('spawn')
 
22
  from dataclasses import dataclass, asdict
23
  from jsonschema import validate, ValidationError
24
 
25
+ import create_pdf
26
+ import paragraphs_creator
27
 
28
  from pydub import AudioSegment
29
  import yt_dlp
30
 
31
  from slugify import slugify
32
  import uuid
33
+ import yaml
34
 
35
 
36
 
 
471
  # Write the audio segment to the output path
472
  sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
473
 
474
+ def convert_mpx_to_wav(file_path, output_path):
475
  if file_path.lower().endswith('.mp3'):
476
  # Load the MP3 file
477
  audio = AudioSegment.from_mp3(file_path)
 
483
  raise ValueError("Input file must be an MP3 or MP4 file")
484
 
485
  # Define the output path
486
+ #wav_file_path = os.path.splitext(file_path)[0] + '.wav'
487
 
488
  # Export as WAV
489
+ audio.export(output_path, format='wav')
490
 
491
+ return output_path
492
 
493
 
494
  def download_youtube_video(url, filename, output_path='/tmp'):
 
503
  }],
504
  }
505
 
506
+
507
+
508
  if not os.path.exists(output_path):
509
  os.makedirs(output_path)
510
 
 
514
  print(output_file)
515
  base, ext = os.path.splitext(output_file)
516
 
517
+ new_file = base + '-original.wav'
518
  return new_file
519
 
520
  import subprocess
521
+ import shutil
522
 
523
  def convert_video_to_wav(input_file, output_file):
524
  """
 
555
  print(f"Error during conversion: {e}")
556
 
557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  def main(args):
559
  # TODO: Take out the file_path from ODTP here
560
+ if args.input_file.startswith('/odtp/odtp-input/http://') or args.input_file.startswith('/odtp/odtp-input/https://'):
561
+ file_path = download_youtube_video(args.input_file.replace("/odtp/odtp-input/",""), filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
562
  base_slug = slugify(file_path, separator='_')
 
563
  elif args.input_file.lower().endswith('.mp3'):
564
  file_path = convert_mpx_to_wav(args.input_file, "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mp3', '-original.wav'))
565
+ shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.mp3')))
566
  elif args.input_file.lower().endswith('.wav'):
567
  file_path = args.input_file
568
+ shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.wav')))
569
  elif args.input_file.lower().endswith('.mp4'):
570
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mp4', '-original.wav')
571
+ convert_mpx_to_wav(args.input_file, file_path)
572
  elif args.input_file.lower().endswith('.rm'):
573
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '-original.wav')
574
  convert_video_to_wav(args.input_file, file_path)
575
  elif args.input_file.lower().endswith('.f4v'):
576
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '-original.wav')
577
  convert_video_to_wav(args.input_file, file_path)
578
  elif args.input_file.lower().endswith('.mkv'):
579
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '-original.wav')
580
  convert_video_to_wav(args.input_file, file_path)
581
  else:
582
+ raise ValueError(f"Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file. Input file: {args.input_file}")
583
 
584
 
585
  diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
 
653
  writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
654
 
655
  writer_json.finalize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
 
657
  # If you want to validate JSON, paragraphs, PDF creation, etc.
658
+ paragraphs_creator.process_paragraphs(
659
  args.output_json_file,
660
  args.output_paragraphs_json_file,
661
  3
662
  )
663
+ create_pdf.convert_json_to_pdf(
664
  args.output_paragraphs_json_file,
665
  args.output_md_file,
666
  args.output_pdf_file
667
  )
668
 
669
 
670
+
671
  if __name__ == '__main__':
672
  # Multiprocessing requires spawn when working with CUDA
673
  #multiprocessing.set_start_method('spawn')
app/app.sh CHANGED
@@ -1,30 +1,72 @@
1
  #!/bin/bash
2
 
3
- # if [ -n "$LANGUAGE" ]; then
4
- python3 /odtp/odtp-app/app.py \
5
- --model $MODEL \
6
- $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
7
- --hf-token $HF_TOKEN \
8
- --task $TASK \
9
- $( [ "$LANGUAGE" = "TRUE" ] && echo "--language" ) \
10
- --input-file /odtp/odtp-input/$INPUT_FILE \
11
- --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
12
- --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
13
- --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}_paragraphs.json \
14
- --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
15
- --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
16
- $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
17
- # else
18
- # python3 /odtp/odtp-app/app.py \
19
- # --model $MODEL \
20
- # $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
21
- # --hf-token $HF_TOKEN \
22
- # --task $TASK \
23
- # --input-file /odtp/odtp-input/$INPUT_FILE \
24
- # --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
25
- # --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
26
- # --output-paragraphs-json-file /odtp/odtp-output/$OUTPUT_FILE-paragraphs.json \
27
- # --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
28
- # --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
29
- # $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
30
- # fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/bin/bash
2
 
3
+ if [ -n "$FULL_PIPELINE" ]; then
4
+
5
+ echo "RUNNING TRANSCRIPTION AND EN TRANSLATION PIPELINE"
6
+ python3 /odtp/odtp-app/app.py \
7
+ --model $MODEL \
8
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
9
+ --hf-token $HF_TOKEN \
10
+ --task transcribe \
11
+ --input-file /odtp/odtp-input/$INPUT_FILE \
12
+ --output-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.srt \
13
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
14
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-transcription_original_paragraphs.json \
15
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.md \
16
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.pdf \
17
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
18
+
19
+ python3 /odtp/odtp-app/app.py \
20
+ --model $MODEL \
21
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
22
+ --hf-token $HF_TOKEN \
23
+ --task translate \
24
+ --language en \
25
+ --input-file /odtp/odtp-input/$INPUT_FILE \
26
+ --output-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.srt \
27
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
28
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-translation_original_english_paragraphs.json \
29
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.md \
30
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.pdf \
31
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
32
+
33
+ echo "Adding annotations"
34
+ python3 /odtp/odtp-app/add_annotation.py \
35
+ /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
36
+ /odtp/odtp-input/$INPUT_METADATA_FILE \
37
+ /odtp/odtp-output/$OUTPUT_FILE.json \
38
+ --type audio_transcription \
39
+ --origin_channel original \
40
+ --id transcription_original
41
+
42
+ python3 /odtp/odtp-app/add_annotation.py \
43
+ /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
44
+ /odtp/odtp-output/$OUTPUT_FILE.json \
45
+ /odtp/odtp-output/$OUTPUT_FILE.json \
46
+ --type audio_translation \
47
+ --origin_channel original \
48
+ --id translation_original_english
49
+
50
+ echo "Generating yml file"
51
+ python3 /odtp/odtp-app/project_metadata_export.py /odtp/odtp-output/
52
+
53
+ echo "Uploading to S3"
54
+ #python3 /odtp/odtp-app/s3_upload.py
55
+ #TBD
56
+
57
+ else
58
+ python3 /odtp/odtp-app/app.py \
59
+ --model $MODEL \
60
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
61
+ --hf-token $HF_TOKEN \
62
+ --task $TASK \
63
+ $( [ -n "$LANGUAGE" ] && echo "--language $LANGUAGE" ) \
64
+ --input-file /odtp/odtp-input/$INPUT_FILE \
65
+ --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
66
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
67
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}_paragraphs.json \
68
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
69
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
70
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
71
+ fi
72
+
app/{createpdf.py → create_pdf.py} RENAMED
File without changes
app/{paragraphsCreator.py → paragraphs_creator.py} RENAMED
File without changes
app/project_metadata_export.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import re
5
+ import yaml
6
+
7
+ def parse_basename_and_date(folder):
8
+ """
9
+ Searches the folder for a file matching the pattern 'HRC_YYYYMMDDT[HHMM]'.
10
+ Returns the base name and a formatted session date (e.g., "2016 06 22 00:00").
11
+ """
12
+ pattern = re.compile(r"^(HRC_\d{8}T\d{4})")
13
+ for filename in os.listdir(folder):
14
+ match = pattern.match(filename)
15
+ if match:
16
+ base_name = match.group(1)
17
+ # Extract date and time parts from the base name
18
+ dt_match = re.match(r"HRC_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})", base_name)
19
+ if dt_match:
20
+ year, month, day, hour, minute = dt_match.groups()
21
+ session_date = f"{year} {month} {day} {hour}:{minute}"
22
+ return base_name, session_date
23
+ return None, None
24
+
25
+ def check_video_file(folder, base_name):
26
+ """
27
+ Checks if an MP4 file with the given base name exists in the folder.
28
+ """
29
+ video_filename = f"{base_name}.mp4"
30
+ return video_filename in os.listdir(folder)
31
+
32
+ def generate_metadata(base_name, session_date, include_video):
33
+ """
34
+ Builds a metadata dictionary containing file entries based on the base name,
35
+ session date, and whether a video file is present.
36
+ """
37
+ metadata = {
38
+ "files": [
39
+ {
40
+ "name": f"{base_name}.json",
41
+ "type": "json",
42
+ "description": f"JSON file containing metadata transcription and translation from the {session_date} session"
43
+ },
44
+ {
45
+ "name": f"{base_name}-files.yml",
46
+ "type": "yml",
47
+ "description": f"YAML file containing metadata of the files from the {session_date} session"
48
+ }
49
+ ]
50
+ }
51
+
52
+ if include_video:
53
+ metadata["files"].append({
54
+ "name": f"{base_name}.mp4",
55
+ "type": "mp4",
56
+ "description": f"MP4 video file from the {session_date} session"
57
+ })
58
+
59
+ metadata["files"].extend([
60
+ {
61
+ "name": f"{base_name}-original.wav",
62
+ "type": "wav",
63
+ "description": f"Original audio file from the {session_date} session"
64
+ },
65
+ {
66
+ "name": f"{base_name}-transcription_original.srt",
67
+ "type": "srt",
68
+ "description": f"Transcription file in SRT format from the original audio of the {session_date} session"
69
+ },
70
+ {
71
+ "name": f"{base_name}-transcription_original.pdf",
72
+ "type": "pdf",
73
+ "description": f"PDF file containing the transcription from the original audio of the {session_date} session"
74
+ },
75
+ {
76
+ "name": f"{base_name}-translation_original_english.srt",
77
+ "type": "srt",
78
+ "description": f"Translation file in SRT format to English from the original audio of the {session_date} session"
79
+ },
80
+ {
81
+ "name": f"{base_name}-translation_original_english.pdf",
82
+ "type": "pdf",
83
+ "description": f"PDF file containing the English translation from the original audio of the {session_date} session"
84
+ }
85
+ ])
86
+
87
+ return metadata
88
+
89
+ def write_yaml_file(metadata, output_file):
90
+ """
91
+ Writes the metadata dictionary to a YAML file.
92
+ """
93
+ with open(output_file, "w") as f:
94
+ yaml.dump(metadata, f, sort_keys=False, default_flow_style=True)
95
+ print(f"Metadata YAML file written to {output_file}")
96
+
97
+ def main():
98
+ parser = argparse.ArgumentParser(
99
+ description="Generate YAML metadata for session files in a folder."
100
+ )
101
+ parser.add_argument("folder", help="Path to the folder containing the session files.")
102
+ args = parser.parse_args()
103
+
104
+ folder = args.folder
105
+ if not os.path.isdir(folder):
106
+ print(f"Error: {folder} is not a valid directory.")
107
+ return
108
+
109
+ base_name, session_date = parse_basename_and_date(folder)
110
+ if not base_name:
111
+ print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
112
+ return
113
+
114
+ include_video = check_video_file(folder, base_name)
115
+ metadata = generate_metadata(base_name, session_date, include_video)
116
+
117
+ # Output file is always in the same folder and named as <base_name>-files.yml
118
+ output_file = os.path.join(folder, f"{base_name}-files.yml")
119
+ write_yaml_file(metadata, output_file)
120
+
121
+ if __name__ == "__main__":
122
+ main()
app/s3_upload.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import re
4
+ import argparse
5
+ import boto3
6
+ from botocore.exceptions import NoCredentialsError, ClientError
7
+
8
+ def parse_basename(folder):
9
+ """
10
+ Scans the folder for a file matching the pattern 'HRC_YYYYMMDDT[HHMM]'
11
+ and returns the base name.
12
+ """
13
+ pattern = re.compile(r"^(HRC_\d{8}T\d{4})")
14
+ for filename in os.listdir(folder):
15
+ match = pattern.match(filename)
16
+ if match:
17
+ return match.group(1)
18
+ return None
19
+
20
+ def upload_files_to_s3(folder, bucket, base_name, region):
21
+ """
22
+ Uploads all files in the folder that start with the base_name to the specified S3 bucket,
23
+ placing them under a folder (key prefix) named after the base_name.
24
+ """
25
+ s3_key = os.environ.get("S3_MEDIA_KEY")
26
+ s3_secret = os.environ.get("S3_MEDIA_SECRET")
27
+
28
+ s3_client = boto3.client('s3', aws_access_key_id=s3_key, aws_secret_access_key=s3_secret, region_name=region)
29
+
30
+
31
+ # Gather all files that start with the base name
32
+ files_to_upload = [f for f in os.listdir(folder) if f.startswith(base_name)]
33
+ if not files_to_upload:
34
+ print(f"No files starting with '{base_name}' found in {folder}")
35
+ return
36
+
37
+ for file in files_to_upload:
38
+ file_path = os.path.join(folder, file)
39
+ s3_key = f"{base_name}/{file}" # Create a folder in S3 named after the base name
40
+ try:
41
+ s3_client.upload_file(file_path, bucket, s3_key)
42
+ print(f"Uploaded '{file}' to s3://{bucket}/{s3_key}")
43
+ except (NoCredentialsError, ClientError) as e:
44
+ print(f"Failed to upload '{file}': {e}")
45
+
46
+ def main():
47
+ parser = argparse.ArgumentParser(
48
+ description="Upload session files to an S3 bucket under a folder named by the base name."
49
+ )
50
+ parser.add_argument("folder", help="Path to the folder containing the session files")
51
+ args = parser.parse_args()
52
+
53
+ folder = args.folder
54
+ if not os.path.isdir(folder):
55
+ print(f"Error: '{folder}' is not a valid directory.")
56
+ return
57
+
58
+ # Retrieve environment variables for the bucket and datacenter (region)
59
+ bucket = os.environ.get("S3_MEDIA_BUCKET")
60
+ if not bucket:
61
+ print("Error: S3_MEDIA_BUCKET environment variable not set.")
62
+ return
63
+
64
+ region = os.environ.get("S3_MEDIA_REGION", "us-east-1")
65
+
66
+ base_name = parse_basename(folder)
67
+ if not base_name:
68
+ print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
69
+ return
70
+
71
+ upload_files_to_s3(folder, bucket, base_name, region)
72
+
73
+ if __name__ == "__main__":
74
+ main()
75
+
assets/screenshot.png ADDED
odtp.yml CHANGED
@@ -3,8 +3,8 @@ schema-version: "v0.5.0"
3
 
4
  # Component Information
5
  component-name: odtp-pyannote-whisper
6
- component-version: "v0.0.1"
7
- component-license: AGPL 3.0
8
  component-type: ephemeral
9
  component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
10
  component-authors:
@@ -123,6 +123,30 @@ data-outputs:
123
  description: Transcription/translation output in JSON format with speaker diarization
124
  naming-convention: null
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # Validation Schemas (Future Development)
127
  schema-input: null
128
  schema-output: null
 
3
 
4
  # Component Information
5
  component-name: odtp-pyannote-whisper
6
+ component-version: "v0.1.0"
7
+ component-license: Apache 2.0
8
  component-type: ephemeral
9
  component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
10
  component-authors:
 
123
  description: Transcription/translation output in JSON format with speaker diarization
124
  naming-convention: null
125
 
126
+ - name: OUTPUT_AUDIO_FILE
127
+ type: .wav
128
+ path: /odtp/odtp-output
129
+ description: Audio in wav format
130
+ naming-convention: null
131
+
132
+ - name: OUTPUT_PARAGRAPHS_FILE
133
+ type: .json
134
+ path: /odtp/odtp-output
135
+ description: Markdown file with the paragraphs containing speaker diarization and transcription/translation
136
+ naming-convention: null
137
+
138
+ - name: OUTPUT_MD_FILE
139
+ type: .md
140
+ path: /odtp/odtp-output
141
+ description: Markdown file with the speaker diarization and transcription/translation
142
+ naming-convention: null
143
+
144
+ - name: OUTPUT_PDF_FILE
145
+ type: .pdf
146
+ path: /odtp/odtp-output
147
+ description: PDF file with the speaker diarization and transcription/translation
148
+ naming-convention: null
149
+
150
  # Validation Schemas (Future Development)
151
  schema-input: null
152
  schema-output: null
requirements.txt CHANGED
@@ -10,5 +10,7 @@ gradio==5.5.0
10
  numpy==1.24.4
11
  md2pdf==1.0.1
12
  transformers==4.48.0
13
- yt-dlp
14
- python-slugify
 
 
 
10
  numpy==1.24.4
11
  md2pdf==1.0.1
12
  transformers==4.48.0
13
+ yt-dlp==2025.1.26
14
+ python-slugify
15
+ pyyaml
16
+ boto3