Commit
·
c72eec6
1
Parent(s):
736b9fb
feat: v0.1.0
Browse files- .env.dist +14 -5
- .github/workflows/multiplatform_docker_build.yml +57 -0
- .github/workflows/multiplatform_docker_build_dockerhub.yml +56 -0
- .github/workflows/push_to_hf.yml +20 -0
- Dockerfile +31 -19
- LICENSE +197 -10
- README.md +64 -20
- app/add_annotation.py +0 -14
- app/app.py +24 -80
- app/app.sh +70 -28
- app/{createpdf.py → create_pdf.py} +0 -0
- app/{paragraphsCreator.py → paragraphs_creator.py} +0 -0
- app/project_metadata_export.py +122 -0
- app/s3_upload.py +75 -0
- assets/screenshot.png +0 -0
- odtp.yml +26 -2
- requirements.txt +4 -2
.env.dist
CHANGED
@@ -5,6 +5,19 @@ TASK=
|
|
5 |
LANGUAGE=
|
6 |
INPUT_FILE=
|
7 |
OUTPUT_FILE=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# ODTP ENV VARIABLES TO CONNECT
|
10 |
ODTP_MONGO_SERVER=
|
@@ -19,8 +32,4 @@ ODTP_DIGITAL_TWIN=
|
|
19 |
ODTP_EXCUTION=
|
20 |
ODTP_STEP=
|
21 |
ODTP_COMPONENT=
|
22 |
-
ODTP_COMPONENT_VERSION=
|
23 |
-
|
24 |
-
#ODTP_API_MODE=TRUE
|
25 |
-
#ODTP_GRADIO_SHARE=TRUE
|
26 |
-
TODO: User and password
|
|
|
5 |
LANGUAGE=
|
6 |
INPUT_FILE=
|
7 |
OUTPUT_FILE=
|
8 |
+
QUANTIZE=
|
9 |
+
|
10 |
+
# VARIABLES RELATED TO THE FULL PIPELINE
|
11 |
+
FULL_PIPELINE=
|
12 |
+
INPUT_METADATA_FILE=
|
13 |
+
S3_MEDIA_BUCKET=
|
14 |
+
S3_MEDIA_REGION=
|
15 |
+
S3_MEDIA_SECRET=
|
16 |
+
S3_MEDIA_KEY=
|
17 |
+
|
18 |
+
# ODTP ENV VARIABLES FOR API MODE
|
19 |
+
ODTP_API_MODE=
|
20 |
+
ODTP_GRADIO_SHARE=
|
21 |
|
22 |
# ODTP ENV VARIABLES TO CONNECT
|
23 |
ODTP_MONGO_SERVER=
|
|
|
32 |
ODTP_EXCUTION=
|
33 |
ODTP_STEP=
|
34 |
ODTP_COMPONENT=
|
35 |
+
ODTP_COMPONENT_VERSION=
|
|
|
|
|
|
|
|
.github/workflows/multiplatform_docker_build.yml
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Multi-Platform Docker Build
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch:
|
5 |
+
|
6 |
+
jobs:
|
7 |
+
build-and-publish:
|
8 |
+
runs-on: ubuntu-latest
|
9 |
+
|
10 |
+
steps:
|
11 |
+
# Step 1: Check out the repository and submodules
|
12 |
+
- name: Check out code
|
13 |
+
uses: actions/checkout@v3
|
14 |
+
with:
|
15 |
+
submodules: true # Fetch submodules
|
16 |
+
fetch-depth: 0 # Ensure the full history is fetched
|
17 |
+
|
18 |
+
# Step 2: Set up Docker Buildx
|
19 |
+
- name: Set up Docker Buildx
|
20 |
+
uses: docker/setup-buildx-action@v2
|
21 |
+
|
22 |
+
# Step 3: Install yq
|
23 |
+
- name: Install yq
|
24 |
+
run: |
|
25 |
+
sudo apt-get update && sudo apt-get install -y wget
|
26 |
+
sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
|
27 |
+
sudo chmod +x /usr/bin/yq
|
28 |
+
|
29 |
+
# Step 4: Extract component-version and component-name from odtp.yml
|
30 |
+
- name: Extract component-version and component-name
|
31 |
+
id: extract_info
|
32 |
+
run: |
|
33 |
+
VERSION=$(yq e '.component-version' odtp.yml)
|
34 |
+
NAME=$(yq e '.component-name' odtp.yml)
|
35 |
+
echo "VERSION=${VERSION}"
|
36 |
+
echo "NAME=${NAME}"
|
37 |
+
echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
|
38 |
+
echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
|
39 |
+
|
40 |
+
# Step 5: Log in to GitHub Container Registry
|
41 |
+
- name: Log in to GitHub Container Registry
|
42 |
+
uses: docker/login-action@v2
|
43 |
+
with:
|
44 |
+
registry: ghcr.io
|
45 |
+
username: ${{ github.actor }}
|
46 |
+
password: ${{ secrets.GITHUB_TOKEN }}
|
47 |
+
|
48 |
+
# Step 6: Build and push Docker image for multiple platforms
|
49 |
+
- name: Build and push Docker image
|
50 |
+
run: |
|
51 |
+
IMAGE_NAME=ghcr.io/${{ github.repository }}/${{ env.COMPONENT_NAME }}
|
52 |
+
docker buildx build \
|
53 |
+
--platform linux/amd64,linux/arm64 \
|
54 |
+
--build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
|
55 |
+
-t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
|
56 |
+
-t $IMAGE_NAME:latest \
|
57 |
+
--push .
|
.github/workflows/multiplatform_docker_build_dockerhub.yml
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Multi-Platform Docker Build for Dockerhub
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch:
|
5 |
+
|
6 |
+
jobs:
|
7 |
+
build-and-publish:
|
8 |
+
runs-on: ubuntu-latest
|
9 |
+
|
10 |
+
steps:
|
11 |
+
# Step 1: Check out the repository and submodules
|
12 |
+
- name: Check out code
|
13 |
+
uses: actions/checkout@v3
|
14 |
+
with:
|
15 |
+
submodules: true # Fetch submodules
|
16 |
+
fetch-depth: 0 # Ensure the full history is fetched
|
17 |
+
|
18 |
+
# Step 2: Set up Docker Buildx
|
19 |
+
- name: Set up Docker Buildx
|
20 |
+
uses: docker/setup-buildx-action@v2
|
21 |
+
|
22 |
+
# Step 3: Install yq
|
23 |
+
- name: Install yq
|
24 |
+
run: |
|
25 |
+
sudo apt-get update && sudo apt-get install -y wget
|
26 |
+
sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
|
27 |
+
sudo chmod +x /usr/bin/yq
|
28 |
+
|
29 |
+
# Step 4: Extract component-version and component-name from odtp.yml
|
30 |
+
- name: Extract component-version and component-name
|
31 |
+
id: extract_info
|
32 |
+
run: |
|
33 |
+
VERSION=$(yq e '.component-version' odtp.yml)
|
34 |
+
NAME=$(yq e '.component-name' odtp.yml)
|
35 |
+
echo "VERSION=${VERSION}"
|
36 |
+
echo "NAME=${NAME}"
|
37 |
+
echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
|
38 |
+
echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
|
39 |
+
|
40 |
+
# Step 5: Log in to Docker Hub
|
41 |
+
- name: Log in to Docker Hub
|
42 |
+
uses: docker/login-action@v2
|
43 |
+
with:
|
44 |
+
username: ${{ secrets.DOCKER_USERNAME }}
|
45 |
+
password: ${{ secrets.DOCKER_PASSWORD }}
|
46 |
+
|
47 |
+
# Step 6: Build and push Docker image for multiple platforms
|
48 |
+
- name: Build and push Docker image
|
49 |
+
run: |
|
50 |
+
IMAGE_NAME=${{ secrets.DOCKER_USERNAME }}/${{ env.COMPONENT_NAME }}
|
51 |
+
docker buildx build \
|
52 |
+
--platform linux/amd64,linux/arm64 \
|
53 |
+
--build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
|
54 |
+
-t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
|
55 |
+
-t $IMAGE_NAME:latest \
|
56 |
+
--push .
|
.github/workflows/push_to_hf.yml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Sync to Hugging Face hub
|
2 |
+
on:
|
3 |
+
push:
|
4 |
+
branches: [main]
|
5 |
+
# to run this workflow manually from the Actions tab
|
6 |
+
workflow_dispatch:
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
sync-to-hub:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
steps:
|
12 |
+
- uses: actions/checkout@v3
|
13 |
+
with:
|
14 |
+
fetch-depth: 0
|
15 |
+
lfs: true
|
16 |
+
- name: Push to hub
|
17 |
+
env:
|
18 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
19 |
+
HF_USERNAME: ${{secrets.HF_USERNAME}}
|
20 |
+
run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/katospiegel/odtp-pyannote-whisper main
|
Dockerfile
CHANGED
@@ -1,16 +1,25 @@
|
|
1 |
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
|
2 |
|
3 |
-
|
4 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
5 |
-
|
6 |
-
# Weasyprint is necessary for pdf printing
|
7 |
-
RUN apt-get update && apt-get install -y apt-utils weasyprint
|
8 |
|
9 |
RUN apt-get install -y python3.11 python3.11-venv python3-pip
|
10 |
|
11 |
-
|
12 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
|
|
|
|
14 |
|
15 |
#######################################################################
|
16 |
# PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
|
@@ -23,7 +32,7 @@ RUN pip install -r /tmp/requirements.txt
|
|
23 |
# Dependencies
|
24 |
|
25 |
RUN apt-get update && \
|
26 |
-
apt-get install -y zip git && \
|
27 |
apt-get clean && \
|
28 |
rm -rf /var/lib/apt/lists/*
|
29 |
|
@@ -31,6 +40,8 @@ RUN apt-get update && \
|
|
31 |
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
|
32 |
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
|
33 |
|
|
|
|
|
34 |
|
35 |
######################################################################
|
36 |
# ODTP COMPONENT CONFIGURATION.
|
@@ -41,18 +52,12 @@ COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
|
|
41 |
# ODTP Preparation
|
42 |
##################################################
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
/odtp/odtp-app \
|
47 |
-
/odtp/odtp-component-client \
|
48 |
-
/odtp/odtp-logs \
|
49 |
-
/odtp/odtp-input \
|
50 |
-
/odtp/odtp-workdir \
|
51 |
-
/odtp/odtp-output
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
|
56 |
|
57 |
# This copy all the information for running the ODTP component
|
58 |
COPY odtp.yml /odtp/odtp-config/odtp.yml
|
@@ -66,8 +71,15 @@ WORKDIR /odtp
|
|
66 |
# Fix for end of the line issue on Windows
|
67 |
##################################################
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
# Fix for end of the line issue on Windows. Avoid error when building on windows
|
70 |
RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
|
71 |
|
|
|
72 |
|
73 |
ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
|
|
|
1 |
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
|
2 |
|
3 |
+
RUN apt-get update && apt-get install -y apt-utils
|
|
|
|
|
|
|
|
|
4 |
|
5 |
RUN apt-get install -y python3.11 python3.11-venv python3-pip
|
6 |
|
7 |
+
# Create directories and set permissions before switching to the non-root user
|
8 |
+
RUN mkdir -p /odtp/odtp-tmp \
|
9 |
+
/odtp \
|
10 |
+
/odtp/odtp-config \
|
11 |
+
/odtp/odtp-app \
|
12 |
+
/odtp/odtp-component-client \
|
13 |
+
/odtp/odtp-logs \
|
14 |
+
/odtp/odtp-input \
|
15 |
+
/odtp/odtp-workdir \
|
16 |
+
/odtp/odtp-output \
|
17 |
+
/home/user && \
|
18 |
+
chown -R 1000:1000 /odtp /home/user
|
19 |
+
|
20 |
|
21 |
+
COPY odtp-component-client/requirements.txt /odtp/odtp-tmp/odtp.requirements.txt
|
22 |
+
RUN pip install -r /odtp/odtp-tmp/odtp.requirements.txt
|
23 |
|
24 |
#######################################################################
|
25 |
# PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
|
|
|
32 |
# Dependencies
|
33 |
|
34 |
RUN apt-get update && \
|
35 |
+
apt-get install -y zip git libglib2.0-0 libpango1.0-0 && \
|
36 |
apt-get clean && \
|
37 |
rm -rf /var/lib/apt/lists/*
|
38 |
|
|
|
40 |
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
|
41 |
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
|
42 |
|
43 |
+
# Adjust permissions so user 1000 can access /usr/local/bin
|
44 |
+
RUN chown -R 1000:1000 /usr/local/bin/
|
45 |
|
46 |
######################################################################
|
47 |
# ODTP COMPONENT CONFIGURATION.
|
|
|
52 |
# ODTP Preparation
|
53 |
##################################################
|
54 |
|
55 |
+
# Switch to the "user" user
|
56 |
+
USER 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
+
# Set home to the user's home directory
|
59 |
+
ENV HOME=/home/user \
|
60 |
+
PATH=/home/user/.local/bin:$PATH
|
61 |
|
62 |
# This copy all the information for running the ODTP component
|
63 |
COPY odtp.yml /odtp/odtp-config/odtp.yml
|
|
|
71 |
# Fix for end of the line issue on Windows
|
72 |
##################################################
|
73 |
|
74 |
+
# Switch back to root user to run sed command
|
75 |
+
USER root
|
76 |
+
RUN chown -R 1000:1000 /odtp
|
77 |
+
|
78 |
+
# Switch back to the "user" user
|
79 |
+
USER 1000
|
80 |
# Fix for end of the line issue on Windows. Avoid error when building on windows
|
81 |
RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
|
82 |
|
83 |
+
EXPOSE 7860
|
84 |
|
85 |
ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
|
LICENSE
CHANGED
@@ -1,15 +1,202 @@
|
|
1 |
-
BSD 3-Clause "New" or "Revised" License
|
2 |
-
Licence ID
|
3 |
-
BSD-3-Clause
|
4 |
-
Licence text
|
5 |
-
Copyright (c) 2023-2024 Swiss Data Science Center. All rights reserved.
|
6 |
|
7 |
-
|
|
|
|
|
8 |
|
9 |
-
|
10 |
|
11 |
-
|
12 |
|
13 |
-
|
|
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
+
Apache License
|
3 |
+
Version 2.0, January 2004
|
4 |
+
http://www.apache.org/licenses/
|
5 |
|
6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7 |
|
8 |
+
1. Definitions.
|
9 |
|
10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
12 |
|
13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14 |
+
the copyright owner that is granting the License.
|
15 |
+
|
16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
17 |
+
other entities that control, are controlled by, or are under common
|
18 |
+
control with that entity. For the purposes of this definition,
|
19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
20 |
+
direction or management of such entity, whether by contract or
|
21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23 |
+
|
24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25 |
+
exercising permissions granted by this License.
|
26 |
+
|
27 |
+
"Source" form shall mean the preferred form for making modifications,
|
28 |
+
including but not limited to software source code, documentation
|
29 |
+
source, and configuration files.
|
30 |
+
|
31 |
+
"Object" form shall mean any form resulting from mechanical
|
32 |
+
transformation or translation of a Source form, including but
|
33 |
+
not limited to compiled object code, generated documentation,
|
34 |
+
and conversions to other media types.
|
35 |
+
|
36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
37 |
+
Object form, made available under the License, as indicated by a
|
38 |
+
copyright notice that is included in or attached to the work
|
39 |
+
(an example is provided in the Appendix below).
|
40 |
+
|
41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42 |
+
form, that is based on (or derived from) the Work and for which the
|
43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
45 |
+
of this License, Derivative Works shall not include works that remain
|
46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47 |
+
the Work and Derivative Works thereof.
|
48 |
+
|
49 |
+
"Contribution" shall mean any work of authorship, including
|
50 |
+
the original version of the Work and any modifications or additions
|
51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
55 |
+
means any form of electronic, verbal, or written communication sent
|
56 |
+
to the Licensor or its representatives, including but not limited to
|
57 |
+
communication on electronic mailing lists, source code control systems,
|
58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
60 |
+
excluding communication that is conspicuously marked or otherwise
|
61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
62 |
+
|
63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
65 |
+
subsequently incorporated within the Work.
|
66 |
+
|
67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
72 |
+
Work and such Derivative Works in Source or Object form.
|
73 |
+
|
74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
+
(except as stated in this section) patent license to make, have made,
|
78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79 |
+
where such license applies only to those patent claims licensable
|
80 |
+
by such Contributor that are necessarily infringed by their
|
81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
83 |
+
institute patent litigation against any entity (including a
|
84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85 |
+
or a Contribution incorporated within the Work constitutes direct
|
86 |
+
or contributory patent infringement, then any patent licenses
|
87 |
+
granted to You under this License for that Work shall terminate
|
88 |
+
as of the date such litigation is filed.
|
89 |
+
|
90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
91 |
+
Work or Derivative Works thereof in any medium, with or without
|
92 |
+
modifications, and in Source or Object form, provided that You
|
93 |
+
meet the following conditions:
|
94 |
+
|
95 |
+
(a) You must give any other recipients of the Work or
|
96 |
+
Derivative Works a copy of this License; and
|
97 |
+
|
98 |
+
(b) You must cause any modified files to carry prominent notices
|
99 |
+
stating that You changed the files; and
|
100 |
+
|
101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
102 |
+
that You distribute, all copyright, patent, trademark, and
|
103 |
+
attribution notices from the Source form of the Work,
|
104 |
+
excluding those notices that do not pertain to any part of
|
105 |
+
the Derivative Works; and
|
106 |
+
|
107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108 |
+
distribution, then any Derivative Works that You distribute must
|
109 |
+
include a readable copy of the attribution notices contained
|
110 |
+
within such NOTICE file, excluding those notices that do not
|
111 |
+
pertain to any part of the Derivative Works, in at least one
|
112 |
+
of the following places: within a NOTICE text file distributed
|
113 |
+
as part of the Derivative Works; within the Source form or
|
114 |
+
documentation, if provided along with the Derivative Works; or,
|
115 |
+
within a display generated by the Derivative Works, if and
|
116 |
+
wherever such third-party notices normally appear. The contents
|
117 |
+
of the NOTICE file are for informational purposes only and
|
118 |
+
do not modify the License. You may add Your own attribution
|
119 |
+
notices within Derivative Works that You distribute, alongside
|
120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
121 |
+
that such additional attribution notices cannot be construed
|
122 |
+
as modifying the License.
|
123 |
+
|
124 |
+
You may add Your own copyright statement to Your modifications and
|
125 |
+
may provide additional or different license terms and conditions
|
126 |
+
for use, reproduction, or distribution of Your modifications, or
|
127 |
+
for any such Derivative Works as a whole, provided Your use,
|
128 |
+
reproduction, and distribution of the Work otherwise complies with
|
129 |
+
the conditions stated in this License.
|
130 |
+
|
131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
133 |
+
by You to the Licensor shall be under the terms and conditions of
|
134 |
+
this License, without any additional terms or conditions.
|
135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136 |
+
the terms of any separate license agreement you may have executed
|
137 |
+
with Licensor regarding such Contributions.
|
138 |
+
|
139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
141 |
+
except as required for reasonable and customary use in describing the
|
142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
143 |
+
|
144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145 |
+
agreed to in writing, Licensor provides the Work (and each
|
146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148 |
+
implied, including, without limitation, any warranties or conditions
|
149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151 |
+
appropriateness of using or redistributing the Work and assume any
|
152 |
+
risks associated with Your exercise of permissions under this License.
|
153 |
+
|
154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
155 |
+
whether in tort (including negligence), contract, or otherwise,
|
156 |
+
unless required by applicable law (such as deliberate and grossly
|
157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158 |
+
liable to You for damages, including any direct, indirect, special,
|
159 |
+
incidental, or consequential damages of any character arising as a
|
160 |
+
result of this License or out of the use or inability to use the
|
161 |
+
Work (including but not limited to damages for loss of goodwill,
|
162 |
+
work stoppage, computer failure or malfunction, or any and all
|
163 |
+
other commercial damages or losses), even if such Contributor
|
164 |
+
has been advised of the possibility of such damages.
|
165 |
+
|
166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169 |
+
or other liability obligations and/or rights consistent with this
|
170 |
+
License. However, in accepting such obligations, You may act only
|
171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172 |
+
of any other Contributor, and only if You agree to indemnify,
|
173 |
+
defend, and hold each Contributor harmless for any liability
|
174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
175 |
+
of your accepting any such warranty or additional liability.
|
176 |
+
|
177 |
+
END OF TERMS AND CONDITIONS
|
178 |
+
|
179 |
+
APPENDIX: How to apply the Apache License to your work.
|
180 |
+
|
181 |
+
To apply the Apache License to your work, attach the following
|
182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183 |
+
replaced with your own identifying information. (Don't include
|
184 |
+
the brackets!) The text should be enclosed in the appropriate
|
185 |
+
comment syntax for the file format. We also recommend that a
|
186 |
+
file or class name and description of purpose be included on the
|
187 |
+
same "printed page" as the copyright notice for easier
|
188 |
+
identification within third-party archives.
|
189 |
+
|
190 |
+
Copyright [2025] [SDSC]
|
191 |
+
|
192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193 |
+
you may not use this file except in compliance with the License.
|
194 |
+
You may obtain a copy of the License at
|
195 |
+
|
196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
197 |
+
|
198 |
+
Unless required by applicable law or agreed to in writing, software
|
199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201 |
+
See the License for the specific language governing permissions and
|
202 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,20 +1,28 @@
|
|
1 |
# odtp-pyannote-whisper
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
Add here your badges:
|
6 |
-
[](http://localhost:8501/launch-component)
|
7 |
-
[]("")
|
8 |
|
9 |
> [!NOTE]
|
10 |
> This repository makes use of submodules. Therefore, when cloning it you need to include them.
|
11 |
>
|
12 |
> `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
|
13 |
|
14 |
-
This pipeline processes a `.wav`
|
15 |
|
16 |
Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
## Table of Contents
|
19 |
|
20 |
- [Tools Information](#tools-information)
|
@@ -42,12 +50,12 @@ Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You
|
|
42 |
|
43 |
## How to add this component to your ODTP instance
|
44 |
|
45 |
-
In order to add this component to your ODTP CLI, you can use. If you want to use the component directly, please refer to the docker section.
|
46 |
|
47 |
``` bash
|
48 |
odtp new odtp-component-entry \
|
49 |
--name odtp-pyannote-whisper \
|
50 |
-
--component-version v0.0
|
51 |
--repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
|
52 |
```
|
53 |
|
@@ -92,14 +100,32 @@ Build the dockerfile.
|
|
92 |
docker build -t odtp-pyannote-whisper .
|
93 |
```
|
94 |
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
``` bash
|
98 |
docker run -it --rm \
|
99 |
-v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
|
100 |
-v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
|
101 |
-v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
|
102 |
-
--env-file .env
|
|
|
103 |
```
|
104 |
|
105 |
### Development Mode
|
@@ -128,24 +154,42 @@ docker run -it --rm \
|
|
128 |
--env-file .env odtp-pyannote-whisper
|
129 |
```
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
### Running in API Mode
|
132 |
|
133 |
-
To run the component in API mode and expose a port, use the following
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
``` bash
|
136 |
docker run -it --rm \
|
137 |
-
-
|
138 |
-
-v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
|
139 |
-
-v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
|
140 |
-
-p {HOST_PORT}:7860 \
|
141 |
--env-file .env \
|
142 |
-
|
143 |
-
odtp-pyannote-whisper \
|
144 |
-
/odtp/odtp-app/gradio_app.py
|
145 |
```
|
146 |
|
147 |
-
|
|
|
|
|
148 |
|
149 |
-
|
|
|
150 |
|
151 |
This component has been created using the `odtp-component-template` `v0.5.0`.
|
|
|
|
|
|
1 |
# odtp-pyannote-whisper
|
2 |
|
3 |
+
[]("") [](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)
|
|
|
|
|
|
|
|
|
4 |
|
5 |
> [!NOTE]
|
6 |
> This repository makes use of submodules. Therefore, when cloning it you need to include them.
|
7 |
>
|
8 |
> `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
|
9 |
|
10 |
+
This pipeline processes a `.wav` or `mp4` media file by detecting the number of speakers present in the recording using `pyannote.audio`. For each detected speaker segment, it employs `OpenAI's Whisper model` to transcribe or translate the speech individually. This approach ensures accurate and speaker-specific transcriptions or translations, providing a clear understanding of who said what throughout the audio.
|
11 |
|
12 |
Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
|
13 |
|
14 |
+
- [Speaker-Diarization](https://huggingface.co/pyannote/speaker-diarization-3.1)
|
15 |
+
- [Speaker-Segmentation](https://huggingface.co/pyannote/segmentation-3.0)
|
16 |
+
|
17 |
+
After accepting these terms and conditions for those models. You can obtain you HuggingFace API Key to allow the access to these models:
|
18 |
+
|
19 |
+
- [Hugging Face Access Keys](https://huggingface.co/settings/tokens)
|
20 |
+
|
21 |
+
This token should be provided to the component via the `ENV` variables or by the corresponding text field in the web app interface ([Here](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)).
|
22 |
+
|
23 |
+

|
24 |
+
|
25 |
+
|
26 |
## Table of Contents
|
27 |
|
28 |
- [Tools Information](#tools-information)
|
|
|
50 |
|
51 |
## How to add this component to your ODTP instance
|
52 |
|
53 |
+
This component can be run directly with Docker, however it is designed to be run with [ODTP](https://odtp-org.github.io/odtp-manuals/). In order to add this component to your ODTP CLI, you can use. If you want to use the component directly, please refer to the docker section.
|
54 |
|
55 |
``` bash
|
56 |
odtp new odtp-component-entry \
|
57 |
--name odtp-pyannote-whisper \
|
58 |
+
--component-version v0.1.0 \
|
59 |
--repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
|
60 |
```
|
61 |
|
|
|
100 |
docker build -t odtp-pyannote-whisper .
|
101 |
```
|
102 |
|
103 |
+
Then create `.env` file similar to `.env.dist` and fill the variables values. Like on this example:
|
104 |
+
|
105 |
+
```
|
106 |
+
MODEL=base
|
107 |
+
HF_TOKEN=hf_xxxxxxxxxxx
|
108 |
+
TASK=transcribe
|
109 |
+
INPUT_FILE=HRC_20220328T0000.mp4
|
110 |
+
OUTPUT_FILE=HRC_20220328T0000
|
111 |
+
VERBOSE=TRUE
|
112 |
+
```
|
113 |
+
|
114 |
+
Then create 3 folders:
|
115 |
+
|
116 |
+
- `odtp-input`, where your input data should be located.
|
117 |
+
- `odtp-output`, where your output data will be stored.
|
118 |
+
- `odtp-logs`, where the logs will be shared.
|
119 |
+
|
120 |
+
After this, you can run the following command and the pipeline will execute.
|
121 |
|
122 |
``` bash
|
123 |
docker run -it --rm \
|
124 |
-v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
|
125 |
-v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
|
126 |
-v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
|
127 |
+
--env-file .env \
|
128 |
+
odtp-pyannote-whisper
|
129 |
```
|
130 |
|
131 |
### Development Mode
|
|
|
154 |
--env-file .env odtp-pyannote-whisper
|
155 |
```
|
156 |
|
157 |
+
On Windowss this is the command to execute.
|
158 |
+
|
159 |
+
``` powershell
|
160 |
+
docker run -it --rm `
|
161 |
+
--gpus all `
|
162 |
+
-v ${PWD}/odtp-input:/odtp/odtp-input `
|
163 |
+
-v ${PWD}/odtp-output:/odtp/odtp-output `
|
164 |
+
-v ${PWD}/odtp-logs:/odtp/odtp-logs `
|
165 |
+
--env-file .env odtp-pyannote-whisper
|
166 |
+
```
|
167 |
+
|
168 |
### Running in API Mode
|
169 |
|
170 |
+
To run the component in API mode and expose a port, you need to use the following environment variables:
|
171 |
+
|
172 |
+
```
|
173 |
+
ODTP_API_MODE=TRUE
|
174 |
+
ODTP_GRADIO_SHARE=FALSE #Only if you want to share the app via the gradio tunneling
|
175 |
+
```
|
176 |
+
|
177 |
+
After the configuration, you can run:
|
178 |
|
179 |
``` bash
|
180 |
docker run -it --rm \
|
181 |
+
-p 7860:7860 \
|
|
|
|
|
|
|
182 |
--env-file .env \
|
183 |
+
odtp-pyannote-whisper
|
|
|
|
|
184 |
```
|
185 |
|
186 |
+
And access to the web interface on `localhost:7860` in your browser.
|
187 |
+
|
188 |
+

|
189 |
|
190 |
+
|
191 |
+
## Credits and references
|
192 |
|
193 |
This component has been created using the `odtp-component-template` `v0.5.0`.
|
194 |
+
|
195 |
+
The development of this repository has been realized by SDSC.
|
app/add_annotation.py
CHANGED
@@ -1,17 +1,3 @@
|
|
1 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-transcription_original.json /odtp/odtp-input/HRC_20160622T0000-initial.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_transcription --origin_channel original --id transcription_original
|
2 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-translation_original_english.json /odtp/odtp-output/HRC_20160622T0000.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_translation --origin_channel original --id translation_original_english
|
3 |
-
|
4 |
-
|
5 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-transcription_original.json /odtp/odtp-input/HRC_20220328T0000-initial.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_transcription --origin_channel original --id transcription_original
|
6 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-translation_original_english.json /odtp/odtp-output/HRC_20220328T0000.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_translation --origin_channel original --id translation_original_english
|
7 |
-
|
8 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-transcription_original.json /odtp/odtp-input/HRC_20220929T0000-initial.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_transcription --origin_channel original --id transcription_original
|
9 |
-
# python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-translation_original_english.json /odtp/odtp-output/HRC_20220929T0000.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_translation --origin_channel original --id translation_original_english
|
10 |
-
|
11 |
-
# python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-transcription_original.json /odtp/odtp-input/HRC_20221010T1000-initial.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_transcription --origin_channel original --id transcription_original
|
12 |
-
# python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-translation_original_english.json /odtp/odtp-output/HRC_20221010T1000.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_translation --origin_channel original --id translation_original_english
|
13 |
-
|
14 |
-
|
15 |
import json
|
16 |
import argparse
|
17 |
from datetime import timedelta
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import json
|
2 |
import argparse
|
3 |
from datetime import timedelta
|
app/app.py
CHANGED
@@ -22,14 +22,15 @@ import json
|
|
22 |
from dataclasses import dataclass, asdict
|
23 |
from jsonschema import validate, ValidationError
|
24 |
|
25 |
-
import
|
26 |
-
import
|
27 |
|
28 |
from pydub import AudioSegment
|
29 |
import yt_dlp
|
30 |
|
31 |
from slugify import slugify
|
32 |
import uuid
|
|
|
33 |
|
34 |
|
35 |
|
@@ -470,7 +471,7 @@ def clip_audio(audio_file_path, sample_rate, start, end, output_path):
|
|
470 |
# Write the audio segment to the output path
|
471 |
sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
|
472 |
|
473 |
-
def convert_mpx_to_wav(file_path):
|
474 |
if file_path.lower().endswith('.mp3'):
|
475 |
# Load the MP3 file
|
476 |
audio = AudioSegment.from_mp3(file_path)
|
@@ -482,12 +483,12 @@ def convert_mpx_to_wav(file_path):
|
|
482 |
raise ValueError("Input file must be an MP3 or MP4 file")
|
483 |
|
484 |
# Define the output path
|
485 |
-
wav_file_path = os.path.splitext(file_path)[0] + '.wav'
|
486 |
|
487 |
# Export as WAV
|
488 |
-
audio.export(
|
489 |
|
490 |
-
return
|
491 |
|
492 |
|
493 |
def download_youtube_video(url, filename, output_path='/tmp'):
|
@@ -502,6 +503,8 @@ def download_youtube_video(url, filename, output_path='/tmp'):
|
|
502 |
}],
|
503 |
}
|
504 |
|
|
|
|
|
505 |
if not os.path.exists(output_path):
|
506 |
os.makedirs(output_path)
|
507 |
|
@@ -511,10 +514,11 @@ def download_youtube_video(url, filename, output_path='/tmp'):
|
|
511 |
print(output_file)
|
512 |
base, ext = os.path.splitext(output_file)
|
513 |
|
514 |
-
new_file = base + '.wav'
|
515 |
return new_file
|
516 |
|
517 |
import subprocess
|
|
|
518 |
|
519 |
def convert_video_to_wav(input_file, output_file):
|
520 |
"""
|
@@ -551,73 +555,31 @@ def convert_video_to_wav(input_file, output_file):
|
|
551 |
print(f"Error during conversion: {e}")
|
552 |
|
553 |
|
554 |
-
######################## Parallel
|
555 |
-
# import multiprocessing
|
556 |
-
# import tempfile
|
557 |
-
|
558 |
-
# def process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
|
559 |
-
# start, end, speaker = segment
|
560 |
-
# clip_path = f"/tmp/speaker_{speaker}_start_{start:.1f}_end_{end:.1f}.wav"
|
561 |
-
# clip_audio(file_path, sample_rate, start, end, clip_path)
|
562 |
-
|
563 |
-
# result = asr_model.transcribe(start=start, end=end, options=whisper_options)
|
564 |
-
# language = result.get('language', args.language or 'unknown')
|
565 |
-
|
566 |
-
# if args.verbose:
|
567 |
-
# print(f"start={start:.1f}s stop={end:.1f}s lang={language} {speaker}")
|
568 |
-
|
569 |
-
# return {
|
570 |
-
# 'result': result,
|
571 |
-
# 'speaker': speaker,
|
572 |
-
# 'start': start,
|
573 |
-
# 'language': language
|
574 |
-
# }
|
575 |
-
|
576 |
-
# def chunkify(lst, n):
|
577 |
-
# for i in range(0, len(lst), n):
|
578 |
-
# yield lst[i:i + n]
|
579 |
-
|
580 |
-
# def process_chunk(chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
|
581 |
-
# results = []
|
582 |
-
# for segment in chunk:
|
583 |
-
# result = process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)
|
584 |
-
# results.append(result)
|
585 |
-
|
586 |
-
# temp_file = tempfile.mktemp(suffix='.json')
|
587 |
-
# with open(temp_file, 'w') as f:
|
588 |
-
# json.dump(results, f)
|
589 |
-
|
590 |
-
# return temp_file
|
591 |
-
|
592 |
-
########################
|
593 |
-
|
594 |
-
|
595 |
def main(args):
|
596 |
# TODO: Take out the file_path from ODTP here
|
597 |
-
if args.input_file.startswith('http://') or args.input_file.startswith('https://'):
|
598 |
-
file_path = download_youtube_video(args.input_file, filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
|
599 |
base_slug = slugify(file_path, separator='_')
|
600 |
-
#file_path = convert_mpx_to_wav(file_path)
|
601 |
elif args.input_file.lower().endswith('.mp3'):
|
602 |
file_path = convert_mpx_to_wav(args.input_file)
|
603 |
-
|
604 |
elif args.input_file.lower().endswith('.wav'):
|
605 |
file_path = args.input_file
|
606 |
-
|
607 |
elif args.input_file.lower().endswith('.mp4'):
|
608 |
-
file_path =
|
609 |
-
|
610 |
elif args.input_file.lower().endswith('.rm'):
|
611 |
-
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '.wav')
|
612 |
convert_video_to_wav(args.input_file, file_path)
|
613 |
elif args.input_file.lower().endswith('.f4v'):
|
614 |
-
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '.wav')
|
615 |
convert_video_to_wav(args.input_file, file_path)
|
616 |
elif args.input_file.lower().endswith('.mkv'):
|
617 |
-
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '.wav')
|
618 |
convert_video_to_wav(args.input_file, file_path)
|
619 |
else:
|
620 |
-
raise ValueError("Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file")
|
621 |
|
622 |
|
623 |
diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
|
@@ -691,39 +653,21 @@ def main(args):
|
|
691 |
writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
|
692 |
|
693 |
writer_json.finalize()
|
694 |
-
# Parallel testing
|
695 |
-
# chunk_size = 2 #args.chunk_size # Assume chunk_size is passed as an argument
|
696 |
-
# temp_files = []
|
697 |
-
|
698 |
-
# with multiprocessing.Pool() as pool:
|
699 |
-
# chunks = list(chunkify(grouped_segments, chunk_size))
|
700 |
-
# results = [pool.apply_async(process_chunk, (chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)) for chunk in chunks]
|
701 |
-
|
702 |
-
# for result in results:
|
703 |
-
# temp_file = result.get()
|
704 |
-
# temp_files.append(temp_file)
|
705 |
-
|
706 |
-
# for temp_file in temp_files:
|
707 |
-
# with open(temp_file, 'r') as f:
|
708 |
-
# results = json.load(f)
|
709 |
-
# for result in results:
|
710 |
-
# writer(result['result'], args.output_file, result['speaker'], result['start'], writer_options)
|
711 |
-
# writer_json(generate_segments(result['result']['segments'], result['speaker'], result['language']), args.output_json_file)
|
712 |
-
# os.remove(temp_file)
|
713 |
|
714 |
# If you want to validate JSON, paragraphs, PDF creation, etc.
|
715 |
-
|
716 |
args.output_json_file,
|
717 |
args.output_paragraphs_json_file,
|
718 |
3
|
719 |
)
|
720 |
-
|
721 |
args.output_paragraphs_json_file,
|
722 |
args.output_md_file,
|
723 |
args.output_pdf_file
|
724 |
)
|
725 |
|
726 |
|
|
|
727 |
if __name__ == '__main__':
|
728 |
# Multiprocessing requires spawn when working with CUDA
|
729 |
#multiprocessing.set_start_method('spawn')
|
|
|
22 |
from dataclasses import dataclass, asdict
|
23 |
from jsonschema import validate, ValidationError
|
24 |
|
25 |
+
import create_pdf
|
26 |
+
import paragraphs_creator
|
27 |
|
28 |
from pydub import AudioSegment
|
29 |
import yt_dlp
|
30 |
|
31 |
from slugify import slugify
|
32 |
import uuid
|
33 |
+
import yaml
|
34 |
|
35 |
|
36 |
|
|
|
471 |
# Write the audio segment to the output path
|
472 |
sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
|
473 |
|
474 |
+
def convert_mpx_to_wav(file_path, output_path):
|
475 |
if file_path.lower().endswith('.mp3'):
|
476 |
# Load the MP3 file
|
477 |
audio = AudioSegment.from_mp3(file_path)
|
|
|
483 |
raise ValueError("Input file must be an MP3 or MP4 file")
|
484 |
|
485 |
# Define the output path
|
486 |
+
#wav_file_path = os.path.splitext(file_path)[0] + '.wav'
|
487 |
|
488 |
# Export as WAV
|
489 |
+
audio.export(output_path, format='wav')
|
490 |
|
491 |
+
return output_path
|
492 |
|
493 |
|
494 |
def download_youtube_video(url, filename, output_path='/tmp'):
|
|
|
503 |
}],
|
504 |
}
|
505 |
|
506 |
+
|
507 |
+
|
508 |
if not os.path.exists(output_path):
|
509 |
os.makedirs(output_path)
|
510 |
|
|
|
514 |
print(output_file)
|
515 |
base, ext = os.path.splitext(output_file)
|
516 |
|
517 |
+
new_file = base + '-original.wav'
|
518 |
return new_file
|
519 |
|
520 |
import subprocess
|
521 |
+
import shutil
|
522 |
|
523 |
def convert_video_to_wav(input_file, output_file):
|
524 |
"""
|
|
|
555 |
print(f"Error during conversion: {e}")
|
556 |
|
557 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
def main(args):
|
559 |
# TODO: Take out the file_path from ODTP here
|
560 |
+
if args.input_file.startswith('/odtp/odtp-input/http://') or args.input_file.startswith('/odtp/odtp-input/https://'):
|
561 |
+
file_path = download_youtube_video(args.input_file.replace("/odtp/odtp-input/",""), filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
|
562 |
base_slug = slugify(file_path, separator='_')
|
|
|
563 |
elif args.input_file.lower().endswith('.mp3'):
|
564 |
file_path = convert_mpx_to_wav(args.input_file)
|
565 |
+
shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.mp3')))
|
566 |
elif args.input_file.lower().endswith('.wav'):
|
567 |
file_path = args.input_file
|
568 |
+
shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.wav')))
|
569 |
elif args.input_file.lower().endswith('.mp4'):
|
570 |
+
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mp4', '-original.wav')
|
571 |
+
convert_mpx_to_wav(args.input_file, file_path)
|
572 |
elif args.input_file.lower().endswith('.rm'):
|
573 |
+
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '-original.wav')
|
574 |
convert_video_to_wav(args.input_file, file_path)
|
575 |
elif args.input_file.lower().endswith('.f4v'):
|
576 |
+
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '-original.wav')
|
577 |
convert_video_to_wav(args.input_file, file_path)
|
578 |
elif args.input_file.lower().endswith('.mkv'):
|
579 |
+
file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '-original.wav')
|
580 |
convert_video_to_wav(args.input_file, file_path)
|
581 |
else:
|
582 |
+
raise ValueError(f"Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file. Input file: {args.input_file}")
|
583 |
|
584 |
|
585 |
diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
|
|
|
653 |
writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
|
654 |
|
655 |
writer_json.finalize()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
656 |
|
657 |
# If you want to validate JSON, paragraphs, PDF creation, etc.
|
658 |
+
paragraphs_creator.process_paragraphs(
|
659 |
args.output_json_file,
|
660 |
args.output_paragraphs_json_file,
|
661 |
3
|
662 |
)
|
663 |
+
create_pdf.convert_json_to_pdf(
|
664 |
args.output_paragraphs_json_file,
|
665 |
args.output_md_file,
|
666 |
args.output_pdf_file
|
667 |
)
|
668 |
|
669 |
|
670 |
+
|
671 |
if __name__ == '__main__':
|
672 |
# Multiprocessing requires spawn when working with CUDA
|
673 |
#multiprocessing.set_start_method('spawn')
|
app/app.sh
CHANGED
@@ -1,30 +1,72 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
--
|
8 |
-
|
9 |
-
|
10 |
-
--
|
11 |
-
--
|
12 |
-
--output-
|
13 |
-
--output-
|
14 |
-
--output-
|
15 |
-
--output-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
#!/bin/bash

# Component entry point.
# With FULL_PIPELINE set (non-empty): run transcription + English translation,
# merge both into a single annotated session JSON, generate the file-metadata
# YAML and (TBD) upload the results to S3.
# Without FULL_PIPELINE: run a single task controlled by TASK / LANGUAGE.

if [ -n "$FULL_PIPELINE" ]; then

    echo "RUNNING TRANSCRIPTION AND EN TRANSLATION PIPELINE"
    # 1) Transcription of the original audio.
    # Fix: md/pdf outputs previously used a doubled suffix
    # ("-transcription_original_original"); they now match the names that
    # project_metadata_export.py registers ("-transcription_original").
    python3 /odtp/odtp-app/app.py \
        --model $MODEL \
        $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
        --hf-token $HF_TOKEN \
        --task transcribe \
        --input-file /odtp/odtp-input/$INPUT_FILE \
        --output-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.srt \
        --output-json-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
        --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-transcription_original_paragraphs.json \
        --output-md-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.md \
        --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.pdf \
        $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )

    # 2) Translation of the original audio to English.
    python3 /odtp/odtp-app/app.py \
        --model $MODEL \
        $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
        --hf-token $HF_TOKEN \
        --task translate \
        --language en \
        --input-file /odtp/odtp-input/$INPUT_FILE \
        --output-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.srt \
        --output-json-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
        --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-translation_original_english_paragraphs.json \
        --output-md-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.md \
        --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.pdf \
        $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )

    echo "Adding annotations"
    # 3) Merge the transcription into the initial session metadata JSON …
    python3 /odtp/odtp-app/add_annotation.py \
        /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
        /odtp/odtp-input/$INPUT_METADATA_FILE \
        /odtp/odtp-output/$OUTPUT_FILE.json \
        --type audio_transcription \
        --origin_channel original \
        --id transcription_original

    # … then merge the English translation into the same JSON (in place).
    python3 /odtp/odtp-app/add_annotation.py \
        /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
        /odtp/odtp-output/$OUTPUT_FILE.json \
        /odtp/odtp-output/$OUTPUT_FILE.json \
        --type audio_translation \
        --origin_channel original \
        --id translation_original_english

    echo "Generating yml file"
    python3 /odtp/odtp-app/project_metadata_export.py /odtp/odtp-output/

    echo "Uploading to S3"
    #python3 /odtp/odtp-app/s3_upload.py
    #TBD

else
    # Single-task mode: TASK selects transcribe/translate; LANGUAGE is optional.
    python3 /odtp/odtp-app/app.py \
        --model $MODEL \
        $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
        --hf-token $HF_TOKEN \
        --task $TASK \
        $( [ -n "$LANGUAGE" ] && echo "--language $LANGUAGE" ) \
        --input-file /odtp/odtp-input/$INPUT_FILE \
        --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
        --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
        --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}_paragraphs.json \
        --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
        --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
        $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
fi
|
72 |
+
|
app/{createpdf.py → create_pdf.py}
RENAMED
File without changes
|
app/{paragraphsCreator.py → paragraphs_creator.py}
RENAMED
File without changes
|
app/project_metadata_export.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import argparse
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import yaml
|
6 |
+
|
7 |
+
def parse_basename_and_date(folder):
    """
    Scan ``folder`` for a file whose name starts with 'HRC_YYYYMMDDTHHMM'.

    Returns:
        tuple: (base_name, session_date) where session_date is formatted as
        "YYYY MM DD HH:MM", or (None, None) when no entry matches.
    """
    base_re = re.compile(r"^(HRC_\d{8}T\d{4})")
    date_re = re.compile(r"HRC_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})")

    for entry in os.listdir(folder):
        hit = base_re.match(entry)
        if hit is None:
            continue
        base_name = hit.group(1)
        # Split the timestamp embedded in the base name into its components.
        parts = date_re.match(base_name)
        if parts:
            year, month, day, hour, minute = parts.groups()
            return base_name, f"{year} {month} {day} {hour}:{minute}"
    return None, None
|
24 |
+
|
25 |
+
def check_video_file(folder, base_name):
    """Return True when an entry named '<base_name>.mp4' exists in ``folder``."""
    expected = f"{base_name}.mp4"
    # Compare against directory entries (same semantics as a listdir lookup).
    return any(entry == expected for entry in os.listdir(folder))
|
31 |
+
|
32 |
+
def generate_metadata(base_name, session_date, include_video):
    """
    Build the metadata mapping describing all session artifacts.

    Entry order: JSON + YAML metadata files, then the MP4 video (only when
    ``include_video`` is truthy), then the audio/transcription/translation
    artifacts. Returns ``{"files": [{"name", "type", "description"}, ...]}``.
    """

    def file_entry(name, ftype, description):
        # Helper keeping every entry's shape identical.
        return {"name": name, "type": ftype, "description": description}

    entries = [
        file_entry(
            f"{base_name}.json",
            "json",
            f"JSON file containing metadata transcription and translation from the {session_date} session",
        ),
        file_entry(
            f"{base_name}-files.yml",
            "yml",
            f"YAML file containing metadata of the files from the {session_date} session",
        ),
    ]

    if include_video:
        entries.append(
            file_entry(
                f"{base_name}.mp4",
                "mp4",
                f"MP4 video file from the {session_date} session",
            )
        )

    entries += [
        file_entry(
            f"{base_name}-original.wav",
            "wav",
            f"Original audio file from the {session_date} session",
        ),
        file_entry(
            f"{base_name}-transcription_original.srt",
            "srt",
            f"Transcription file in SRT format from the original audio of the {session_date} session",
        ),
        file_entry(
            f"{base_name}-transcription_original.pdf",
            "pdf",
            f"PDF file containing the transcription from the original audio of the {session_date} session",
        ),
        file_entry(
            f"{base_name}-translation_original_english.srt",
            "srt",
            f"Translation file in SRT format to English from the original audio of the {session_date} session",
        ),
        file_entry(
            f"{base_name}-translation_original_english.pdf",
            "pdf",
            f"PDF file containing the English translation from the original audio of the {session_date} session",
        ),
    ]

    return {"files": entries}
|
88 |
+
|
89 |
+
def write_yaml_file(metadata, output_file):
    """
    Serialize ``metadata`` to ``output_file`` as YAML.

    Key order is preserved (sort_keys=False) and collections are emitted in
    flow style, matching the existing '-files.yml' format.
    """
    rendered = yaml.dump(metadata, sort_keys=False, default_flow_style=True)
    with open(output_file, "w") as fh:
        fh.write(rendered)
    print(f"Metadata YAML file written to {output_file}")
|
96 |
+
|
97 |
+
def main():
    """
    CLI entry point: build and write '<base_name>-files.yml' for a session folder.

    Prints an error and returns (exit code 0) when the folder is invalid or no
    session file matching 'HRC_YYYYMMDDT[HHMM]' is found.
    """
    parser = argparse.ArgumentParser(
        description="Generate YAML metadata for session files in a folder."
    )
    parser.add_argument("folder", help="Path to the folder containing the session files.")
    folder = parser.parse_args().folder

    if not os.path.isdir(folder):
        print(f"Error: {folder} is not a valid directory.")
        return

    base_name, session_date = parse_basename_and_date(folder)
    if not base_name:
        print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
        return

    # The MP4 entry is only included when the video is actually present.
    metadata = generate_metadata(
        base_name,
        session_date,
        check_video_file(folder, base_name),
    )

    # Output file is always in the same folder and named as <base_name>-files.yml
    write_yaml_file(metadata, os.path.join(folder, f"{base_name}-files.yml"))
|
app/s3_upload.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import argparse
|
5 |
+
import boto3
|
6 |
+
from botocore.exceptions import NoCredentialsError, ClientError
|
7 |
+
|
8 |
+
def parse_basename(folder):
    """
    Return the session base name ('HRC_YYYYMMDDTHHMM') of the first entry in
    ``folder`` whose name starts with that pattern, or None when none matches.
    """
    session_re = re.compile(r"^(HRC_\d{8}T\d{4})")
    for entry in os.listdir(folder):
        hit = session_re.match(entry)
        if hit is not None:
            return hit.group(1)
    return None
|
19 |
+
|
20 |
+
def upload_files_to_s3(folder, bucket, base_name, region):
    """
    Upload every file in ``folder`` whose name starts with ``base_name`` to the
    given S3 ``bucket``, under a key prefix named after ``base_name``.

    Credentials are read from the S3_MEDIA_KEY / S3_MEDIA_SECRET environment
    variables. Failures are reported per file and do not abort the batch.
    """
    # Fix: the original reused the single variable name `s3_key` for both the
    # AWS access-key credential and the per-file object key; keep them distinct.
    access_key = os.environ.get("S3_MEDIA_KEY")
    secret_key = os.environ.get("S3_MEDIA_SECRET")

    s3_client = boto3.client(
        's3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=region,
    )

    # Gather all files that start with the base name
    files_to_upload = [f for f in os.listdir(folder) if f.startswith(base_name)]
    if not files_to_upload:
        print(f"No files starting with '{base_name}' found in {folder}")
        return

    for file in files_to_upload:
        file_path = os.path.join(folder, file)
        object_key = f"{base_name}/{file}"  # Creates a folder in S3 named after the base name
        try:
            s3_client.upload_file(file_path, bucket, object_key)
            print(f"Uploaded '{file}' to s3://{bucket}/{object_key}")
        except (NoCredentialsError, ClientError) as e:
            print(f"Failed to upload '{file}': {e}")
|
45 |
+
|
46 |
+
def main():
    """
    CLI entry point: upload all files of a session folder to S3.

    Reads the target bucket from the S3_MEDIA_BUCKET environment variable and
    the region from S3_MEDIA_REGION (default 'us-east-1'). Prints an error and
    returns when the folder, bucket, or session base name cannot be resolved.
    """
    parser = argparse.ArgumentParser(
        description="Upload session files to an S3 bucket under a folder named by the base name."
    )
    parser.add_argument("folder", help="Path to the folder containing the session files")
    args = parser.parse_args()

    folder = args.folder
    if not os.path.isdir(folder):
        print(f"Error: '{folder}' is not a valid directory.")
        return

    # Retrieve environment variables for the bucket and datacenter (region)
    bucket = os.environ.get("S3_MEDIA_BUCKET")
    if not bucket:
        # Bug fix: the message previously referenced "BUCKET_LINK", which is
        # not the variable this code actually reads.
        print("Error: S3_MEDIA_BUCKET environment variable not set.")
        return

    region = os.environ.get("S3_MEDIA_REGION", "us-east-1")

    base_name = parse_basename(folder)
    if not base_name:
        print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
        return

    upload_files_to_s3(folder, bucket, base_name, region)
|
72 |
+
|
73 |
+
if __name__ == "__main__":
|
74 |
+
main()
|
75 |
+
|
assets/screenshot.png
ADDED
![]() |
odtp.yml
CHANGED
@@ -3,8 +3,8 @@ schema-version: "v0.5.0"
|
|
3 |
|
4 |
# Component Information
|
5 |
component-name: odtp-pyannote-whisper
|
6 |
-
component-version: "v0.0
|
7 |
-
component-license:
|
8 |
component-type: ephemeral
|
9 |
component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
|
10 |
component-authors:
|
@@ -123,6 +123,30 @@ data-outputs:
|
|
123 |
description: Transcription/translation output in JSON format with speaker diarization
|
124 |
naming-convention: null
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
# Validation Schemas (Future Development)
|
127 |
schema-input: null
|
128 |
schema-output: null
|
|
|
3 |
|
4 |
# Component Information
|
5 |
component-name: odtp-pyannote-whisper
|
6 |
+
component-version: "v0.1.0"
|
7 |
+
component-license: Apache 2.0
|
8 |
component-type: ephemeral
|
9 |
component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
|
10 |
component-authors:
|
|
|
123 |
description: Transcription/translation output in JSON format with speaker diarization
|
124 |
naming-convention: null
|
125 |
|
126 |
+
- name: OUTPUT_AUDIO_FILE
|
127 |
+
type: .wav
|
128 |
+
path: /odtp/odtp-output
|
129 |
+
description: Audio in wav format
|
130 |
+
naming-convention: null
|
131 |
+
|
132 |
+
- name: OUTPUT_PARAGRAPHS_FILE
|
133 |
+
type: .json
|
134 |
+
path: /odtp/odtp-output
|
135 |
+
description: Markdown file with the paragraphs containing speaker diarization and transcription/translation
|
136 |
+
naming-convention: null
|
137 |
+
|
138 |
+
- name: OUTPUT_MD_FILE
|
139 |
+
type: .md
|
140 |
+
path: /odtp/odtp-output
|
141 |
+
description: Markdown file with the speaker diarization and transcription/translation
|
142 |
+
naming-convention: null
|
143 |
+
|
144 |
+
- name: OUTPUT_PDF_FILE
|
145 |
+
type: .pdf
|
146 |
+
path: /odtp/odtp-output
|
147 |
+
description: PDF file with the speaker diarization and transcription/translation
|
148 |
+
naming-convention: null
|
149 |
+
|
150 |
# Validation Schemas (Future Development)
|
151 |
schema-input: null
|
152 |
schema-output: null
|
requirements.txt
CHANGED
@@ -10,5 +10,7 @@ gradio==5.5.0
|
|
10 |
numpy==1.24.4
|
11 |
md2pdf==1.0.1
|
12 |
transformers==4.48.0
|
13 |
-
yt-dlp
|
14 |
-
python-slugify
|
|
|
|
|
|
10 |
numpy==1.24.4
|
11 |
md2pdf==1.0.1
|
12 |
transformers==4.48.0
|
13 |
+
yt-dlp==2025.1.26
|
14 |
+
python-slugify
|
15 |
+
pyyaml
|
16 |
+
boto3
|