katospiegel commited on
Commit
c72eec6
·
1 Parent(s): 736b9fb

feat: v0.1.0

Browse files
.env.dist CHANGED
@@ -5,6 +5,19 @@ TASK=
5
  LANGUAGE=
6
  INPUT_FILE=
7
  OUTPUT_FILE=
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # ODTP ENV VARIABLES TO CONNECT
10
  ODTP_MONGO_SERVER=
@@ -19,8 +32,4 @@ ODTP_DIGITAL_TWIN=
19
  ODTP_EXCUTION=
20
  ODTP_STEP=
21
  ODTP_COMPONENT=
22
- ODTP_COMPONENT_VERSION=
23
-
24
- #ODTP_API_MODE=TRUE
25
- #ODTP_GRADIO_SHARE=TRUE
26
- TODO: User and password
 
5
  LANGUAGE=
6
  INPUT_FILE=
7
  OUTPUT_FILE=
8
+ QUANTIZE=
9
+
10
+ # VARIABLES RELATED TO THE FULL PIPELINE
11
+ FULL_PIPELINE=
12
+ INPUT_METADATA_FILE=
13
+ S3_MEDIA_BUCKET=
14
+ S3_MEDIA_REGION=
15
+ S3_MEDIA_SECRET=
16
+ S3_MEDIA_KEY=
17
+
18
+ # ODTP ENV VARIABLES FOR API MODE
19
+ ODTP_API_MODE=
20
+ ODTP_GRADIO_SHARE=
21
 
22
  # ODTP ENV VARIABLES TO CONNECT
23
  ODTP_MONGO_SERVER=
 
32
  ODTP_EXCUTION=
33
  ODTP_STEP=
34
  ODTP_COMPONENT=
35
+ ODTP_COMPONENT_VERSION=
 
 
 
 
.github/workflows/multiplatform_docker_build.yml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Multi-Platform Docker Build
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish:
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ # Step 1: Check out the repository and submodules
12
+ - name: Check out code
13
+ uses: actions/checkout@v3
14
+ with:
15
+ submodules: true # Fetch submodules
16
+ fetch-depth: 0 # Ensure the full history is fetched
17
+
18
+ # Step 2: Set up Docker Buildx
19
+ - name: Set up Docker Buildx
20
+ uses: docker/setup-buildx-action@v2
21
+
22
+ # Step 3: Install yq
23
+ - name: Install yq
24
+ run: |
25
+ sudo apt-get update && sudo apt-get install -y wget
26
+ sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
27
+ sudo chmod +x /usr/bin/yq
28
+
29
+ # Step 4: Extract component-version and component-name from odtp.yml
30
+ - name: Extract component-version and component-name
31
+ id: extract_info
32
+ run: |
33
+ VERSION=$(yq e '.component-version' odtp.yml)
34
+ NAME=$(yq e '.component-name' odtp.yml)
35
+ echo "VERSION=${VERSION}"
36
+ echo "NAME=${NAME}"
37
+ echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
38
+ echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
39
+
40
+ # Step 5: Log in to GitHub Container Registry
41
+ - name: Log in to GitHub Container Registry
42
+ uses: docker/login-action@v2
43
+ with:
44
+ registry: ghcr.io
45
+ username: ${{ github.actor }}
46
+ password: ${{ secrets.GITHUB_TOKEN }}
47
+
48
+ # Step 6: Build and push Docker image for multiple platforms
49
+ - name: Build and push Docker image
50
+ run: |
51
+ IMAGE_NAME=ghcr.io/${{ github.repository }}/${{ env.COMPONENT_NAME }}
52
+ docker buildx build \
53
+ --platform linux/amd64,linux/arm64 \
54
+ --build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
55
+ -t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
56
+ -t $IMAGE_NAME:latest \
57
+ --push .
.github/workflows/multiplatform_docker_build_dockerhub.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Multi-Platform Docker Build for Dockerhub
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish:
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ # Step 1: Check out the repository and submodules
12
+ - name: Check out code
13
+ uses: actions/checkout@v3
14
+ with:
15
+ submodules: true # Fetch submodules
16
+ fetch-depth: 0 # Ensure the full history is fetched
17
+
18
+ # Step 2: Set up Docker Buildx
19
+ - name: Set up Docker Buildx
20
+ uses: docker/setup-buildx-action@v2
21
+
22
+ # Step 3: Install yq
23
+ - name: Install yq
24
+ run: |
25
+ sudo apt-get update && sudo apt-get install -y wget
26
+ sudo wget https://github.com/mikefarah/yq/releases/download/v4.35.1/yq_linux_amd64 -O /usr/bin/yq
27
+ sudo chmod +x /usr/bin/yq
28
+
29
+ # Step 4: Extract component-version and component-name from odtp.yml
30
+ - name: Extract component-version and component-name
31
+ id: extract_info
32
+ run: |
33
+ VERSION=$(yq e '.component-version' odtp.yml)
34
+ NAME=$(yq e '.component-name' odtp.yml)
35
+ echo "VERSION=${VERSION}"
36
+ echo "NAME=${NAME}"
37
+ echo "COMPONENT_VERSION=${VERSION}" >> $GITHUB_ENV
38
+ echo "COMPONENT_NAME=${NAME}" >> $GITHUB_ENV
39
+
40
+ # Step 5: Log in to Docker Hub
41
+ - name: Log in to Docker Hub
42
+ uses: docker/login-action@v2
43
+ with:
44
+ username: ${{ secrets.DOCKER_USERNAME }}
45
+ password: ${{ secrets.DOCKER_PASSWORD }}
46
+
47
+ # Step 6: Build and push Docker image for multiple platforms
48
+ - name: Build and push Docker image
49
+ run: |
50
+ IMAGE_NAME=${{ secrets.DOCKER_USERNAME }}/${{ env.COMPONENT_NAME }}
51
+ docker buildx build \
52
+ --platform linux/amd64,linux/arm64 \
53
+ --build-arg COMPONENT_VERSION=${{ env.COMPONENT_VERSION }} \
54
+ -t $IMAGE_NAME:${{ env.COMPONENT_VERSION }} \
55
+ -t $IMAGE_NAME:latest \
56
+ --push .
.github/workflows/push_to_hf.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ # to run this workflow manually from the Actions tab
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ sync-to-hub:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v3
13
+ with:
14
+ fetch-depth: 0
15
+ lfs: true
16
+ - name: Push to hub
17
+ env:
18
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
+ HF_USERNAME: ${{secrets.HF_USERNAME}}
20
+ run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/katospiegel/odtp-pyannote-whisper main
Dockerfile CHANGED
@@ -1,16 +1,25 @@
1
  FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
2
 
3
- # Set environment variable to avoid interactive prompts
4
- ENV DEBIAN_FRONTEND=noninteractive
5
-
6
- # Weasyprint is necessary for pdf printing
7
- RUN apt-get update && apt-get install -y apt-utils weasyprint
8
 
9
  RUN apt-get install -y python3.11 python3.11-venv python3-pip
10
 
11
- COPY odtp-component-client/requirements.txt /tmp/odtp.requirements.txt
12
- RUN pip install -r /tmp/odtp.requirements.txt
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
14
 
15
  #######################################################################
16
  # PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
@@ -23,7 +32,7 @@ RUN pip install -r /tmp/requirements.txt
23
  # Dependencies
24
 
25
  RUN apt-get update && \
26
- apt-get install -y zip git && \
27
  apt-get clean && \
28
  rm -rf /var/lib/apt/lists/*
29
 
@@ -31,6 +40,8 @@ RUN apt-get update && \
31
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
32
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
33
 
 
 
34
 
35
  ######################################################################
36
  # ODTP COMPONENT CONFIGURATION.
@@ -41,18 +52,12 @@ COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
41
  # ODTP Preparation
42
  ##################################################
43
 
44
- RUN mkdir /odtp \
45
- /odtp/odtp-config \
46
- /odtp/odtp-app \
47
- /odtp/odtp-component-client \
48
- /odtp/odtp-logs \
49
- /odtp/odtp-input \
50
- /odtp/odtp-workdir \
51
- /odtp/odtp-output
52
 
53
- # This last 2 folders are specific from odtp-eqasim
54
- RUN mkdir /odtp/odtp-workdir/cache \
55
- /odtp/odtp-workdir/output
56
 
57
  # This copy all the information for running the ODTP component
58
  COPY odtp.yml /odtp/odtp-config/odtp.yml
@@ -66,8 +71,15 @@ WORKDIR /odtp
66
  # Fix for end of the line issue on Windows
67
  ##################################################
68
 
 
 
 
 
 
 
69
  # Fix for end of the line issue on Windows. Avoid error when building on windows
70
  RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
71
 
 
72
 
73
  ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
 
1
  FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
2
 
3
+ RUN apt-get update && apt-get install -y apt-utils
 
 
 
 
4
 
5
  RUN apt-get install -y python3.11 python3.11-venv python3-pip
6
 
7
+ # Create directories and set permissions before switching to the non-root user
8
+ RUN mkdir -p /odtp/odtp-tmp \
9
+ /odtp \
10
+ /odtp/odtp-config \
11
+ /odtp/odtp-app \
12
+ /odtp/odtp-component-client \
13
+ /odtp/odtp-logs \
14
+ /odtp/odtp-input \
15
+ /odtp/odtp-workdir \
16
+ /odtp/odtp-output \
17
+ /home/user && \
18
+ chown -R 1000:1000 /odtp /home/user
19
+
20
 
21
+ COPY odtp-component-client/requirements.txt /odtp/odtp-tmp/odtp.requirements.txt
22
+ RUN pip install -r /odtp/odtp-tmp/odtp.requirements.txt
23
 
24
  #######################################################################
25
  # PLEASE INSTALL HERE ALL SYSTEM DEPENDENCIES RELATED TO YOUR TOOL
 
32
  # Dependencies
33
 
34
  RUN apt-get update && \
35
+ apt-get install -y zip git libglib2.0-0 libpango1.0-0 && \
36
  apt-get clean && \
37
  rm -rf /var/lib/apt/lists/*
38
 
 
40
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
41
  COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/
42
 
43
+ # Adjust permissions so user 1000 can access /usr/local/bin
44
+ RUN chown -R 1000:1000 /usr/local/bin/
45
 
46
  ######################################################################
47
  # ODTP COMPONENT CONFIGURATION.
 
52
  # ODTP Preparation
53
  ##################################################
54
 
55
+ # Switch to the "user" user
56
+ USER 1000
 
 
 
 
 
 
57
 
58
+ # Set home to the user's home directory
59
+ ENV HOME=/home/user \
60
+ PATH=/home/user/.local/bin:$PATH
61
 
62
  # This copy all the information for running the ODTP component
63
  COPY odtp.yml /odtp/odtp-config/odtp.yml
 
71
  # Fix for end of the line issue on Windows
72
  ##################################################
73
 
74
+ # Switch back to root user to run sed command
75
+ USER root
76
+ RUN chown -R 1000:1000 /odtp
77
+
78
+ # Switch back to the "user" user
79
+ USER 1000
80
  # Fix for end of the line issue on Windows. Avoid error when building on windows
81
  RUN find /odtp -type f -iname "*.sh" -exec sed -i 's/\r$//' {} \;
82
 
83
+ EXPOSE 7860
84
 
85
  ENTRYPOINT ["bash", "/odtp/odtp-component-client/startup.sh"]
LICENSE CHANGED
@@ -1,15 +1,202 @@
1
- BSD 3-Clause "New" or "Revised" License
2
- Licence ID
3
- BSD-3-Clause
4
- Licence text
5
- Copyright (c) 2023-2024 Swiss Data Science Center. All rights reserved.
6
 
7
- Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 
 
8
 
9
- 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10
 
11
- 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
12
 
13
- 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 
14
 
15
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
 
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
 
8
+ 1. Definitions.
9
 
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
 
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [2025] [SDSC]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
README.md CHANGED
@@ -1,20 +1,28 @@
1
  # odtp-pyannote-whisper
2
 
3
- This component is still under development.
4
-
5
- Add here your badges:
6
- [![Launch in your ODTP](https://img.shields.io/badge/Launch%20in%20your-ODTP-blue?logo=launch)](http://localhost:8501/launch-component)
7
- [![Compatible with ODTP v0.5.x](https://img.shields.io/badge/Compatible%20with-ODTP%20v0.5.0-green)]("")
8
 
9
  > [!NOTE]
10
  > This repository makes use of submodules. Therefore, when cloning it you need to include them.
11
  >
12
  > `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
13
 
14
- This pipeline processes a `.wav` audio file by detecting the number of speakers present in the recording using `pyannote.audio`. For each detected speaker segment, it employs `OpenAI's Whisper model` to transcribe or translate the speech individually. This approach ensures accurate and speaker-specific transcriptions or translations, providing a clear understanding of who said what throughout the audio.
15
 
16
  Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
17
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ## Table of Contents
19
 
20
  - [Tools Information](#tools-information)
@@ -42,12 +50,12 @@ Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You
42
 
43
  ## How to add this component to your ODTP instance
44
 
45
- In order to add this component to your ODTP CLI, you can use. If you want to use the component directly, please refer to the docker section.
46
 
47
  ``` bash
48
  odtp new odtp-component-entry \
49
  --name odtp-pyannote-whisper \
50
- --component-version v0.0.1 \
51
  --repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
52
  ```
53
 
@@ -92,14 +100,32 @@ Build the dockerfile.
92
  docker build -t odtp-pyannote-whisper .
93
  ```
94
 
95
- Run the following command. Mount the correct volumes for input/output/logs folders.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  ``` bash
98
  docker run -it --rm \
99
  -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
100
  -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
101
  -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
102
- --env-file .env odtp-pyannote-whisper
 
103
  ```
104
 
105
  ### Development Mode
@@ -128,24 +154,42 @@ docker run -it --rm \
128
  --env-file .env odtp-pyannote-whisper
129
  ```
130
 
 
 
 
 
 
 
 
 
 
 
 
131
  ### Running in API Mode
132
 
133
- To run the component in API mode and expose a port, use the following command:
 
 
 
 
 
 
 
134
 
135
  ``` bash
136
  docker run -it --rm \
137
- -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
138
- -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
139
- -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
140
- -p {HOST_PORT}:7860 \
141
  --env-file .env \
142
- --entrypoing python3 \
143
- odtp-pyannote-whisper \
144
- /odtp/odtp-app/gradio_app.py
145
  ```
146
 
147
- ## Credits and references
 
 
148
 
149
- SDSC
 
150
 
151
  This component has been created using the `odtp-component-template` `v0.5.0`.
 
 
 
1
  # odtp-pyannote-whisper
2
 
3
+ [![Compatible with ODTP v0.5.x](https://img.shields.io/badge/Compatible%20with-ODTP%20v0.5.0-green)]("") [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)
 
 
 
 
4
 
5
  > [!NOTE]
6
  > This repository makes use of submodules. Therefore, when cloning it you need to include them.
7
  >
8
  > `git clone --recurse-submodules https://github.com/sdsc-ordes/odtp-pyannote-whisper`
9
 
10
+ This pipeline processes a `.wav` or `mp4` media file by detecting the number of speakers present in the recording using `pyannote.audio`. For each detected speaker segment, it employs `OpenAI's Whisper model` to transcribe or translate the speech individually. This approach ensures accurate and speaker-specific transcriptions or translations, providing a clear understanding of who said what throughout the audio.
11
 
12
  Note: This application utilizes `pyannote.audio` and OpenAI's Whisper model. You must accept the terms of use on Hugging Face for the `pyannote/segmentation` and `pyannote/speaker-diarization` models before using this application.
13
 
14
+ - [Speaker-Diarization](https://huggingface.co/pyannote/speaker-diarization-3.1)
15
+ - [Speaker-Segmentation](https://huggingface.co/pyannote/segmentation-3.0)
16
+
17
+ After accepting the terms and conditions for those models, you can obtain your Hugging Face API key to allow access to these models:
18
+
19
+ - [Hugging Face Access Keys](https://huggingface.co/settings/tokens)
20
+
21
+ This token should be provided to the component via the `ENV` variables or by the corresponding text field in the web app interface ([Here](https://huggingface.com/spaces/katospiegel/odtp-pyannote-whisper)).
22
+
23
+ ![](assets/screenshot.png)
24
+
25
+
26
  ## Table of Contents
27
 
28
  - [Tools Information](#tools-information)
 
50
 
51
  ## How to add this component to your ODTP instance
52
 
53
+ This component can be run directly with Docker; however, it is designed to be run with [ODTP](https://odtp-org.github.io/odtp-manuals/). In order to add this component to your ODTP CLI, you can use the command below. If you want to use the component directly, please refer to the Docker section.
54
 
55
  ``` bash
56
  odtp new odtp-component-entry \
57
  --name odtp-pyannote-whisper \
58
+ --component-version v0.1.0 \
59
  --repository https://github.com/sdsc-ordes/odtp-pyannote-whisper
60
  ```
61
 
 
100
  docker build -t odtp-pyannote-whisper .
101
  ```
102
 
103
+ Then create a `.env` file similar to `.env.dist` and fill in the variable values, as in this example:
104
+
105
+ ```
106
+ MODEL=base
107
+ HF_TOKEN=hf_xxxxxxxxxxx
108
+ TASK=transcribe
109
+ INPUT_FILE=HRC_20220328T0000.mp4
110
+ OUTPUT_FILE=HRC_20220328T0000
111
+ VERBOSE=TRUE
112
+ ```
113
+
114
+ Then create 3 folders:
115
+
116
+ - `odtp-input`, where your input data should be located.
117
+ - `odtp-output`, where your output data will be stored.
118
+ - `odtp-logs`, where the logs will be shared.
119
+
120
+ After this, you can run the following command and the pipeline will execute.
121
 
122
  ``` bash
123
  docker run -it --rm \
124
  -v {PATH_TO_YOUR_INPUT_VOLUME}:/odtp/odtp-input \
125
  -v {PATH_TO_YOUR_OUTPUT_VOLUME}:/odtp/odtp-output \
126
  -v {PATH_TO_YOUR_LOGS_VOLUME}:/odtp/odtp-logs \
127
+ --env-file .env \
128
+ odtp-pyannote-whisper
129
  ```
130
 
131
  ### Development Mode
 
154
  --env-file .env odtp-pyannote-whisper
155
  ```
156
 
157
+ On Windows, this is the command to execute.
158
+
159
+ ``` powershell
160
+ docker run -it --rm `
161
+ --gpus all `
162
+ -v ${PWD}/odtp-input:/odtp/odtp-input `
163
+ -v ${PWD}/odtp-output:/odtp/odtp-output `
164
+ -v ${PWD}/odtp-logs:/odtp/odtp-logs `
165
+ --env-file .env odtp-pyannote-whisper
166
+ ```
167
+
168
  ### Running in API Mode
169
 
170
+ To run the component in API mode and expose a port, you need to use the following environment variables:
171
+
172
+ ```
173
+ ODTP_API_MODE=TRUE
174
+ ODTP_GRADIO_SHARE=FALSE #Only if you want to share the app via the gradio tunneling
175
+ ```
176
+
177
+ After the configuration, you can run:
178
 
179
  ``` bash
180
  docker run -it --rm \
181
+ -p 7860:7860 \
 
 
 
182
  --env-file .env \
183
+ odtp-pyannote-whisper
 
 
184
  ```
185
 
186
+ Then access the web interface at `localhost:7860` in your browser.
187
+
188
+ ![](assets/screenshot.png)
189
 
190
+
191
+ ## Credits and references
192
 
193
  This component has been created using the `odtp-component-template` `v0.5.0`.
194
+
195
+ The development of this repository has been carried out by SDSC.
app/add_annotation.py CHANGED
@@ -1,17 +1,3 @@
1
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-transcription_original.json /odtp/odtp-input/HRC_20160622T0000-initial.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_transcription --origin_channel original --id transcription_original
2
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20160622T0000-translation_original_english.json /odtp/odtp-output/HRC_20160622T0000.json /odtp/odtp-output/HRC_20160622T0000.json --type audio_translation --origin_channel original --id translation_original_english
3
-
4
-
5
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-transcription_original.json /odtp/odtp-input/HRC_20220328T0000-initial.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_transcription --origin_channel original --id transcription_original
6
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220328T0000-translation_original_english.json /odtp/odtp-output/HRC_20220328T0000.json /odtp/odtp-output/HRC_20220328T0000.json --type audio_translation --origin_channel original --id translation_original_english
7
-
8
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-transcription_original.json /odtp/odtp-input/HRC_20220929T0000-initial.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_transcription --origin_channel original --id transcription_original
9
- # python3 addAnnotation.py /odtp/odtp-output/HRC_20220929T0000-translation_original_english.json /odtp/odtp-output/HRC_20220929T0000.json /odtp/odtp-output/HRC_20220929T0000.json --type audio_translation --origin_channel original --id translation_original_english
10
-
11
- # python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-transcription_original.json /odtp/odtp-input/HRC_20221010T1000-initial.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_transcription --origin_channel original --id transcription_original
12
- # python3 add_annotation.py /odtp/odtp-output/HRC_20221010T1000-translation_original_english.json /odtp/odtp-output/HRC_20221010T1000.json /odtp/odtp-output/HRC_20221010T1000.json --type audio_translation --origin_channel original --id translation_original_english
13
-
14
-
15
  import json
16
  import argparse
17
  from datetime import timedelta
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  import argparse
3
  from datetime import timedelta
app/app.py CHANGED
@@ -22,14 +22,15 @@ import json
22
  from dataclasses import dataclass, asdict
23
  from jsonschema import validate, ValidationError
24
 
25
- import createpdf
26
- import paragraphsCreator
27
 
28
  from pydub import AudioSegment
29
  import yt_dlp
30
 
31
  from slugify import slugify
32
  import uuid
 
33
 
34
 
35
 
@@ -470,7 +471,7 @@ def clip_audio(audio_file_path, sample_rate, start, end, output_path):
470
  # Write the audio segment to the output path
471
  sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
472
 
473
- def convert_mpx_to_wav(file_path):
474
  if file_path.lower().endswith('.mp3'):
475
  # Load the MP3 file
476
  audio = AudioSegment.from_mp3(file_path)
@@ -482,12 +483,12 @@ def convert_mpx_to_wav(file_path):
482
  raise ValueError("Input file must be an MP3 or MP4 file")
483
 
484
  # Define the output path
485
- wav_file_path = os.path.splitext(file_path)[0] + '.wav'
486
 
487
  # Export as WAV
488
- audio.export(wav_file_path, format='wav')
489
 
490
- return wav_file_path
491
 
492
 
493
  def download_youtube_video(url, filename, output_path='/tmp'):
@@ -502,6 +503,8 @@ def download_youtube_video(url, filename, output_path='/tmp'):
502
  }],
503
  }
504
 
 
 
505
  if not os.path.exists(output_path):
506
  os.makedirs(output_path)
507
 
@@ -511,10 +514,11 @@ def download_youtube_video(url, filename, output_path='/tmp'):
511
  print(output_file)
512
  base, ext = os.path.splitext(output_file)
513
 
514
- new_file = base + '.wav'
515
  return new_file
516
 
517
  import subprocess
 
518
 
519
  def convert_video_to_wav(input_file, output_file):
520
  """
@@ -551,73 +555,31 @@ def convert_video_to_wav(input_file, output_file):
551
  print(f"Error during conversion: {e}")
552
 
553
 
554
- ######################## Parallel
555
- # import multiprocessing
556
- # import tempfile
557
-
558
- # def process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
559
- # start, end, speaker = segment
560
- # clip_path = f"/tmp/speaker_{speaker}_start_{start:.1f}_end_{end:.1f}.wav"
561
- # clip_audio(file_path, sample_rate, start, end, clip_path)
562
-
563
- # result = asr_model.transcribe(start=start, end=end, options=whisper_options)
564
- # language = result.get('language', args.language or 'unknown')
565
-
566
- # if args.verbose:
567
- # print(f"start={start:.1f}s stop={end:.1f}s lang={language} {speaker}")
568
-
569
- # return {
570
- # 'result': result,
571
- # 'speaker': speaker,
572
- # 'start': start,
573
- # 'language': language
574
- # }
575
-
576
- # def chunkify(lst, n):
577
- # for i in range(0, len(lst), n):
578
- # yield lst[i:i + n]
579
-
580
- # def process_chunk(chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options):
581
- # results = []
582
- # for segment in chunk:
583
- # result = process_segment(segment, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)
584
- # results.append(result)
585
-
586
- # temp_file = tempfile.mktemp(suffix='.json')
587
- # with open(temp_file, 'w') as f:
588
- # json.dump(results, f)
589
-
590
- # return temp_file
591
-
592
- ########################
593
-
594
-
595
  def main(args):
596
  # TODO: Take out the file_path from ODTP here
597
- if args.input_file.startswith('http://') or args.input_file.startswith('https://'):
598
- file_path = download_youtube_video(args.input_file, filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
599
  base_slug = slugify(file_path, separator='_')
600
- #file_path = convert_mpx_to_wav(file_path)
601
  elif args.input_file.lower().endswith('.mp3'):
602
  file_path = convert_mpx_to_wav(args.input_file)
603
- #file_path = "/odtp/odtp-input/" + file_path
604
  elif args.input_file.lower().endswith('.wav'):
605
  file_path = args.input_file
606
- #file_path = "/odtp/odtp-input/" + file_path
607
  elif args.input_file.lower().endswith('.mp4'):
608
- file_path = convert_mpx_to_wav(args.input_file)
609
- #file_path = "/odtp/odtp-input/" + file_path
610
  elif args.input_file.lower().endswith('.rm'):
611
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '.wav')
612
  convert_video_to_wav(args.input_file, file_path)
613
  elif args.input_file.lower().endswith('.f4v'):
614
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '.wav')
615
  convert_video_to_wav(args.input_file, file_path)
616
  elif args.input_file.lower().endswith('.mkv'):
617
- file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '.wav')
618
  convert_video_to_wav(args.input_file, file_path)
619
  else:
620
- raise ValueError("Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file")
621
 
622
 
623
  diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
@@ -691,39 +653,21 @@ def main(args):
691
  writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
692
 
693
  writer_json.finalize()
694
- # Parallel testing
695
- # chunk_size = 2 #args.chunk_size # Assume chunk_size is passed as an argument
696
- # temp_files = []
697
-
698
- # with multiprocessing.Pool() as pool:
699
- # chunks = list(chunkify(grouped_segments, chunk_size))
700
- # results = [pool.apply_async(process_chunk, (chunk, file_path, sample_rate, whisper_options, asr_model, args, writer, writer_options)) for chunk in chunks]
701
-
702
- # for result in results:
703
- # temp_file = result.get()
704
- # temp_files.append(temp_file)
705
-
706
- # for temp_file in temp_files:
707
- # with open(temp_file, 'r') as f:
708
- # results = json.load(f)
709
- # for result in results:
710
- # writer(result['result'], args.output_file, result['speaker'], result['start'], writer_options)
711
- # writer_json(generate_segments(result['result']['segments'], result['speaker'], result['language']), args.output_json_file)
712
- # os.remove(temp_file)
713
 
714
  # If you want to validate JSON, paragraphs, PDF creation, etc.
715
- paragraphsCreator.process_paragraphs(
716
  args.output_json_file,
717
  args.output_paragraphs_json_file,
718
  3
719
  )
720
- createpdf.convert_json_to_pdf(
721
  args.output_paragraphs_json_file,
722
  args.output_md_file,
723
  args.output_pdf_file
724
  )
725
 
726
 
 
727
  if __name__ == '__main__':
728
  # Multiprocessing requires spawn when working with CUDA
729
  #multiprocessing.set_start_method('spawn')
 
22
  from dataclasses import dataclass, asdict
23
  from jsonschema import validate, ValidationError
24
 
25
+ import create_pdf
26
+ import paragraphs_creator
27
 
28
  from pydub import AudioSegment
29
  import yt_dlp
30
 
31
  from slugify import slugify
32
  import uuid
33
+ import yaml
34
 
35
 
36
 
 
471
  # Write the audio segment to the output path
472
  sf.write(output_path, waveform[start_sample:end_sample], sr, format='WAV')
473
 
474
+ def convert_mpx_to_wav(file_path, output_path):
475
  if file_path.lower().endswith('.mp3'):
476
  # Load the MP3 file
477
  audio = AudioSegment.from_mp3(file_path)
 
483
  raise ValueError("Input file must be an MP3 or MP4 file")
484
 
485
  # Define the output path
486
+ #wav_file_path = os.path.splitext(file_path)[0] + '.wav'
487
 
488
  # Export as WAV
489
+ audio.export(output_path, format='wav')
490
 
491
+ return output_path
492
 
493
 
494
  def download_youtube_video(url, filename, output_path='/tmp'):
 
503
  }],
504
  }
505
 
506
+
507
+
508
  if not os.path.exists(output_path):
509
  os.makedirs(output_path)
510
 
 
514
  print(output_file)
515
  base, ext = os.path.splitext(output_file)
516
 
517
+ new_file = base + '-original.wav'
518
  return new_file
519
 
520
  import subprocess
521
+ import shutil
522
 
523
  def convert_video_to_wav(input_file, output_file):
524
  """
 
555
  print(f"Error during conversion: {e}")
556
 
557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  def main(args):
559
  # TODO: Take out the file_path from ODTP here
560
+ if args.input_file.startswith('/odtp/odtp-input/http://') or args.input_file.startswith('/odtp/odtp-input/https://'):
561
+ file_path = download_youtube_video(args.input_file.replace("/odtp/odtp-input/",""), filename=os.path.basename(args.output_file) , output_path=os.path.dirname(args.output_file))
562
  base_slug = slugify(file_path, separator='_')
 
563
  elif args.input_file.lower().endswith('.mp3'):
564
  file_path = convert_mpx_to_wav(args.input_file, "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mp3', '-original.wav'))
565
+ shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.mp3')))
566
  elif args.input_file.lower().endswith('.wav'):
567
  file_path = args.input_file
568
+ shutil.copy(file_path, os.path.join("/odtp/odtp-output", os.path.basename(file_path).replace('.wav', '-original.wav')))
569
  elif args.input_file.lower().endswith('.mp4'):
570
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mp4', '-original.wav')
571
+ convert_mpx_to_wav(args.input_file, file_path)
572
  elif args.input_file.lower().endswith('.rm'):
573
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.rm', '-original.wav')
574
  convert_video_to_wav(args.input_file, file_path)
575
  elif args.input_file.lower().endswith('.f4v'):
576
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.f4v', '-original.wav')
577
  convert_video_to_wav(args.input_file, file_path)
578
  elif args.input_file.lower().endswith('.mkv'):
579
+ file_path = "/odtp/odtp-output/" + os.path.basename(args.input_file).replace('.mkv', '-original.wav')
580
  convert_video_to_wav(args.input_file, file_path)
581
  else:
582
+ raise ValueError(f"Input file must be an MP3, WAV, RM, F4V, MKV, Youtube Link, or MP4 file. Input file: {args.input_file}")
583
 
584
 
585
  diarization, _, sample_rate = diarize_audio(args.hf_token, file_path)
 
653
  writer_json(generate_segments(result['segments'], speaker, language), args.output_json_file)
654
 
655
  writer_json.finalize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
 
657
  # If you want to validate JSON, paragraphs, PDF creation, etc.
658
+ paragraphs_creator.process_paragraphs(
659
  args.output_json_file,
660
  args.output_paragraphs_json_file,
661
  3
662
  )
663
+ create_pdf.convert_json_to_pdf(
664
  args.output_paragraphs_json_file,
665
  args.output_md_file,
666
  args.output_pdf_file
667
  )
668
 
669
 
670
+
671
  if __name__ == '__main__':
672
  # Multiprocessing requires spawn when working with CUDA
673
  #multiprocessing.set_start_method('spawn')
app/app.sh CHANGED
@@ -1,30 +1,72 @@
1
  #!/bin/bash
2
 
3
- # if [ -n "$LANGUAGE" ]; then
4
- python3 /odtp/odtp-app/app.py \
5
- --model $MODEL \
6
- $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
7
- --hf-token $HF_TOKEN \
8
- --task $TASK \
9
- $( [ "$LANGUAGE" = "TRUE" ] && echo "--language" ) \
10
- --input-file /odtp/odtp-input/$INPUT_FILE \
11
- --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
12
- --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
13
- --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}_paragraphs.json \
14
- --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
15
- --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
16
- $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
17
- # else
18
- # python3 /odtp/odtp-app/app.py \
19
- # --model $MODEL \
20
- # $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
21
- # --hf-token $HF_TOKEN \
22
- # --task $TASK \
23
- # --input-file /odtp/odtp-input/$INPUT_FILE \
24
- # --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
25
- # --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
26
- # --output-paragraphs-json-file /odtp/odtp-output/$OUTPUT_FILE-paragraphs.json \
27
- # --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
28
- # --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
29
- # $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
30
- # fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/bin/bash
2
 
3
+ if [ -n "$FULL_PIPELINE" ]; then
4
+
5
+ echo "RUNNING TRANSCRIPTION AND EN TRANSLATION PIPELINE"
6
+ python3 /odtp/odtp-app/app.py \
7
+ --model $MODEL \
8
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
9
+ --hf-token $HF_TOKEN \
10
+ --task transcribe \
11
+ --input-file /odtp/odtp-input/$INPUT_FILE \
12
+ --output-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.srt \
13
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
14
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-transcription_original_paragraphs.json \
15
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.md \
16
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-transcription_original.pdf \
17
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
18
+
19
+ python3 /odtp/odtp-app/app.py \
20
+ --model $MODEL \
21
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
22
+ --hf-token $HF_TOKEN \
23
+ --task translate \
24
+ --language en \
25
+ --input-file /odtp/odtp-input/$INPUT_FILE \
26
+ --output-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.srt \
27
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
28
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}-translation_original_english_paragraphs.json \
29
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.md \
30
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.pdf \
31
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
32
+
33
+ echo "Adding annotations"
34
+ python3 /odtp/odtp-app/add_annotation.py \
35
+ /odtp/odtp-output/$OUTPUT_FILE-transcription_original.json \
36
+ /odtp/odtp-input/$INPUT_METADATA_FILE \
37
+ /odtp/odtp-output/$OUTPUT_FILE.json \
38
+ --type audio_transcription \
39
+ --origin_channel original \
40
+ --id transcription_original
41
+
42
+ python3 /odtp/odtp-app/add_annotation.py \
43
+ /odtp/odtp-output/$OUTPUT_FILE-translation_original_english.json \
44
+ /odtp/odtp-output/$OUTPUT_FILE.json \
45
+ /odtp/odtp-output/$OUTPUT_FILE.json \
46
+ --type audio_translation \
47
+ --origin_channel original \
48
+ --id translation_original_english
49
+
50
+ echo "Generating yml file"
51
+ python3 /odtp/odtp-app/project_metadata_export.py /odtp/odtp-output/
52
+
53
+ echo "Uploading to S3"
54
+ #python3 /odtp/odtp-app/s3_upload.py
55
+ #TBD
56
+
57
+ else
58
+ python3 /odtp/odtp-app/app.py \
59
+ --model $MODEL \
60
+ $( [ "$QUANTIZE" = "TRUE" ] && echo "--quantize" ) \
61
+ --hf-token $HF_TOKEN \
62
+ --task $TASK \
63
+ $( [ -n "$LANGUAGE" ] && echo "--language $LANGUAGE" ) \
64
+ --input-file /odtp/odtp-input/$INPUT_FILE \
65
+ --output-file /odtp/odtp-output/$OUTPUT_FILE.srt \
66
+ --output-json-file /odtp/odtp-output/$OUTPUT_FILE.json \
67
+ --output-paragraphs-json-file /odtp/odtp-output/${OUTPUT_FILE}_paragraphs.json \
68
+ --output-md-file /odtp/odtp-output/$OUTPUT_FILE.md \
69
+ --output-pdf-file /odtp/odtp-output/$OUTPUT_FILE.pdf \
70
+ $( [ "$VERBOSE" = "TRUE" ] && echo "--verbose" )
71
+ fi
72
+
app/{createpdf.py → create_pdf.py} RENAMED
File without changes
app/{paragraphsCreator.py → paragraphs_creator.py} RENAMED
File without changes
app/project_metadata_export.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import re
5
+ import yaml
6
+
7
+ def parse_basename_and_date(folder):
8
+ """
9
+ Searches the folder for a file matching the pattern 'HRC_YYYYMMDDT[HHMM]'.
10
+ Returns the base name and a formatted session date (e.g., "2016 06 22 00:00").
11
+ """
12
+ pattern = re.compile(r"^(HRC_\d{8}T\d{4})")
13
+ for filename in os.listdir(folder):
14
+ match = pattern.match(filename)
15
+ if match:
16
+ base_name = match.group(1)
17
+ # Extract date and time parts from the base name
18
+ dt_match = re.match(r"HRC_(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})", base_name)
19
+ if dt_match:
20
+ year, month, day, hour, minute = dt_match.groups()
21
+ session_date = f"{year} {month} {day} {hour}:{minute}"
22
+ return base_name, session_date
23
+ return None, None
24
+
25
+ def check_video_file(folder, base_name):
26
+ """
27
+ Checks if an MP4 file with the given base name exists in the folder.
28
+ """
29
+ video_filename = f"{base_name}.mp4"
30
+ return video_filename in os.listdir(folder)
31
+
32
+ def generate_metadata(base_name, session_date, include_video):
33
+ """
34
+ Builds a metadata dictionary containing file entries based on the base name,
35
+ session date, and whether a video file is present.
36
+ """
37
+ metadata = {
38
+ "files": [
39
+ {
40
+ "name": f"{base_name}.json",
41
+ "type": "json",
42
+ "description": f"JSON file containing metadata transcription and translation from the {session_date} session"
43
+ },
44
+ {
45
+ "name": f"{base_name}-files.yml",
46
+ "type": "yml",
47
+ "description": f"YAML file containing metadata of the files from the {session_date} session"
48
+ }
49
+ ]
50
+ }
51
+
52
+ if include_video:
53
+ metadata["files"].append({
54
+ "name": f"{base_name}.mp4",
55
+ "type": "mp4",
56
+ "description": f"MP4 video file from the {session_date} session"
57
+ })
58
+
59
+ metadata["files"].extend([
60
+ {
61
+ "name": f"{base_name}-original.wav",
62
+ "type": "wav",
63
+ "description": f"Original audio file from the {session_date} session"
64
+ },
65
+ {
66
+ "name": f"{base_name}-transcription_original.srt",
67
+ "type": "srt",
68
+ "description": f"Transcription file in SRT format from the original audio of the {session_date} session"
69
+ },
70
+ {
71
+ "name": f"{base_name}-transcription_original.pdf",
72
+ "type": "pdf",
73
+ "description": f"PDF file containing the transcription from the original audio of the {session_date} session"
74
+ },
75
+ {
76
+ "name": f"{base_name}-translation_original_english.srt",
77
+ "type": "srt",
78
+ "description": f"Translation file in SRT format to English from the original audio of the {session_date} session"
79
+ },
80
+ {
81
+ "name": f"{base_name}-translation_original_english.pdf",
82
+ "type": "pdf",
83
+ "description": f"PDF file containing the English translation from the original audio of the {session_date} session"
84
+ }
85
+ ])
86
+
87
+ return metadata
88
+
89
+ def write_yaml_file(metadata, output_file):
90
+ """
91
+ Writes the metadata dictionary to a YAML file.
92
+ """
93
+ with open(output_file, "w") as f:
94
+ yaml.dump(metadata, f, sort_keys=False, default_flow_style=True)
95
+ print(f"Metadata YAML file written to {output_file}")
96
+
97
+ def main():
98
+ parser = argparse.ArgumentParser(
99
+ description="Generate YAML metadata for session files in a folder."
100
+ )
101
+ parser.add_argument("folder", help="Path to the folder containing the session files.")
102
+ args = parser.parse_args()
103
+
104
+ folder = args.folder
105
+ if not os.path.isdir(folder):
106
+ print(f"Error: {folder} is not a valid directory.")
107
+ return
108
+
109
+ base_name, session_date = parse_basename_and_date(folder)
110
+ if not base_name:
111
+ print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
112
+ return
113
+
114
+ include_video = check_video_file(folder, base_name)
115
+ metadata = generate_metadata(base_name, session_date, include_video)
116
+
117
+ # Output file is always in the same folder and named as <base_name>-files.yml
118
+ output_file = os.path.join(folder, f"{base_name}-files.yml")
119
+ write_yaml_file(metadata, output_file)
120
+
121
+ if __name__ == "__main__":
122
+ main()
app/s3_upload.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import re
4
+ import argparse
5
+ import boto3
6
+ from botocore.exceptions import NoCredentialsError, ClientError
7
+
8
+ def parse_basename(folder):
9
+ """
10
+ Scans the folder for a file matching the pattern 'HRC_YYYYMMDDT[HHMM]'
11
+ and returns the base name.
12
+ """
13
+ pattern = re.compile(r"^(HRC_\d{8}T\d{4})")
14
+ for filename in os.listdir(folder):
15
+ match = pattern.match(filename)
16
+ if match:
17
+ return match.group(1)
18
+ return None
19
+
20
+ def upload_files_to_s3(folder, bucket, base_name, region):
21
+ """
22
+ Uploads all files in the folder that start with the base_name to the specified S3 bucket,
23
+ placing them under a folder (key prefix) named after the base_name.
24
+ """
25
+ s3_key = os.environ.get("S3_MEDIA_KEY")
26
+ s3_secret = os.environ.get("S3_MEDIA_SECRET")
27
+
28
+ s3_client = boto3.client('s3', aws_access_key_id=s3_key, aws_secret_access_key=s3_secret, region_name=region)
29
+
30
+
31
+ # Gather all files that start with the base name
32
+ files_to_upload = [f for f in os.listdir(folder) if f.startswith(base_name)]
33
+ if not files_to_upload:
34
+ print(f"No files starting with '{base_name}' found in {folder}")
35
+ return
36
+
37
+ for file in files_to_upload:
38
+ file_path = os.path.join(folder, file)
39
+ s3_key = f"{base_name}/{file}" # Create a folder in S3 named after the base name
40
+ try:
41
+ s3_client.upload_file(file_path, bucket, s3_key)
42
+ print(f"Uploaded '{file}' to s3://{bucket}/{s3_key}")
43
+ except (NoCredentialsError, ClientError) as e:
44
+ print(f"Failed to upload '{file}': {e}")
45
+
46
+ def main():
47
+ parser = argparse.ArgumentParser(
48
+ description="Upload session files to an S3 bucket under a folder named by the base name."
49
+ )
50
+ parser.add_argument("folder", help="Path to the folder containing the session files")
51
+ args = parser.parse_args()
52
+
53
+ folder = args.folder
54
+ if not os.path.isdir(folder):
55
+ print(f"Error: '{folder}' is not a valid directory.")
56
+ return
57
+
58
+ # Retrieve environment variables for the bucket and datacenter (region)
59
+ bucket = os.environ.get("S3_MEDIA_BUCKET")
60
+ if not bucket:
61
+ print("Error: S3_MEDIA_BUCKET environment variable not set.")
62
+ return
63
+
64
+ region = os.environ.get("S3_MEDIA_REGION", "us-east-1")
65
+
66
+ base_name = parse_basename(folder)
67
+ if not base_name:
68
+ print("Error: Could not find a file matching the expected pattern 'HRC_YYYYMMDDT[HHMM]' in the folder.")
69
+ return
70
+
71
+ upload_files_to_s3(folder, bucket, base_name, region)
72
+
73
+ if __name__ == "__main__":
74
+ main()
75
+
assets/screenshot.png ADDED
odtp.yml CHANGED
@@ -3,8 +3,8 @@ schema-version: "v0.5.0"
3
 
4
  # Component Information
5
  component-name: odtp-pyannote-whisper
6
- component-version: "v0.0.1"
7
- component-license: AGPL 3.0
8
  component-type: ephemeral
9
  component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
10
  component-authors:
@@ -123,6 +123,30 @@ data-outputs:
123
  description: Transcription/translation output in JSON format with speaker diarization
124
  naming-convention: null
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # Validation Schemas (Future Development)
127
  schema-input: null
128
  schema-output: null
 
3
 
4
  # Component Information
5
  component-name: odtp-pyannote-whisper
6
+ component-version: "v0.1.0"
7
+ component-license: Apache 2.0
8
  component-type: ephemeral
9
  component-description: Transcribe or translate audio files using Whisper and Pyannote for speaker diarization
10
  component-authors:
 
123
  description: Transcription/translation output in JSON format with speaker diarization
124
  naming-convention: null
125
 
126
+ - name: OUTPUT_AUDIO_FILE
127
+ type: .wav
128
+ path: /odtp/odtp-output
129
+ description: Audio in wav format
130
+ naming-convention: null
131
+
132
+ - name: OUTPUT_PARAGRAPHS_FILE
133
+ type: .json
134
+ path: /odtp/odtp-output
135
+ description: Markdown file with the paragraphs containing speaker diarization and transcription/translation
136
+ naming-convention: null
137
+
138
+ - name: OUTPUT_MD_FILE
139
+ type: .md
140
+ path: /odtp/odtp-output
141
+ description: Markdown file with the speaker diarization and transcription/translation
142
+ naming-convention: null
143
+
144
+ - name: OUTPUT_PDF_FILE
145
+ type: .pdf
146
+ path: /odtp/odtp-output
147
+ description: PDF file with the speaker diarization and transcription/translation
148
+ naming-convention: null
149
+
150
  # Validation Schemas (Future Development)
151
  schema-input: null
152
  schema-output: null
requirements.txt CHANGED
@@ -10,5 +10,7 @@ gradio==5.5.0
10
  numpy==1.24.4
11
  md2pdf==1.0.1
12
  transformers==4.48.0
13
- yt-dlp
14
- python-slugify
 
 
 
10
  numpy==1.24.4
11
  md2pdf==1.0.1
12
  transformers==4.48.0
13
+ yt-dlp==2025.1.26
14
+ python-slugify
15
+ pyyaml
16
+ boto3