Spaces:

MothersTongue
/

voice-matcher-api

Running on T4

App Files Files Community

arnabg95 committed on Jun 13

Commit

fe79a8f

•

1 Parent(s): 96c81e6

v2

Browse files

Files changed (43) hide show

Dockerfile +35 -0
README.md +5 -4
app/.vscode/PythonImportHelper-v2-Completion.json +144 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-310.pyc +0 -0
app/__pycache__/__init__.cpython-311.pyc +0 -0
app/__pycache__/main.cpython-310.pyc +0 -0
app/__pycache__/main.cpython-311.pyc +0 -0
app/__pycache__/matcher.cpython-310.pyc +0 -0
app/__pycache__/matcher.cpython-311.pyc +0 -0
app/__pycache__/mfcc.cpython-310.pyc +0 -0
app/__pycache__/mfcc.cpython-311.pyc +0 -0
app/__pycache__/passing.cpython-310.pyc +0 -0
app/__pycache__/string_processor.cpython-310.pyc +0 -0
app/__pycache__/transcriber.cpython-310.pyc +0 -0
app/__pycache__/transcriber.cpython-311.pyc +0 -0
app/main.py +64 -0
app/matcher.py +24 -0
app/mfcc.py +45 -0
app/passing.py +36 -0
app/routers/V1/__init__.py +0 -0
app/routers/V1/__pycache__/__init__.cpython-310.pyc +0 -0
app/routers/V1/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/V1/__pycache__/v1_routers.cpython-310.pyc +0 -0
app/routers/V1/__pycache__/v1_routers.cpython-311.pyc +0 -0
app/routers/V1/v1_routers.py +14 -0
app/routers/V1/voice/__init__.py +0 -0
app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc +0 -0
app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc +0 -0
app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc +0 -0
app/routers/V1/voice/voice_router.py +79 -0
app/routers/__init__.py +0 -0
app/routers/__pycache__/__init__.cpython-310.pyc +0 -0
app/routers/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/__pycache__/routes.cpython-310.pyc +0 -0
app/routers/__pycache__/routes.cpython-311.pyc +0 -0
app/routers/routes.py +13 -0
app/static/main.css +0 -0
app/string_processor.py +22 -0
app/templates/index.html +323 -0
app/transcriber.py +39 -0
requirements.txt +115 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+# Container image for the voice-matcher API: a FastAPI app served by uvicorn.
+# Use the official Python image
+FROM python:3.11
+# Set the working directory inside the container; later relative paths resolve against /code
+WORKDIR /code
+# Install ffmpeg, then drop the apt caches and package lists so they do not stay in the layer
+RUN apt-get update && \
+    apt-get install -y ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Copy only the requirements file first so the dependency layer is rebuilt only when it changes
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Create the Hugging Face cache directory, set permissions, and create a non-root user
+# NOTE(review): this directory is only used if the runtime cache location resolves to /.cache — confirm HOME/HF_HOME for the user below
+RUN mkdir -p /.cache/huggingface/hub && \
+    chmod -R 777 /.cache/huggingface && \
+    useradd -m nonrootuser
+# Set ownership of the .cache directory to the non-root user
+RUN chown -R nonrootuser:nonrootuser /.cache/huggingface
+# Copy the rest of the application code
+# NOTE(review): without a .dockerignore this also copies the committed __pycache__ and .vscode directories
+COPY . .
+# Change ownership of the application code (including /code itself) to the non-root user
+RUN chown -R nonrootuser:nonrootuser /code
+# Switch to the non-root user; everything from here on, including the server, runs unprivileged
+USER nonrootuser
+# Start uvicorn on all interfaces, port 7860, serving the `app` object from app/main.py
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,11 @@
 ---
-title: Voice Matcher Api
-emoji: 🐠
-colorFrom: pink
-colorTo: red
 sdk: docker
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Mother Tongue
+emoji: 📊
+colorFrom: gray
+colorTo: green
 sdk: docker
 pinned: false
+license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app/.vscode/PythonImportHelper-v2-Completion.json ADDED Viewed

	@@ -0,0 +1,144 @@

+[
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "FastAPI",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "Request",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "v1_routers",
+        "importPath": "app.routers.V1",
+        "description": "app.routers.V1",
+        "isExtraImport": true,
+        "detail": "app.routers.V1",
+        "documentation": {}
+    },
+    {
+        "label": "HTMLResponse",
+        "importPath": "fastapi.responses",
+        "description": "fastapi.responses",
+        "isExtraImport": true,
+        "detail": "fastapi.responses",
+        "documentation": {}
+    },
+    {
+        "label": "StaticFiles",
+        "importPath": "fastapi.staticfiles",
+        "description": "fastapi.staticfiles",
+        "isExtraImport": true,
+        "detail": "fastapi.staticfiles",
+        "documentation": {}
+    },
+    {
+        "label": "Jinja2Templates",
+        "importPath": "fastapi.templating",
+        "description": "fastapi.templating",
+        "isExtraImport": true,
+        "detail": "fastapi.templating",
+        "documentation": {}
+    },
+    {
+        "label": "CORSMiddleware",
+        "importPath": "fastapi.middleware.cors",
+        "description": "fastapi.middleware.cors",
+        "isExtraImport": true,
+        "detail": "fastapi.middleware.cors",
+        "documentation": {}
+    },
+    {
+        "label": "routes",
+        "importPath": "app.routers",
+        "description": "app.routers",
+        "isExtraImport": true,
+        "detail": "app.routers",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.V1.voice.voice_router",
+        "description": "routers.V1.voice.voice_router",
+        "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\n@router.post(\"/transcribe\")\nasync def transcribe_audio():\n    return",
+        "detail": "routers.V1.voice.voice_router",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.V1.v1_routers",
+        "description": "routers.V1.v1_routers",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router()",
+        "detail": "routers.V1.v1_routers",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.routes",
+        "description": "routers.routes",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
+        "detail": "routers.routes",
+        "documentation": {}
+    },
+    {
+        "label": "app",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "app = FastAPI(\n    title=\"Mother Tongue Voice Matcher\",\n    version=\"0.0.5\",\n    servers=[{\n        \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n    }],\n    root_path=\"/api/v1\",\n    root_path_in_servers=False,\n)\n# cors policy",
+        "detail": "main",
+        "documentation": {}
+    },
+    {
+        "label": "origins",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "origins = [\n    \"http://localhost\",\n    \"http://localhost:8080\",\n    \"http://localhost:3000\",\n    \"http://localhost:5173\",\n    \"http://127.0.0.1\",\n    \"http://127.0.0.1:8080\",\n    \"http://127.0.0.1:3000\",\n    \"http://127.0.0.1:5173\",\n]",
+        "detail": "main",
+        "documentation": {}
+    },
+    {
+        "label": "templates",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\n@app.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n    \"\"\"set the root to show a html welcome page\"\"\"\n    return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
+        "detail": "main",
+        "documentation": {}
+    }
+]

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (152 Bytes). View file

app/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (141 Bytes). View file

app/__pycache__/main.cpython-310.pyc ADDED Viewed

Binary file (1.61 kB). View file

app/__pycache__/main.cpython-311.pyc ADDED Viewed

Binary file (2.07 kB). View file

app/__pycache__/matcher.cpython-310.pyc ADDED Viewed

Binary file (829 Bytes). View file

app/__pycache__/matcher.cpython-311.pyc ADDED Viewed

Binary file (1.92 kB). View file

app/__pycache__/mfcc.cpython-310.pyc ADDED Viewed

Binary file (1.67 kB). View file

app/__pycache__/mfcc.cpython-311.pyc ADDED Viewed

Binary file (2.67 kB). View file

app/__pycache__/passing.cpython-310.pyc ADDED Viewed

Binary file (963 Bytes). View file

app/__pycache__/string_processor.cpython-310.pyc ADDED Viewed

Binary file (657 Bytes). View file

app/__pycache__/transcriber.cpython-310.pyc ADDED Viewed

Binary file (1.17 kB). View file

app/__pycache__/transcriber.cpython-311.pyc ADDED Viewed

Binary file (1.72 kB). View file

app/main.py ADDED Viewed

	@@ -0,0 +1,64 @@

+""" main api file """
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from app.routers import routes
+""" initialize app with openapi configurations """
+# Application object; title, version and the server list below are passed to FastAPI for the generated API docs.
+app = FastAPI(
+    title="Mother Tongue Voice Matcher",
+    version="0.0.5",
+    servers=[
+        {
+            "url": "http://127.0.0.1:8000/api/v1",
+            "description": "Local Server",
+        },
+        {
+            "url": "https://r3vibe-mother-tongue.hf.space/api/v1",
+            "description": "Huggingface Server",
+        }
+    ],
+    # NOTE(review): assumes the app is reached under the /api/v1 prefix (e.g. behind a proxy) — confirm against the deployment
+    root_path="/api/v1",
+    # The server URLs above already include the prefix, so it is not added to the list automatically
+    root_path_in_servers=False,
+)
+# CORS policy: browser origins allowed to call the API (local dev hosts plus the hosted Space)
+origins = [
+    "http://localhost",
+    "http://localhost:8080",
+    "http://localhost:3000",
+    "http://localhost:5173",
+    "http://127.0.0.1",
+    "http://127.0.0.1:8080",
+    "http://127.0.0.1:3000",
+    "http://127.0.0.1:5173",
+    "https://r3vibe-mother-tongue.hf.space",
+]
+# Any method and header is accepted, with credentials, but only from the origins listed above
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Serve the files in app/static under /static (path is relative to the process working directory)
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+# Configure the templates folder used to render HTML pages
+templates = Jinja2Templates(directory="app/templates")
+# Landing page; excluded from the OpenAPI schema because it is not an API endpoint
+@app.get("/", response_class=HTMLResponse, include_in_schema=False)
+async def root(request: Request):
+    """Render index.html as the welcome/status page for the root URL."""
+    return templates.TemplateResponse(request=request, name="index.html")
+# include all the other api endpoints
+app.include_router(routes.router)

app/matcher.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import difflib
+from fuzzywuzzy import fuzz
+# Fuzzy string similarity (kept under its historical "phonetic" name)
+def phonetic_match(word1, word2):
+    """
+    Return fuzzywuzzy's ``fuzz.ratio`` score for the two strings divided by 100.
+
+    NOTE(review): despite the name, no phonetic encoding is applied here; the
+    score comes from a plain string-similarity ratio of the two inputs.
+    """
+    return fuzz.ratio(word1, word2) / 100
+# Custom sequence matching function
+def sequence_match(a, b):
+    """
+    Uses sequence matching to compare two sequences of words.
+    """
+    return difflib.SequenceMatcher(None, a, b).ratio()
+def match(original, transcription):
+    sequence = sequence_match(original, transcription)
+    phonetic = phonetic_match(original, transcription)
+    return sequence, phonetic

app/mfcc.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import librosa
+from transformers import AutoFeatureExtractor, Wav2Vec2BertModel
+import soundfile as sf
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+# Model and feature extractor (same as before)
+model_id = "facebook/w2v-bert-2.0"
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+model = Wav2Vec2BertModel.from_pretrained(model_id)
+def load_and_resample_audio(file_path, target_sample_rate=16000):
+    audio_input, sample_rate = sf.read(file_path)
+    if sample_rate != target_sample_rate:
+        audio_input = librosa.resample(
+            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate
+        )
+    return audio_input, target_sample_rate
+def calculate_mfcc(audio_data, sample_rate):
+    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
+    mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension
+    return mfccs_scaled
+def calculate_similarity(mfccs1, mfccs2):
+    similarity = cosine_similarity(mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))
+    return similarity[0][0]
+def mfcc_similarty_check(original: str, recorded: str):
+    correct_pronunciation_audio, _ = load_and_resample_audio(original)
+    user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)
+    # Extract MFCCs from audio data
+    correct_mfccs = calculate_mfcc(correct_pronunciation_audio.flatten(), sample_rate)
+    user_mfccs = calculate_mfcc(user_pronunciation_audio.flatten(), sample_rate)
+    distance = np.linalg.norm(correct_mfccs.flatten() - user_mfccs.flatten())
+    # Calculate cosine similarity using MFCCs
+    similarity_score = calculate_similarity(correct_mfccs, user_mfccs)
+    accuracy_percentage = similarity_score * 100
+    return distance, accuracy_percentage

app/passing.py ADDED Viewed

	@@ -0,0 +1,36 @@

+def normalize_euclidean(euclidean, max_value):
+    """
+    Normalize the Euclidean distance to a 0-100 scale, where 0 is the maximum distance
+    and 100 is the minimum distance.
+    """
+    return max(0, 100 - (euclidean / max_value) * 100)
+def calculate_passing(sequence, phonetic, cosine, euclidean, passing_threshold=70, euclidean_max=200):
+    # Normalize sequence and phonetic to 0-100 scale
+    sequence_normalized = sequence * 100
+    phonetic_normalized = phonetic * 100
+    # Normalize Euclidean distance to a similarity measure (0-100 scale)
+    euclidean_similarity = normalize_euclidean(euclidean, euclidean_max)
+    # Calculate the weighted average
+    weights = {
+        'sequence': 0.35,
+        'phonetic': 0.35,
+        'cosine': 0.10,
+        'euclidean': 0.20
+    }
+    weighted_score = (
+        sequence_normalized * weights['sequence'] +
+        phonetic_normalized * weights['phonetic'] +
+        cosine * weights['cosine'] +
+        euclidean_similarity * weights['euclidean']
+    )
+    # Check if the weighted score meets or exceeds the passing threshold
+    is_passing = weighted_score >= passing_threshold
+    return weighted_score, is_passing

app/routers/V1/__init__.py ADDED Viewed

File without changes

app/routers/V1/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (163 Bytes). View file

app/routers/V1/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (171 Bytes). View file

app/routers/V1/__pycache__/v1_routers.cpython-310.pyc ADDED Viewed

Binary file (411 Bytes). View file

app/routers/V1/__pycache__/v1_routers.cpython-311.pyc ADDED Viewed

Binary file (526 Bytes). View file

app/routers/V1/v1_routers.py ADDED Viewed

	@@ -0,0 +1,14 @@

+"""
+v1 routes file
+all the v1 routes like auth
+profile... will be included here
+"""
+from fastapi import APIRouter
+from app.routers.V1.voice import voice_router
+""" initialize the router """
+router = APIRouter()
+""" include auth routes """
+router.include_router(voice_router.router)

app/routers/V1/voice/__init__.py ADDED Viewed

File without changes

app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (169 Bytes). View file

app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (158 Bytes). View file

app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc ADDED Viewed

Binary file (2.15 kB). View file

app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc ADDED Viewed

Binary file (3.17 kB). View file

app/routers/V1/voice/voice_router.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+import tempfile
+import time
+from typing import Annotated
+import requests
+from fastapi import APIRouter, UploadFile, File, Body, HTTPException, status
+from fastapi.responses import JSONResponse
+from app.transcriber import get_transcription
+from app.matcher import match
+from app.mfcc import mfcc_similarty_check
+from app.string_processor import clean_transcription
+from app.passing import calculate_passing
+""" initialize the router """
+router = APIRouter(prefix="/voice", tags=["Voice"])
+@router.post("/transcribe")
+async def transcribe_audio(
+    original_url: Annotated[str, Body()],
+    recorded: Annotated[UploadFile, File()],
+    matcher_text: Annotated[str, Body()],
+):
+    try:
+        # Validate URL
+        if not original_url.endswith(".wav"):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Invalid URL. Please provide a URL pointing to a wav file.",
+            )
+        # Download the audio file from the URL
+        response = requests.get(original_url)
+        if response.status_code != 200:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Unable to download the audio file from the URL.",
+            )
+        filename_original = f"audio_{int(time.time())}_original.wav"
+        # Save the downloaded file temporarily
+        with open(filename_original, "wb") as buffer:
+            buffer.write(response.content)
+        # Read file bytes
+        recorded_bytes = await recorded.read()
+        filename_recorded = f"audio_{int(time.time())}_recorded.wav"
+        # Save the file temporarily
+        with open(filename_recorded, "wb") as buffer:
+            buffer.write(recorded_bytes)
+        try:
+            text = get_transcription(filename_recorded)
+            text = clean_transcription(text)
+            sequence, phonetic = match(matcher_text, text)
+            Euclidean, Cosine = mfcc_similarty_check(
+                filename_original, filename_recorded
+            )
+            weighted_score, is_passing = calculate_passing(sequence, phonetic, Cosine, Euclidean)
+            return JSONResponse(
+                {
+                    "transcription": text,
+                    "score": round(weighted_score),
+                    "passing": str(is_passing)
+                }
+            )
+        finally:
+            # Clean up the temporary file
+            os.remove(filename_original)
+            os.remove(filename_recorded)
+    except Exception as e:
+        print(e)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Unable to process the audio. Please try again later.",
+        )

app/routers/__init__.py ADDED Viewed

File without changes

app/routers/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (160 Bytes). View file

app/routers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (168 Bytes). View file

app/routers/__pycache__/routes.cpython-310.pyc ADDED Viewed

Binary file (378 Bytes). View file

app/routers/__pycache__/routes.cpython-311.pyc ADDED Viewed

Binary file (512 Bytes). View file

app/routers/routes.py ADDED Viewed

	@@ -0,0 +1,13 @@

+"""
+main routes file
+all the v1 and v2... routes will go here
+"""
+from fastapi import APIRouter
+from app.routers.V1 import v1_routers
+""" initialize the router """
+router = APIRouter()
+""" include the v1 routes here """
+router.include_router(v1_routers.router)

app/static/main.css ADDED Viewed

File without changes

app/string_processor.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import unicodedata
+import re
+def clean_transcription(text):
+    # Normalize the text to NFKD form
+    normalized_text = unicodedata.normalize('NFKD', text)
+    # Remove diacritics
+    cleaned_text = ''.join([c for c in normalized_text if not unicodedata.combining(c)])
+    # Explicitly remove the leading ʻ character and any other specific characters
+    cleaned_text = cleaned_text.replace('ʻ', '')
+    # Remove any remaining special characters (if any)
+    cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)
+    # Ensure the text is stripped of any unwanted leading or trailing whitespace
+    cleaned_text = cleaned_text.strip()
+    return cleaned_text

app/templates/index.html ADDED Viewed

	@@ -0,0 +1,323 @@

+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Server Status</title>
+    <style>
+      @import url("https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;1,500&display=swap");
+      body {
+        background: linear-gradient(#3800e7, #8a15ff);
+        height: 100vh;
+        font-size: calc(14px + (26 - 14) * ((100vw - 300px) / (1600 - 300)));
+        font-family: "DM Mono", monospace;
+        font-weight: 300;
+        overflow: hidden;
+        color: white;
+        text-align: center;
+      }
+      h1 {
+        font-size: 3em;
+        margin-bottom: 0.2em;
+      }
+      h2 {
+        font-size: 2em;
+      }
+      .main {
+        height: 100vh;
+        display: flex;
+        flex-direction: column;
+        flex-wrap: wrap;
+        position: relative;
+        justify-content: center;
+        align-items: center;
+      }
+      .main:before,
+      .main:after {
+        content: "";
+        display: block;
+        position: absolute;
+        z-index: -3;
+      }
+      .main:before {
+        right: 0;
+        /* NOTE(review): a non-zero length without a unit is invalid CSS, so this declaration is ignored; add a unit (px/em) if an offset is intended */
+        bottom: -19;
+        height: 30em;
+        width: 30em;
+        border-radius: 30em;
+        background: linear-gradient(#3800e7, #8a15ff);
+        align-self: flex-end;
+        animation: gradient-fade 8s ease-in-out 3s infinite alternate;
+      }
+      .main:after {
+        top: 0;
+        /* NOTE(review): a non-zero length without a unit is invalid CSS, so this declaration is ignored; add a unit (px/em) if an offset is intended */
+        left: 30;
+        height: 10em;
+        width: 10em;
+        border-radius: 10em;
+        background: linear-gradient(#3800e7, #8a15ff);
+        animation: gradient-fade-alt 6s ease-in-out 3s infinite alternate;
+      }
+      .main__text-wrapper {
+        position: relative;
+        padding: 2em;
+      }
+      .main__text-wrapper:before,
+      .main__text-wrapper:after {
+        content: "";
+        display: block;
+        position: absolute;
+      }
+      .main__text-wrapper:before {
+        z-index: -1;
+        top: -3em;
+        right: -3em;
+        width: 13em;
+        height: 13em;
+        opacity: 0.7;
+        border-radius: 13em;
+        background: linear-gradient(#15e0ff, #8a15ff);
+        animation: rotation 7s linear infinite;
+      }
+      .main__text-wrapper:after {
+        z-index: -1;
+        bottom: -20em;
+        width: 20em;
+        height: 20em;
+        border-radius: 20em;
+        background: linear-gradient(#d000c5, #8a15ff);
+        animation: rotation 7s linear infinite;
+      }
+      .arrow {
+        z-index: 1000;
+        opacity: 0.5;
+        position: absolute;
+      }
+      .arrow--top {
+        top: 0;
+        left: -5em;
+      }
+      .arrow--bottom {
+        bottom: 0;
+        right: 3em;
+      }
+      .circle {
+        transform: translate(50%, -50%) rotate(0deg);
+        transform-origin: center;
+      }
+      .circle--ltblue {
+        height: 20em;
+        width: 20em;
+        border-radius: 20em;
+        background: linear-gradient(#15e0ff, #3800e7);
+      }
+      .backdrop {
+        position: absolute;
+        width: 100vw;
+        height: 100vh;
+        display: block;
+        background-color: pink;
+      }
+      .dotted-circle {
+        position: absolute;
+        top: 0;
+        right: 0;
+        opacity: 0.3;
+        animation: rotation 38s linear infinite;
+      }
+      .draw-in {
+        stroke-dasharray: 1000;
+        stroke-dashoffset: 10;
+        animation: draw 15s ease-in-out alternate infinite;
+      }
+      @keyframes draw {
+        from {
+          stroke-dashoffset: 1000;
+        }
+        to {
+          stroke-dashoffset: 0;
+        }
+      }
+      .item-to {
+        animation-duration: 10s;
+        animation-iteration-count: infinite;
+        transform-origin: bottom;
+      }
+      .bounce-1 {
+        animation-name: bounce-1;
+        animation-timing-function: ease;
+      }
+      .bounce-2 {
+        animation-name: bounce-2;
+        animation-timing-function: ease;
+      }
+      .bounce-3 {
+        animation-name: bounce-3;
+        animation-timing-function: ease;
+      }
+      @keyframes bounce-1 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(50px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes bounce-2 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(-30px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes bounce-3 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(30px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes rotation {
+        from {
+          transform: rotate(0deg);
+        }
+        to {
+          transform: rotate(360deg);
+        }
+      }
+      @keyframes gradient-fade {
+        from {
+          transform: translate(10%, -10%) rotate(0deg);
+        }
+        to {
+          transform: translate(50%, -50%) rotate(360deg);
+        }
+      }
+      @keyframes gradient-fade-alt {
+        from {
+          transform: translate(-20%, 20%) rotate(0deg);
+        }
+        to {
+          transform: translate(-60%, 60%) rotate(360deg);
+        }
+      }
+    </style>
+  </head>
+  <body>
+    <div class="arrow arrow--top">
+      <svg
+        xmlns="http://www.w3.org/2000/svg"
+        width="270.11"
+        height="649.9"
+        overflow="visible"
+      >
+        <style>
+          .geo-arrow {
+            fill: none;
+            stroke: #fff;
+            stroke-width: 2;
+            stroke-miterlimit: 10;
+          }
+        </style>
+        <g class="item-to bounce-1">
+          <path
+            class="geo-arrow draw-in"
+            d="M135.06 142.564L267.995 275.5 135.06 408.434 2.125 275.499z"
+          />
+        </g>
+        <circle
+          class="geo-arrow item-to bounce-2"
+          cx="194.65"
+          cy="69.54"
+          r="7.96"
+        />
+        <circle class="geo-arrow draw-in" cx="194.65" cy="39.5" r="7.96" />
+        <circle
+          class="geo-arrow item-to bounce-3"
+          cx="194.65"
+          cy="9.46"
+          r="7.96"
+        />
+        <g class="geo-arrow item-to bounce-2">
+          <path
+            class="st0 draw-in"
+            d="M181.21 619.5l13.27 27 13.27-27zM194.48 644.5v-552"
+          />
+        </g>
+      </svg>
+    </div>
+    <div class="arrow arrow--bottom">
+      <svg
+        xmlns="http://www.w3.org/2000/svg"
+        width="31.35"
+        height="649.9"
+        overflow="visible"
+      >
+        <style>
+          .geo-arrow {
+            fill: none;
+            stroke: #fff;
+            stroke-width: 2;
+            stroke-miterlimit: 10;
+          }
+        </style>
+        <g class="item-to bounce-1">
+          <circle
+            class="geo-arrow item-to bounce-3"
+            cx="15.5"
+            cy="580.36"
+            r="7.96"
+          />
+          <circle class="geo-arrow draw-in" cx="15.5" cy="610.4" r="7.96" />
+          <circle
+            class="geo-arrow item-to bounce-2"
+            cx="15.5"
+            cy="640.44"
+            r="7.96"
+          />
+          <g class="item-to bounce-2">
+            <path
+              class="geo-arrow draw-in"
+              d="M28.94 30.4l-13.26-27-13.27 27zM15.68 5.4v552"
+            />
+          </g>
+        </g>
+      </svg>
+    </div>
+    <div class="main">
+      <div class="main__text-wrapper">
+        <h1 class="main__title">Backend Server</h1>
+        <h2>Up and Running</h2>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          class="dotted-circle"
+          width="352"
+          height="352"
+          overflow="visible"
+        >
+          <circle
+            cx="176"
+            cy="176"
+            r="174"
+            fill="none"
+            stroke="#fff"
+            stroke-width="2"
+            stroke-miterlimit="10"
+            stroke-dasharray="12.921,11.9271"
+          />
+        </svg>
+      </div>
+    </div>
+  </body>
+</html>

app/transcriber.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from datasets import load_dataset
+# Use the first CUDA device when one is available, otherwise the CPU
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# Half precision when CUDA is available, full precision otherwise
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+# Checkpoint used for transcription
+model_id = "openai/whisper-large-v3"
+# The model is loaded once, when this module is imported
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+# Its tokenizer and feature extractor are handed to the pipeline below
+processor = AutoProcessor.from_pretrained(model_id)
+# Shared speech-recognition pipeline; get_transcription calls it for every request
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=30,
+    batch_size=16,
+    return_timestamps=True,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
+sample = dataset[0]["audio"]
+def get_transcription(file: str):
+    result = pipe(file, generate_kwargs={"language": "shona"})
+    return result["text"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,115 @@

+accelerate==0.30.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.4.0
+async-timeout==4.0.3
+attrs==23.2.0
+audioread==3.0.1
+certifi==2024.6.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+datasets==2.19.1
+decorator==5.1.1
+dill==0.3.8
+dnspython==2.6.1
+email_validator==2.1.1
+exceptiongroup==1.2.1
+fastapi==0.111.0
+fastapi-cli==0.0.4
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.3.1
+fuzzywuzzy==0.18.0
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.2
+idna==3.7
+intel-openmp==2021.4.0
+itsdangerous==2.2.0
+Jinja2==3.1.4
+joblib==1.4.2
+langdetect==1.0.9
+lazy_loader==0.4
+Levenshtein==0.25.1
+librosa==0.10.2.post1
+llvmlite==0.42.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mkl==2021.4.0
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multiprocess==0.70.16
+networkx==3.3
+numba==0.59.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.5.40
+nvidia-nvtx-cu12==12.1.105
+orjson==3.10.3
+packaging==24.0
+pandas==2.2.2
+platformdirs==4.2.2
+pooch==1.8.1
+psutil==5.9.8
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+pycparser==2.22
+pydantic==2.7.2
+pydantic-extra-types==2.7.0
+pydantic-settings==2.2.1
+pydantic_core==2.18.3
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-Levenshtein==0.25.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+rapidfuzz==3.9.3
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+safetensors==0.4.3
+scikit-learn==1.5.0
+scipy==1.13.1
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soundfile==0.12.1
+soxr==0.3.7
+starlette==0.37.2
+sympy==1.12.1
+tbb==2021.12.0
+threadpoolctl==3.5.0
+tokenizers==0.19.1
+torch==2.3.0
+tqdm==4.66.4
+transformers==4.41.2
+triton==2.3.0
+typer==0.12.3
+typing_extensions==4.12.1
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.1
+uvicorn==0.30.0
+uvloop==0.19.0
+watchfiles==0.22.0
+websockets==12.0
+xxhash==3.4.1
+yarl==1.9.4