arnabg95 commited on
Commit
fe79a8f
·
1 Parent(s): 96c81e6
Files changed (43) hide show
  1. Dockerfile +35 -0
  2. README.md +5 -4
  3. app/.vscode/PythonImportHelper-v2-Completion.json +144 -0
  4. app/__init__.py +0 -0
  5. app/__pycache__/__init__.cpython-310.pyc +0 -0
  6. app/__pycache__/__init__.cpython-311.pyc +0 -0
  7. app/__pycache__/main.cpython-310.pyc +0 -0
  8. app/__pycache__/main.cpython-311.pyc +0 -0
  9. app/__pycache__/matcher.cpython-310.pyc +0 -0
  10. app/__pycache__/matcher.cpython-311.pyc +0 -0
  11. app/__pycache__/mfcc.cpython-310.pyc +0 -0
  12. app/__pycache__/mfcc.cpython-311.pyc +0 -0
  13. app/__pycache__/passing.cpython-310.pyc +0 -0
  14. app/__pycache__/string_processor.cpython-310.pyc +0 -0
  15. app/__pycache__/transcriber.cpython-310.pyc +0 -0
  16. app/__pycache__/transcriber.cpython-311.pyc +0 -0
  17. app/main.py +64 -0
  18. app/matcher.py +24 -0
  19. app/mfcc.py +45 -0
  20. app/passing.py +36 -0
  21. app/routers/V1/__init__.py +0 -0
  22. app/routers/V1/__pycache__/__init__.cpython-310.pyc +0 -0
  23. app/routers/V1/__pycache__/__init__.cpython-311.pyc +0 -0
  24. app/routers/V1/__pycache__/v1_routers.cpython-310.pyc +0 -0
  25. app/routers/V1/__pycache__/v1_routers.cpython-311.pyc +0 -0
  26. app/routers/V1/v1_routers.py +14 -0
  27. app/routers/V1/voice/__init__.py +0 -0
  28. app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc +0 -0
  29. app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc +0 -0
  30. app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc +0 -0
  31. app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc +0 -0
  32. app/routers/V1/voice/voice_router.py +79 -0
  33. app/routers/__init__.py +0 -0
  34. app/routers/__pycache__/__init__.cpython-310.pyc +0 -0
  35. app/routers/__pycache__/__init__.cpython-311.pyc +0 -0
  36. app/routers/__pycache__/routes.cpython-310.pyc +0 -0
  37. app/routers/__pycache__/routes.cpython-311.pyc +0 -0
  38. app/routers/routes.py +13 -0
  39. app/static/main.css +0 -0
  40. app/string_processor.py +22 -0
  41. app/templates/index.html +323 -0
  42. app/transcriber.py +39 -0
  43. requirements.txt +115 -0
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python image
2
+ FROM python:3.11
3
+
4
+ # Set the working directory inside the container
5
+ WORKDIR /code
6
+
7
+ # Install ffmpeg and other dependencies
8
+ RUN apt-get update && \
9
+ apt-get install -y ffmpeg && \
10
+ apt-get clean && \
11
+ rm -rf /var/lib/apt/lists/*
12
+
13
+ # Copy the requirements file and install dependencies
14
+ COPY ./requirements.txt /code/requirements.txt
15
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
16
+
17
+ # Create the Hugging Face cache directory, set permissions, and create a non-root user
18
+ RUN mkdir -p /.cache/huggingface/hub && \
19
+ chmod -R 777 /.cache/huggingface && \
20
+ useradd -m nonrootuser
21
+
22
+ # Set ownership of the .cache directory to the non-root user
23
+ RUN chown -R nonrootuser:nonrootuser /.cache/huggingface
24
+
25
+ # Copy the rest of the application code
26
+ COPY . .
27
+
28
+ # Change ownership of the application code to the non-root user
29
+ RUN chown -R nonrootuser:nonrootuser /code
30
+
31
+ # Switch to the non-root user
32
+ USER nonrootuser
33
+
34
+ # Specify the command to run the application
35
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: Voice Matcher Api
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Mother Tongue
3
+ emoji: 📊
4
+ colorFrom: gray
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/.vscode/PythonImportHelper-v2-Completion.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "label": "APIRouter",
4
+ "importPath": "fastapi",
5
+ "description": "fastapi",
6
+ "isExtraImport": true,
7
+ "detail": "fastapi",
8
+ "documentation": {}
9
+ },
10
+ {
11
+ "label": "APIRouter",
12
+ "importPath": "fastapi",
13
+ "description": "fastapi",
14
+ "isExtraImport": true,
15
+ "detail": "fastapi",
16
+ "documentation": {}
17
+ },
18
+ {
19
+ "label": "APIRouter",
20
+ "importPath": "fastapi",
21
+ "description": "fastapi",
22
+ "isExtraImport": true,
23
+ "detail": "fastapi",
24
+ "documentation": {}
25
+ },
26
+ {
27
+ "label": "FastAPI",
28
+ "importPath": "fastapi",
29
+ "description": "fastapi",
30
+ "isExtraImport": true,
31
+ "detail": "fastapi",
32
+ "documentation": {}
33
+ },
34
+ {
35
+ "label": "Request",
36
+ "importPath": "fastapi",
37
+ "description": "fastapi",
38
+ "isExtraImport": true,
39
+ "detail": "fastapi",
40
+ "documentation": {}
41
+ },
42
+ {
43
+ "label": "v1_routers",
44
+ "importPath": "app.routers.V1",
45
+ "description": "app.routers.V1",
46
+ "isExtraImport": true,
47
+ "detail": "app.routers.V1",
48
+ "documentation": {}
49
+ },
50
+ {
51
+ "label": "HTMLResponse",
52
+ "importPath": "fastapi.responses",
53
+ "description": "fastapi.responses",
54
+ "isExtraImport": true,
55
+ "detail": "fastapi.responses",
56
+ "documentation": {}
57
+ },
58
+ {
59
+ "label": "StaticFiles",
60
+ "importPath": "fastapi.staticfiles",
61
+ "description": "fastapi.staticfiles",
62
+ "isExtraImport": true,
63
+ "detail": "fastapi.staticfiles",
64
+ "documentation": {}
65
+ },
66
+ {
67
+ "label": "Jinja2Templates",
68
+ "importPath": "fastapi.templating",
69
+ "description": "fastapi.templating",
70
+ "isExtraImport": true,
71
+ "detail": "fastapi.templating",
72
+ "documentation": {}
73
+ },
74
+ {
75
+ "label": "CORSMiddleware",
76
+ "importPath": "fastapi.middleware.cors",
77
+ "description": "fastapi.middleware.cors",
78
+ "isExtraImport": true,
79
+ "detail": "fastapi.middleware.cors",
80
+ "documentation": {}
81
+ },
82
+ {
83
+ "label": "routes",
84
+ "importPath": "app.routers",
85
+ "description": "app.routers",
86
+ "isExtraImport": true,
87
+ "detail": "app.routers",
88
+ "documentation": {}
89
+ },
90
+ {
91
+ "label": "router",
92
+ "kind": 5,
93
+ "importPath": "routers.V1.voice.voice_router",
94
+ "description": "routers.V1.voice.voice_router",
95
+ "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\n@router.post(\"/transcribe\")\nasync def transcribe_audio():\n return",
96
+ "detail": "routers.V1.voice.voice_router",
97
+ "documentation": {}
98
+ },
99
+ {
100
+ "label": "router",
101
+ "kind": 5,
102
+ "importPath": "routers.V1.v1_routers",
103
+ "description": "routers.V1.v1_routers",
104
+ "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router()",
105
+ "detail": "routers.V1.v1_routers",
106
+ "documentation": {}
107
+ },
108
+ {
109
+ "label": "router",
110
+ "kind": 5,
111
+ "importPath": "routers.routes",
112
+ "description": "routers.routes",
113
+ "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
114
+ "detail": "routers.routes",
115
+ "documentation": {}
116
+ },
117
+ {
118
+ "label": "app",
119
+ "kind": 5,
120
+ "importPath": "main",
121
+ "description": "main",
122
+ "peekOfCode": "app = FastAPI(\n title=\"Mother Tongue Voice Matcher\",\n version=\"0.0.5\",\n servers=[{\n \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n }],\n root_path=\"/api/v1\",\n root_path_in_servers=False,\n)\n# cors policy",
123
+ "detail": "main",
124
+ "documentation": {}
125
+ },
126
+ {
127
+ "label": "origins",
128
+ "kind": 5,
129
+ "importPath": "main",
130
+ "description": "main",
131
+ "peekOfCode": "origins = [\n \"http://localhost\",\n \"http://localhost:8080\",\n \"http://localhost:3000\",\n \"http://localhost:5173\",\n \"http://127.0.0.1\",\n \"http://127.0.0.1:8080\",\n \"http://127.0.0.1:3000\",\n \"http://127.0.0.1:5173\",\n]",
132
+ "detail": "main",
133
+ "documentation": {}
134
+ },
135
+ {
136
+ "label": "templates",
137
+ "kind": 5,
138
+ "importPath": "main",
139
+ "description": "main",
140
+ "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\n@app.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n \"\"\"set the root to show a html welcome page\"\"\"\n return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
141
+ "detail": "main",
142
+ "documentation": {}
143
+ }
144
+ ]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (152 Bytes). View file
 
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (141 Bytes). View file
 
app/__pycache__/main.cpython-310.pyc ADDED
Binary file (1.61 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.07 kB). View file
 
app/__pycache__/matcher.cpython-310.pyc ADDED
Binary file (829 Bytes). View file
 
app/__pycache__/matcher.cpython-311.pyc ADDED
Binary file (1.92 kB). View file
 
app/__pycache__/mfcc.cpython-310.pyc ADDED
Binary file (1.67 kB). View file
 
app/__pycache__/mfcc.cpython-311.pyc ADDED
Binary file (2.67 kB). View file
 
app/__pycache__/passing.cpython-310.pyc ADDED
Binary file (963 Bytes). View file
 
app/__pycache__/string_processor.cpython-310.pyc ADDED
Binary file (657 Bytes). View file
 
app/__pycache__/transcriber.cpython-310.pyc ADDED
Binary file (1.17 kB). View file
 
app/__pycache__/transcriber.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
app/main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ main api file """
2
+
3
+ from fastapi.responses import HTMLResponse
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.templating import Jinja2Templates
6
+ from fastapi import FastAPI, Request
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from app.routers import routes
9
+
10
+ """ initialize app with openapi configurations """
11
+ app = FastAPI(
12
+ title="Mother Tongue Voice Matcher",
13
+ version="0.0.5",
14
+ servers=[
15
+ {
16
+ "url": "http://127.0.0.1:8000/api/v1",
17
+ "description": "Local Server",
18
+ },
19
+ {
20
+ "url": "https://r3vibe-mother-tongue.hf.space/api/v1",
21
+ "description": "Huggingface Server",
22
+ }
23
+ ],
24
+ root_path="/api/v1",
25
+ root_path_in_servers=False,
26
+ )
27
+
28
+
29
+ # cors policy
30
+ origins = [
31
+ "http://localhost",
32
+ "http://localhost:8080",
33
+ "http://localhost:3000",
34
+ "http://localhost:5173",
35
+ "http://127.0.0.1",
36
+ "http://127.0.0.1:8080",
37
+ "http://127.0.0.1:3000",
38
+ "http://127.0.0.1:5173",
39
+ "https://r3vibe-mother-tongue.hf.space",
40
+ ]
41
+
42
+ app.add_middleware(
43
+ CORSMiddleware,
44
+ allow_origins=origins,
45
+ allow_credentials=True,
46
+ allow_methods=["*"],
47
+ allow_headers=["*"],
48
+ )
49
+
50
+ # mount the static folder
51
+ app.mount("/static", StaticFiles(directory="app/static"), name="static")
52
+
53
+ # configure the Jinja2 templates directory
54
+ templates = Jinja2Templates(directory="app/templates")
55
+
56
+
57
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
async def root(request: Request):
    """Render ``index.html`` as the HTML welcome page for the root path."""
    welcome_page = templates.TemplateResponse(request=request, name="index.html")
    return welcome_page
61
+
62
+
63
+ # include all the other api endpoints
64
+ app.include_router(routes.router)
app/matcher.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import difflib
2
+ from fuzzywuzzy import fuzz
3
+
4
+
5
+ # Custom phonetic matching function
6
def phonetic_match(word1, word2):
    """Return ``fuzz.ratio(word1, word2)`` rescaled by dividing by 100.

    NOTE(review): despite the name, this function applies no phonetic
    encoding itself; the score is whatever ``fuzz.ratio`` computes on the
    two raw inputs.
    """
    percent_score = fuzz.ratio(word1, word2)
    return percent_score / 100
11
+
12
+
13
+ # Custom sequence matching function
14
def sequence_match(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    No junk filter is supplied. The inputs may be any sequences of hashable
    items; when strings are passed the comparison is character by character.
    """
    comparer = difflib.SequenceMatcher(None, a, b)
    similarity = comparer.ratio()
    return similarity
19
+
20
+
21
def match(original, transcription):
    """Compare ``transcription`` against ``original`` with both matchers.

    Returns:
        A ``(sequence, phonetic)`` tuple holding the results of
        ``sequence_match`` and ``phonetic_match`` for the same pair of inputs.
    """
    return (
        sequence_match(original, transcription),
        phonetic_match(original, transcription),
    )
app/mfcc.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ from transformers import AutoFeatureExtractor, Wav2Vec2BertModel
3
+ import soundfile as sf
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+
7
+
8
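+ # NOTE(review): this model and feature extractor are loaded at import time but are not referenced by any function in this module; confirm they are still needed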
9
+ model_id = "facebook/w2v-bert-2.0"
10
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
11
+ model = Wav2Vec2BertModel.from_pretrained(model_id)
12
+
13
def load_and_resample_audio(file_path, target_sample_rate=16000):
    """Load an audio file as mono samples at ``target_sample_rate``.

    Args:
        file_path: Path of the audio file, read with ``soundfile.read``.
        target_sample_rate: Sample rate the returned audio should have.

    Returns:
        A ``(audio, target_sample_rate)`` tuple, where ``audio`` is a
        one-dimensional array of samples.
    """
    audio_input, sample_rate = sf.read(file_path)

    # soundfile returns a (frames, channels) array for multi-channel files,
    # while librosa.resample works along the trailing axis by default.
    # Without a downmix, stereo input would be resampled across the channel
    # axis and later flattened into interleaved samples.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    if sample_rate != target_sample_rate:
        audio_input = librosa.resample(
            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate
        )
    return audio_input, target_sample_rate
20
+
21
def calculate_mfcc(audio_data, sample_rate):
    """Return the 13 MFCCs of ``audio_data`` averaged into a single vector.

    ``librosa.feature.mfcc`` is called with ``n_mfcc=13``; the result is
    transposed and averaged over axis 0 (the time dimension, per the original
    author's note).
    """
    coefficients = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
    averaged = np.mean(coefficients.T, axis=0)
    return averaged
25
+
26
def calculate_similarity(mfccs1, mfccs2):
    """Return the cosine similarity of two feature vectors as a scalar.

    Each input is reshaped into a single row before being handed to
    ``cosine_similarity``; the only cell of the resulting matrix is returned.
    """
    first_row = mfccs1.reshape(1, -1)
    second_row = mfccs2.reshape(1, -1)
    similarity_matrix = cosine_similarity(first_row, second_row)
    return similarity_matrix[0][0]
29
+
30
def mfcc_similarty_check(original: str, recorded: str):
    """Compare two audio files through their averaged MFCC vectors.

    Args:
        original: Path of the reference recording.
        recorded: Path of the recording to evaluate.

    Returns:
        A ``(distance, accuracy_percentage)`` tuple: the Euclidean norm of the
        difference between the two MFCC vectors, and their cosine similarity
        multiplied by 100.
    """
    reference_audio, _ = load_and_resample_audio(original)
    attempt_audio, sample_rate = load_and_resample_audio(recorded)

    # Both signals are flattened before feature extraction.
    reference_mfccs = calculate_mfcc(reference_audio.flatten(), sample_rate)
    attempt_mfccs = calculate_mfcc(attempt_audio.flatten(), sample_rate)

    difference = reference_mfccs.flatten() - attempt_mfccs.flatten()
    distance = np.linalg.norm(difference)

    accuracy_percentage = calculate_similarity(reference_mfccs, attempt_mfccs) * 100
    return distance, accuracy_percentage
app/passing.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def normalize_euclidean(euclidean, max_value):
    """Map a Euclidean distance onto a 0-100 similarity scale.

    A distance of 0 maps to 100, and a distance of ``max_value`` or more
    maps to 0. The result is clamped at both ends so it always stays inside
    the documented 0-100 range.

    Args:
        euclidean: The distance to convert.
        max_value: The distance that corresponds to a similarity of 0.

    Returns:
        The similarity, between 0 and 100 inclusive.

    Raises:
        ValueError: If ``max_value`` is not positive, because the scale
            would be undefined (division by zero) or inverted.
    """
    if max_value <= 0:
        raise ValueError("max_value must be a positive number")

    similarity = 100 - (euclidean / max_value) * 100
    # Clamp both ends: large distances floor at 0, and a (nonsensical)
    # negative distance cannot push the score above 100.
    return min(100, max(0, similarity))
7
+
8
def calculate_passing(sequence, phonetic, cosine, euclidean, passing_threshold=70, euclidean_max=200):
    """Combine the four similarity measures into one weighted score.

    ``sequence`` and ``phonetic`` are multiplied by 100 before weighting,
    ``euclidean`` is converted with ``normalize_euclidean`` using
    ``euclidean_max``, and ``cosine`` is used exactly as given (it is not
    rescaled here). The weights are 0.35 sequence, 0.35 phonetic,
    0.10 cosine and 0.20 Euclidean.

    Returns:
        A ``(weighted_score, is_passing)`` tuple, where ``is_passing`` is
        whether the score is at least ``passing_threshold``.
    """
    weights = {
        "sequence": 0.35,
        "phonetic": 0.35,
        "cosine": 0.10,
        "euclidean": 0.20,
    }

    sequence_pct = sequence * 100
    phonetic_pct = phonetic * 100
    euclidean_pct = normalize_euclidean(euclidean, euclidean_max)

    # Accumulate left to right in a fixed order (sequence, phonetic, cosine,
    # euclidean) so the floating-point result is reproducible.
    weighted_score = sequence_pct * weights["sequence"]
    weighted_score = weighted_score + phonetic_pct * weights["phonetic"]
    weighted_score = weighted_score + cosine * weights["cosine"]
    weighted_score = weighted_score + euclidean_pct * weights["euclidean"]

    return weighted_score, weighted_score >= passing_threshold
app/routers/V1/__init__.py ADDED
File without changes
app/routers/V1/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
app/routers/V1/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (171 Bytes). View file
 
app/routers/V1/__pycache__/v1_routers.cpython-310.pyc ADDED
Binary file (411 Bytes). View file
 
app/routers/V1/__pycache__/v1_routers.cpython-311.pyc ADDED
Binary file (526 Bytes). View file
 
app/routers/V1/v1_routers.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ v1 routes file
3
+ all the v1 routes like auth
4
+ profile... will be included here
5
+ """
6
+
7
+ from fastapi import APIRouter
8
+ from app.routers.V1.voice import voice_router
9
+
10
+ """ initialize the router """
11
+ router = APIRouter()
12
+
13
+ """ include voice routes """
14
+ router.include_router(voice_router.router)
app/routers/V1/voice/__init__.py ADDED
File without changes
app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (169 Bytes). View file
 
app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (158 Bytes). View file
 
app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc ADDED
Binary file (2.15 kB). View file
 
app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc ADDED
Binary file (3.17 kB). View file
 
app/routers/V1/voice/voice_router.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Body, HTTPException, status
2
+ from fastapi.responses import JSONResponse
3
+ from typing import Annotated
4
+ import time
5
+ import os
6
+ from app.transcriber import get_transcription
7
+ from app.matcher import match
8
+ from app.mfcc import mfcc_similarty_check
9
+ from app.string_processor import clean_transcription
10
+ from app.passing import calculate_passing
11
+ import requests
12
+
13
+
14
+ """ initialize the router """
15
+ router = APIRouter(prefix="/voice", tags=["Voice"])
16
+
17
+
18
@router.post("/transcribe")
async def transcribe_audio(
    original_url: Annotated[str, Body()],
    recorded: Annotated[UploadFile, File()],
    matcher_text: Annotated[str, Body()],
):
    """Score an uploaded recording against a reference recording and text.

    Downloads the reference WAV from ``original_url``, saves it and the
    uploaded ``recorded`` file to temporary files, transcribes the upload,
    compares the cleaned transcription with ``matcher_text``, compares the
    two audio files, and returns a JSON response with the transcription,
    the rounded weighted score and the pass flag (as a string).

    Raises:
        HTTPException: 400 when ``original_url`` does not end in ``.wav`` or
            the reference file cannot be downloaded; 500 for any other
            processing failure.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import tempfile

    logger = logging.getLogger(__name__)
    temp_paths = []

    try:
        # Validate URL
        if not original_url.endswith(".wav"):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid URL. Please provide a URL pointing to a wav file.",
            )

        # Download the audio file from the URL.
        # NOTE(review): the URL is caller-supplied and fetched server-side;
        # consider restricting the allowed hosts. The call is also blocking
        # inside an async handler.
        try:
            response = requests.get(original_url, timeout=30)
        except requests.RequestException as exc:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Unable to download the audio file from the URL.",
            ) from exc
        if response.status_code != 200:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Unable to download the audio file from the URL.",
            )

        # Unique temporary names avoid collisions between requests that
        # arrive within the same second. Each path is registered before
        # writing so it is cleaned up even if the write fails.
        with tempfile.NamedTemporaryFile(
            prefix="audio_", suffix="_original.wav", delete=False
        ) as buffer:
            filename_original = buffer.name
            temp_paths.append(filename_original)
            buffer.write(response.content)

        recorded_bytes = await recorded.read()
        with tempfile.NamedTemporaryFile(
            prefix="audio_", suffix="_recorded.wav", delete=False
        ) as buffer:
            filename_recorded = buffer.name
            temp_paths.append(filename_recorded)
            buffer.write(recorded_bytes)

        text = get_transcription(filename_recorded)
        text = clean_transcription(text)
        sequence, phonetic = match(matcher_text, text)
        Euclidean, Cosine = mfcc_similarty_check(filename_original, filename_recorded)
        weighted_score, is_passing = calculate_passing(
            sequence, phonetic, Cosine, Euclidean
        )
        return JSONResponse(
            {
                "transcription": text,
                "score": round(weighted_score),
                "passing": str(is_passing),
            }
        )
    except HTTPException:
        # Deliberate client errors (the 400s above) must reach the caller
        # unchanged instead of being rewritten as a 500.
        raise
    except Exception as exc:
        logger.exception("Failed to process the audio")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Unable to process the audio. Please try again later.",
        ) from exc
    finally:
        # Best-effort cleanup of every temporary file that was created.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                logger.warning("Could not remove temporary file %s", path)
app/routers/__init__.py ADDED
File without changes
app/routers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (160 Bytes). View file
 
app/routers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (168 Bytes). View file
 
app/routers/__pycache__/routes.cpython-310.pyc ADDED
Binary file (378 Bytes). View file
 
app/routers/__pycache__/routes.cpython-311.pyc ADDED
Binary file (512 Bytes). View file
 
app/routers/routes.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ main routes file
3
+ all the v1 and v2... routes will go here
4
+ """
5
+
6
+ from fastapi import APIRouter
7
+ from app.routers.V1 import v1_routers
8
+
9
+ """ initialize the router """
10
+ router = APIRouter()
11
+
12
+ """ include the v1 routes here """
13
+ router.include_router(v1_routers.router)
app/static/main.css ADDED
File without changes
app/string_processor.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unicodedata
2
+ import re
3
+
4
+
5
def clean_transcription(text):
    """Strip diacritics, punctuation and surrounding whitespace from ``text``.

    Steps, in order: NFKD-normalize, drop combining marks, remove every
    ``ʻ`` character, remove anything that is neither a word character nor
    whitespace, then trim leading and trailing whitespace.
    """
    decomposed = unicodedata.normalize("NFKD", text)

    # Combining marks have a non-zero combining class.
    base_characters = "".join(
        char for char in decomposed if unicodedata.combining(char) == 0
    )

    # The okina is removed explicitly before the generic punctuation pass.
    without_okina = base_characters.replace("ʻ", "")

    return re.sub(r"[^\w\s]", "", without_okina).strip()
22
+
app/templates/index.html ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Server Status</title>
7
+ <style>
8
+ @import url("https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;1,500&display=swap");
9
+ body {
10
+ background: linear-gradient(#3800e7, #8a15ff);
11
+ height: 100vh;
12
+ font-size: calc(14px + (26 - 14) * ((100vw - 300px) / (1600 - 300)));
13
+ font-family: "DM Mono", monospace;
14
+ font-weight: 300;
15
+ overflow: hidden;
16
+ color: white;
17
+ text-align: center;
18
+ }
19
+ h1 {
20
+ font-size: 3em;
21
+ margin-bottom: 0.2em;
22
+ }
23
+ h2 {
24
+ font-size: 2em;
25
+ }
26
+ .main {
27
+ height: 100vh;
28
+ display: flex;
29
+ flex-direction: column;
30
+ flex-wrap: wrap;
31
+ position: relative;
32
+ justify-content: center;
33
+ align-items: center;
34
+ }
35
+ .main:before,
36
+ .main:after {
37
+ content: "";
38
+ display: block;
39
+ position: absolute;
40
+ z-index: -3;
41
+ }
42
+ .main:before {
43
+ right: 0;
44
+ bottom: -19;
45
+ height: 30em;
46
+ width: 30em;
47
+ border-radius: 30em;
48
+ background: linear-gradient(#3800e7, #8a15ff);
49
+ align-self: flex-end;
50
+ animation: gradient-fade 8s ease-in-out 3s infinite alternate;
51
+ }
52
+ .main:after {
53
+ top: 0;
54
+ left: 30;
55
+ height: 10em;
56
+ width: 10em;
57
+ border-radius: 10em;
58
+ background: linear-gradient(#3800e7, #8a15ff);
59
+ animation: gradient-fade-alt 6s ease-in-out 3s infinite alternate;
60
+ }
61
+ .main__text-wrapper {
62
+ position: relative;
63
+ padding: 2em;
64
+ }
65
+ .main__text-wrapper:before,
66
+ .main__text-wrapper:after {
67
+ content: "";
68
+ display: block;
69
+ position: absolute;
70
+ }
71
+ .main__text-wrapper:before {
72
+ z-index: -1;
73
+ top: -3em;
74
+ right: -3em;
75
+ width: 13em;
76
+ height: 13em;
77
+ opacity: 0.7;
78
+ border-radius: 13em;
79
+ background: linear-gradient(#15e0ff, #8a15ff);
80
+ animation: rotation 7s linear infinite;
81
+ }
82
+ .main__text-wrapper:after {
83
+ z-index: -1;
84
+ bottom: -20em;
85
+ width: 20em;
86
+ height: 20em;
87
+ border-radius: 20em;
88
+ background: linear-gradient(#d000c5, #8a15ff);
89
+ animation: rotation 7s linear infinite;
90
+ }
91
+ .arrow {
92
+ z-index: 1000;
93
+ opacity: 0.5;
94
+ position: absolute;
95
+ }
96
+ .arrow--top {
97
+ top: 0;
98
+ left: -5em;
99
+ }
100
+ .arrow--bottom {
101
+ bottom: 0;
102
+ right: 3em;
103
+ }
104
+ .circle {
105
+ transform: translate(50%, -50%) rotate(0deg);
106
+ transform-origin: center;
107
+ }
108
+ .circle--ltblue {
109
+ height: 20em;
110
+ width: 20em;
111
+ border-radius: 20em;
112
+ background: linear-gradient(#15e0ff, #3800e7);
113
+ }
114
+ .backdrop {
115
+ position: absolute;
116
+ width: 100vw;
117
+ height: 100vh;
118
+ display: block;
119
+ background-color: pink;
120
+ }
121
+ .dotted-circle {
122
+ position: absolute;
123
+ top: 0;
124
+ right: 0;
125
+ opacity: 0.3;
126
+ animation: rotation 38s linear infinite;
127
+ }
128
+ .draw-in {
129
+ stroke-dasharray: 1000;
130
+ stroke-dashoffset: 10;
131
+ animation: draw 15s ease-in-out alternate infinite;
132
+ }
133
+ @keyframes draw {
134
+ from {
135
+ stroke-dashoffset: 1000;
136
+ }
137
+ to {
138
+ stroke-dashoffset: 0;
139
+ }
140
+ }
141
+ .item-to {
142
+ animation-duration: 10s;
143
+ animation-iteration-count: infinite;
144
+ transform-origin: bottom;
145
+ }
146
+ .bounce-1 {
147
+ animation-name: bounce-1;
148
+ animation-timing-function: ease;
149
+ }
150
+ .bounce-2 {
151
+ animation-name: bounce-2;
152
+ animation-timing-function: ease;
153
+ }
154
+ .bounce-3 {
155
+ animation-name: bounce-3;
156
+ animation-timing-function: ease;
157
+ }
158
+ @keyframes bounce-1 {
159
+ 0% {
160
+ transform: translateY(0);
161
+ }
162
+ 50% {
163
+ transform: translateY(50px);
164
+ }
165
+ 100% {
166
+ transform: translateY(0);
167
+ }
168
+ }
169
+ @keyframes bounce-2 {
170
+ 0% {
171
+ transform: translateY(0);
172
+ }
173
+ 50% {
174
+ transform: translateY(-30px);
175
+ }
176
+ 100% {
177
+ transform: translateY(0);
178
+ }
179
+ }
180
+ @keyframes bounce-3 {
181
+ 0% {
182
+ transform: translateY(0);
183
+ }
184
+ 50% {
185
+ transform: translateY(30px);
186
+ }
187
+ 100% {
188
+ transform: translateY(0);
189
+ }
190
+ }
191
+ @keyframes rotation {
192
+ from {
193
+ transform: rotate(0deg);
194
+ }
195
+ to {
196
+ transform: rotate(360deg);
197
+ }
198
+ }
199
+ @keyframes gradient-fade {
200
+ from {
201
+ transform: translate(10%, -10%) rotate(0deg);
202
+ }
203
+ to {
204
+ transform: translate(50%, -50%) rotate(360deg);
205
+ }
206
+ }
207
+ @keyframes gradient-fade-alt {
208
+ from {
209
+ transform: translate(-20%, 20%) rotate(0deg);
210
+ }
211
+ to {
212
+ transform: translate(-60%, 60%) rotate(360deg);
213
+ }
214
+ }
215
+ </style>
216
+ </head>
217
+ <body>
218
+ <div class="arrow arrow--top">
219
+ <svg
220
+ xmlns="http://www.w3.org/2000/svg"
221
+ width="270.11"
222
+ height="649.9"
223
+ overflow="visible"
224
+ >
225
+ <style>
226
+ .geo-arrow {
227
+ fill: none;
228
+ stroke: #fff;
229
+ stroke-width: 2;
230
+ stroke-miterlimit: 10;
231
+ }
232
+ </style>
233
+ <g class="item-to bounce-1">
234
+ <path
235
+ class="geo-arrow draw-in"
236
+ d="M135.06 142.564L267.995 275.5 135.06 408.434 2.125 275.499z"
237
+ />
238
+ </g>
239
+ <circle
240
+ class="geo-arrow item-to bounce-2"
241
+ cx="194.65"
242
+ cy="69.54"
243
+ r="7.96"
244
+ />
245
+ <circle class="geo-arrow draw-in" cx="194.65" cy="39.5" r="7.96" />
246
+ <circle
247
+ class="geo-arrow item-to bounce-3"
248
+ cx="194.65"
249
+ cy="9.46"
250
+ r="7.96"
251
+ />
252
+ <g class="geo-arrow item-to bounce-2">
253
+ <path
254
+ class="st0 draw-in"
255
+ d="M181.21 619.5l13.27 27 13.27-27zM194.48 644.5v-552"
256
+ />
257
+ </g>
258
+ </svg>
259
+ </div>
260
+ <div class="arrow arrow--bottom">
261
+ <svg
262
+ xmlns="http://www.w3.org/2000/svg"
263
+ width="31.35"
264
+ height="649.9"
265
+ overflow="visible"
266
+ >
267
+ <style>
268
+ .geo-arrow {
269
+ fill: none;
270
+ stroke: #fff;
271
+ stroke-width: 2;
272
+ stroke-miterlimit: 10;
273
+ }
274
+ </style>
275
+ <g class="item-to bounce-1">
276
+ <circle
277
+ class="geo-arrow item-to bounce-3"
278
+ cx="15.5"
279
+ cy="580.36"
280
+ r="7.96"
281
+ />
282
+ <circle class="geo-arrow draw-in" cx="15.5" cy="610.4" r="7.96" />
283
+ <circle
284
+ class="geo-arrow item-to bounce-2"
285
+ cx="15.5"
286
+ cy="640.44"
287
+ r="7.96"
288
+ />
289
+ <g class="item-to bounce-2">
290
+ <path
291
+ class="geo-arrow draw-in"
292
+ d="M28.94 30.4l-13.26-27-13.27 27zM15.68 5.4v552"
293
+ />
294
+ </g>
295
+ </g>
296
+ </svg>
297
+ </div>
298
+ <div class="main">
299
+ <div class="main__text-wrapper">
300
+ <h1 class="main__title">Backend Server</h1>
301
+ <h2>Up and Running</h2>
302
+ <svg
303
+ xmlns="http://www.w3.org/2000/svg"
304
+ class="dotted-circle"
305
+ width="352"
306
+ height="352"
307
+ overflow="visible"
308
+ >
309
+ <circle
310
+ cx="176"
311
+ cy="176"
312
+ r="174"
313
+ fill="none"
314
+ stroke="#fff"
315
+ stroke-width="2"
316
+ stroke-miterlimit="10"
317
+ stroke-dasharray="12.921,11.9271"
318
+ />
319
+ </svg>
320
+ </div>
321
+ </div>
322
+ </body>
323
+ </html>
app/transcriber.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ from datasets import load_dataset
4
+
5
+
6
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
7
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
8
+
9
+ model_id = "openai/whisper-large-v3"
10
+
11
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
12
+ model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
13
+ )
14
+
15
+ model.to(device)
16
+
17
+ processor = AutoProcessor.from_pretrained(model_id)
18
+
19
+ pipe = pipeline(
20
+ "automatic-speech-recognition",
21
+ model=model,
22
+ tokenizer=processor.tokenizer,
23
+ feature_extractor=processor.feature_extractor,
24
+ max_new_tokens=128,
25
+ chunk_length_s=30,
26
+ batch_size=16,
27
+ return_timestamps=True,
28
+ torch_dtype=torch_dtype,
29
+ device=device,
30
+ )
31
+
32
+ dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
33
+
34
+ sample = dataset[0]["audio"]
35
+
36
+
37
def get_transcription(file: str):
    """Run the module-level ASR pipeline on ``file`` and return its text.

    The pipeline is called with ``language`` set to ``"shona"`` in its
    generation kwargs; only the ``"text"`` entry of the result is returned.
    """
    generation_options = {"language": "shona"}
    output = pipe(file, generate_kwargs=generation_options)
    return output["text"]
requirements.txt ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.30.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ annotated-types==0.7.0
5
+ anyio==4.4.0
6
+ async-timeout==4.0.3
7
+ attrs==23.2.0
8
+ audioread==3.0.1
9
+ certifi==2024.6.2
10
+ cffi==1.16.0
11
+ charset-normalizer==3.3.2
12
+ click==8.1.7
13
+ colorama==0.4.6
14
+ datasets==2.19.1
15
+ decorator==5.1.1
16
+ dill==0.3.8
17
+ dnspython==2.6.1
18
+ email_validator==2.1.1
19
+ exceptiongroup==1.2.1
20
+ fastapi==0.111.0
21
+ fastapi-cli==0.0.4
22
+ filelock==3.14.0
23
+ frozenlist==1.4.1
24
+ fsspec==2024.3.1
25
+ fuzzywuzzy==0.18.0
26
+ h11==0.14.0
27
+ httpcore==1.0.5
28
+ httptools==0.6.1
29
+ httpx==0.27.0
30
+ huggingface-hub==0.23.2
31
+ idna==3.7
32
+ intel-openmp==2021.4.0
33
+ itsdangerous==2.2.0
34
+ Jinja2==3.1.4
35
+ joblib==1.4.2
36
+ langdetect==1.0.9
37
+ lazy_loader==0.4
38
+ Levenshtein==0.25.1
39
+ librosa==0.10.2.post1
40
+ llvmlite==0.42.0
41
+ markdown-it-py==3.0.0
42
+ MarkupSafe==2.1.5
43
+ mdurl==0.1.2
44
+ mkl==2021.4.0
45
+ mpmath==1.3.0
46
+ msgpack==1.0.8
47
+ multidict==6.0.5
48
+ multiprocess==0.70.16
49
+ networkx==3.3
50
+ numba==0.59.1
51
+ numpy==1.26.4
52
+ nvidia-cublas-cu12==12.1.3.1
53
+ nvidia-cuda-cupti-cu12==12.1.105
54
+ nvidia-cuda-nvrtc-cu12==12.1.105
55
+ nvidia-cuda-runtime-cu12==12.1.105
56
+ nvidia-cudnn-cu12==8.9.2.26
57
+ nvidia-cufft-cu12==11.0.2.54
58
+ nvidia-curand-cu12==10.3.2.106
59
+ nvidia-cusolver-cu12==11.4.5.107
60
+ nvidia-cusparse-cu12==12.1.0.106
61
+ nvidia-nccl-cu12==2.20.5
62
+ nvidia-nvjitlink-cu12==12.5.40
63
+ nvidia-nvtx-cu12==12.1.105
64
+ orjson==3.10.3
65
+ packaging==24.0
66
+ pandas==2.2.2
67
+ platformdirs==4.2.2
68
+ pooch==1.8.1
69
+ psutil==5.9.8
70
+ pyarrow==16.1.0
71
+ pyarrow-hotfix==0.6
72
+ pycparser==2.22
73
+ pydantic==2.7.2
74
+ pydantic-extra-types==2.7.0
75
+ pydantic-settings==2.2.1
76
+ pydantic_core==2.18.3
77
+ Pygments==2.18.0
78
+ python-dateutil==2.9.0.post0
79
+ python-dotenv==1.0.1
80
+ python-Levenshtein==0.25.1
81
+ python-multipart==0.0.9
82
+ pytz==2024.1
83
+ PyYAML==6.0.1
84
+ rapidfuzz==3.9.3
85
+ regex==2024.5.15
86
+ requests==2.32.3
87
+ rich==13.7.1
88
+ safetensors==0.4.3
89
+ scikit-learn==1.5.0
90
+ scipy==1.13.1
91
+ shellingham==1.5.4
92
+ six==1.16.0
93
+ sniffio==1.3.1
94
+ soundfile==0.12.1
95
+ soxr==0.3.7
96
+ starlette==0.37.2
97
+ sympy==1.12.1
98
+ tbb==2021.12.0
99
+ threadpoolctl==3.5.0
100
+ tokenizers==0.19.1
101
+ torch==2.3.0
102
+ tqdm==4.66.4
103
+ transformers==4.41.2
104
+ triton==2.3.0
105
+ typer==0.12.3
106
+ typing_extensions==4.12.1
107
+ tzdata==2024.1
108
+ ujson==5.10.0
109
+ urllib3==2.2.1
110
+ uvicorn==0.30.0
111
+ uvloop==0.19.0
112
+ watchfiles==0.22.0
113
+ websockets==12.0
114
+ xxhash==3.4.1
115
+ yarl==1.9.4