arnabg95 committed on
Commit
c3c3a5e
1 Parent(s): 3fa3e73

all files added

.vscode/PythonImportHelper-v2-Completion.json ADDED
@@ -0,0 +1,646 @@
+ [
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "UploadFile",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "File",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "Body",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "HTTPException",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "status",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "FastAPI",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "Request",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "JSONResponse",
+     "importPath": "fastapi.responses",
+     "description": "fastapi.responses",
+     "isExtraImport": true,
+     "detail": "fastapi.responses",
+     "documentation": {}
+   },
+   {
+     "label": "HTMLResponse",
+     "importPath": "fastapi.responses",
+     "description": "fastapi.responses",
+     "isExtraImport": true,
+     "detail": "fastapi.responses",
+     "documentation": {}
+   },
+   {
+     "label": "Annotated",
+     "importPath": "typing",
+     "description": "typing",
+     "isExtraImport": true,
+     "detail": "typing",
+     "documentation": {}
+   },
+   {
+     "label": "time",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "time",
+     "description": "time",
+     "detail": "time",
+     "documentation": {}
+   },
+   {
+     "label": "os",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "os",
+     "description": "os",
+     "detail": "os",
+     "documentation": {}
+   },
+   {
+     "label": "get_transcription",
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "isExtraImport": true,
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "match",
+     "importPath": "app.matcher",
+     "description": "app.matcher",
+     "isExtraImport": true,
+     "detail": "app.matcher",
+     "documentation": {}
+   },
+   {
+     "label": "mfcc_similarty_check",
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "isExtraImport": true,
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "voice_router",
+     "importPath": "app.routers.V1.voice",
+     "description": "app.routers.V1.voice",
+     "isExtraImport": true,
+     "detail": "app.routers.V1.voice",
+     "documentation": {}
+   },
+   {
+     "label": "v1_routers",
+     "importPath": "app.routers.V1",
+     "description": "app.routers.V1",
+     "isExtraImport": true,
+     "detail": "app.routers.V1",
+     "documentation": {}
+   },
+   {
+     "label": "StaticFiles",
+     "importPath": "fastapi.staticfiles",
+     "description": "fastapi.staticfiles",
+     "isExtraImport": true,
+     "detail": "fastapi.staticfiles",
+     "documentation": {}
+   },
+   {
+     "label": "Jinja2Templates",
+     "importPath": "fastapi.templating",
+     "description": "fastapi.templating",
+     "isExtraImport": true,
+     "detail": "fastapi.templating",
+     "documentation": {}
+   },
+   {
+     "label": "CORSMiddleware",
+     "importPath": "fastapi.middleware.cors",
+     "description": "fastapi.middleware.cors",
+     "isExtraImport": true,
+     "detail": "fastapi.middleware.cors",
+     "documentation": {}
+   },
+   {
+     "label": "routes",
+     "importPath": "app.routers",
+     "description": "app.routers",
+     "isExtraImport": true,
+     "detail": "app.routers",
+     "documentation": {}
+   },
+   {
+     "label": "difflib",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "difflib",
+     "description": "difflib",
+     "detail": "difflib",
+     "documentation": {}
+   },
+   {
+     "label": "fuzz",
+     "importPath": "fuzzywuzzy",
+     "description": "fuzzywuzzy",
+     "isExtraImport": true,
+     "detail": "fuzzywuzzy",
+     "documentation": {}
+   },
+   {
+     "label": "librosa",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "librosa",
+     "description": "librosa",
+     "detail": "librosa",
+     "documentation": {}
+   },
+   {
+     "label": "AutoFeatureExtractor",
+     "importPath": "transformers",
+     "description": "transformers",
+     "isExtraImport": true,
+     "detail": "transformers",
+     "documentation": {}
+   },
+   {
+     "label": "Wav2Vec2BertModel",
+     "importPath": "transformers",
+     "description": "transformers",
+     "isExtraImport": true,
+     "detail": "transformers",
+     "documentation": {}
+   },
+   {
+     "label": "AutoModelForSpeechSeq2Seq",
+     "importPath": "transformers",
+     "description": "transformers",
+     "isExtraImport": true,
+     "detail": "transformers",
+     "documentation": {}
+   },
+   {
+     "label": "AutoProcessor",
+     "importPath": "transformers",
+     "description": "transformers",
+     "isExtraImport": true,
+     "detail": "transformers",
+     "documentation": {}
+   },
+   {
+     "label": "pipeline",
+     "importPath": "transformers",
+     "description": "transformers",
+     "isExtraImport": true,
+     "detail": "transformers",
+     "documentation": {}
+   },
+   {
+     "label": "soundfile",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "soundfile",
+     "description": "soundfile",
+     "detail": "soundfile",
+     "documentation": {}
+   },
+   {
+     "label": "cosine_similarity",
+     "importPath": "sklearn.metrics.pairwise",
+     "description": "sklearn.metrics.pairwise",
+     "isExtraImport": true,
+     "detail": "sklearn.metrics.pairwise",
+     "documentation": {}
+   },
+   {
+     "label": "numpy",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "numpy",
+     "description": "numpy",
+     "detail": "numpy",
+     "documentation": {}
+   },
+   {
+     "label": "torch",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "torch",
+     "description": "torch",
+     "detail": "torch",
+     "documentation": {}
+   },
+   {
+     "label": "load_dataset",
+     "importPath": "datasets",
+     "description": "datasets",
+     "isExtraImport": true,
+     "detail": "datasets",
+     "documentation": {}
+   },
+   {
+     "label": "annotations",
+     "importPath": "__future__",
+     "description": "__future__",
+     "isExtraImport": true,
+     "detail": "__future__",
+     "documentation": {}
+   },
+   {
+     "label": "site",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "site",
+     "description": "site",
+     "detail": "site",
+     "documentation": {}
+   },
+   {
+     "label": "sys",
+     "kind": 6,
+     "isExtraImport": true,
+     "importPath": "sys",
+     "description": "sys",
+     "detail": "sys",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "app.routers.V1.voice.voice_router",
+     "description": "app.routers.V1.voice.voice_router",
+     "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\n@router.post(\"/transcribe\")\nasync def transcribe_audio(\n    file: Annotated[UploadFile, File()], matcher_text: Annotated[str, Body()]\n):\n    try:\n        # Validate file type\n        if not file.filename.endswith(\".wav\"):\n            raise HTTPException(\n                status_code=status.HTTP_400_BAD_REQUEST,",
+     "detail": "app.routers.V1.voice.voice_router",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "app.routers.V1.v1_routers",
+     "description": "app.routers.V1.v1_routers",
+     "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router(voice_router.router)",
+     "detail": "app.routers.V1.v1_routers",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "app.routers.routes",
+     "description": "app.routers.routes",
+     "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
+     "detail": "app.routers.routes",
+     "documentation": {}
+   },
+   {
+     "label": "app",
+     "kind": 5,
+     "importPath": "app.main",
+     "description": "app.main",
+     "peekOfCode": "app = FastAPI(\n    title=\"Mother Tongue Voice Matcher\",\n    version=\"0.0.5\",\n    servers=[{\n        \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n    }],\n    root_path=\"/api/v1\",\n    root_path_in_servers=False,\n)\n# cors policy",
+     "detail": "app.main",
+     "documentation": {}
+   },
+   {
+     "label": "origins",
+     "kind": 5,
+     "importPath": "app.main",
+     "description": "app.main",
+     "peekOfCode": "origins = [\n    \"http://localhost\",\n    \"http://localhost:8080\",\n    \"http://localhost:3000\",\n    \"http://localhost:5173\",\n    \"http://127.0.0.1\",\n    \"http://127.0.0.1:8080\",\n    \"http://127.0.0.1:3000\",\n    \"http://127.0.0.1:5173\",\n]",
+     "detail": "app.main",
+     "documentation": {}
+   },
+   {
+     "label": "templates",
+     "kind": 5,
+     "importPath": "app.main",
+     "description": "app.main",
+     "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\n@app.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n    \"\"\"set the root to show a html welcome page\"\"\"\n    return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
+     "detail": "app.main",
+     "documentation": {}
+   },
+   {
+     "label": "phonetic_match",
+     "kind": 2,
+     "importPath": "app.matcher",
+     "description": "app.matcher",
+     "peekOfCode": "def phonetic_match(word1, word2):\n    \"\"\"\n    Compares two words based on their phonetic similarity.\n    \"\"\"\n    return fuzz.ratio(word1, word2)\n# Custom sequence matching function\ndef sequence_match(a, b):\n    \"\"\"\n    Uses sequence matching to compare two sequences of words.\n    \"\"\"",
+     "detail": "app.matcher",
+     "documentation": {}
+   },
+   {
+     "label": "sequence_match",
+     "kind": 2,
+     "importPath": "app.matcher",
+     "description": "app.matcher",
+     "peekOfCode": "def sequence_match(a, b):\n    \"\"\"\n    Uses sequence matching to compare two sequences of words.\n    \"\"\"\n    return difflib.SequenceMatcher(None, a, b).ratio()\n# Main function to compare texts with percentage match\ndef compare_texts(text1, text2):\n    \"\"\"\n    Compares two texts using phonetic matching and sequence matching,\n    returning a percentage match score.",
+     "detail": "app.matcher",
+     "documentation": {}
+   },
+   {
+     "label": "compare_texts",
+     "kind": 2,
+     "importPath": "app.matcher",
+     "description": "app.matcher",
+     "peekOfCode": "def compare_texts(text1, text2):\n    \"\"\"\n    Compares two texts using phonetic matching and sequence matching,\n    returning a percentage match score.\n    \"\"\"\n    words1 = text1.lower().split()\n    words2 = text2.lower().split()\n    total_matches = len(words1)\n    mismatches = 0\n    for word1, word2 in zip(words1, words2):",
+     "detail": "app.matcher",
+     "documentation": {}
+   },
+   {
+     "label": "match",
+     "kind": 2,
+     "importPath": "app.matcher",
+     "description": "app.matcher",
+     "peekOfCode": "def match(original, transcription):\n    return compare_texts(original, transcription)",
+     "detail": "app.matcher",
+     "documentation": {}
+   },
+   {
+     "label": "load_and_resample_audio",
+     "kind": 2,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "def load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)\n    mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "calculate_mfcc",
+     "kind": 2,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "def calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)\n    mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension\n    return mfccs_scaled\ndef calculate_similarity(mfccs1, mfccs2):\n    similarity = cosine_similarity(\n        mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))\n    return similarity[0][0]\ndef mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "calculate_similarity",
+     "kind": 2,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "def calculate_similarity(mfccs1, mfccs2):\n    similarity = cosine_similarity(\n        mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))\n    return similarity[0][0]\ndef mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)\n    user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)\n    # Extract MFCCs from audio data\n    correct_mfccs = calculate_mfcc(correct_pronunciation_audio, sample_rate)\n    user_mfccs = calculate_mfcc(user_pronunciation_audio, sample_rate)",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "mfcc_similarty_check",
+     "kind": 2,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "def mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)\n    user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)\n    # Extract MFCCs from audio data\n    correct_mfccs = calculate_mfcc(correct_pronunciation_audio, sample_rate)\n    user_mfccs = calculate_mfcc(user_pronunciation_audio, sample_rate)\n    distance = np.linalg.norm(correct_mfccs.flatten() - user_mfccs.flatten())\n    # Calculate cosine similarity using MFCCs\n    similarity_score = calculate_similarity(correct_mfccs, user_mfccs)\n    accuracy_percentage = similarity_score * 100",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "model_id",
+     "kind": 5,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "model_id = \"facebook/w2v-bert-2.0\"\nfeature_extractor = AutoFeatureExtractor.from_pretrained(model_id)\nmodel = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "feature_extractor",
+     "kind": 5,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)\nmodel = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "model",
+     "kind": 5,
+     "importPath": "app.mfcc",
+     "description": "app.mfcc",
+     "peekOfCode": "model = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)",
+     "detail": "app.mfcc",
+     "documentation": {}
+   },
+   {
+     "label": "get_transcription",
+     "kind": 2,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "def get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "device",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\ntorch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32\nmodel_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "torch_dtype",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32\nmodel_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "model_id",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "model_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "model",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "model = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "processor",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "processor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,\n    feature_extractor=processor.feature_extractor,\n    max_new_tokens=128,\n    chunk_length_s=30,\n    batch_size=16,\n    return_timestamps=True,",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "pipe",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "pipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,\n    feature_extractor=processor.feature_extractor,\n    max_new_tokens=128,\n    chunk_length_s=30,\n    batch_size=16,\n    return_timestamps=True,\n    torch_dtype=torch_dtype,",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "dataset",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "dataset = load_dataset(\n    \"distil-whisper/librispeech_long\", \"clean\", split=\"validation\")\nsample = dataset[0][\"audio\"]\ndef get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "sample",
+     "kind": 5,
+     "importPath": "app.transcriber",
+     "description": "app.transcriber",
+     "peekOfCode": "sample = dataset[0][\"audio\"]\ndef get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+     "detail": "app.transcriber",
+     "documentation": {}
+   },
+   {
+     "label": "bin_dir",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "bin_dir = os.path.dirname(abs_file)\nbase = bin_dir[: -len(\"Scripts\") - 1]  # strip away the bin part from the __file__, plus the path separator\n# prepend bin to PATH (this file is inside the bin directory)\nos.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "base",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "base = bin_dir[: -len(\"Scripts\") - 1]  # strip away the bin part from the __file__, plus the path separator\n# prepend bin to PATH (this file is inside the bin directory)\nos.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "os.environ[\"PATH\"]",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "os.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "os.environ[\"VIRTUAL_ENV\"]",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "os.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "os.environ[\"VIRTUAL_ENV_PROMPT\"]",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "os.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "prev_length",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "prev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "sys.path[:]",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "sys.real_prefix",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "sys.real_prefix = sys.prefix\nsys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   },
+   {
+     "label": "sys.prefix",
+     "kind": 5,
+     "importPath": "env.Scripts.activate_this",
+     "description": "env.Scripts.activate_this",
+     "peekOfCode": "sys.prefix = base",
+     "detail": "env.Scripts.activate_this",
+     "documentation": {}
+   }
+ ]
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.11
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/.vscode/PythonImportHelper-v2-Completion.json ADDED
@@ -0,0 +1,144 @@
+ [
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "APIRouter",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "FastAPI",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "Request",
+     "importPath": "fastapi",
+     "description": "fastapi",
+     "isExtraImport": true,
+     "detail": "fastapi",
+     "documentation": {}
+   },
+   {
+     "label": "v1_routers",
+     "importPath": "app.routers.V1",
+     "description": "app.routers.V1",
+     "isExtraImport": true,
+     "detail": "app.routers.V1",
+     "documentation": {}
+   },
+   {
+     "label": "HTMLResponse",
+     "importPath": "fastapi.responses",
+     "description": "fastapi.responses",
+     "isExtraImport": true,
+     "detail": "fastapi.responses",
+     "documentation": {}
+   },
+   {
+     "label": "StaticFiles",
+     "importPath": "fastapi.staticfiles",
+     "description": "fastapi.staticfiles",
+     "isExtraImport": true,
+     "detail": "fastapi.staticfiles",
+     "documentation": {}
+   },
+   {
+     "label": "Jinja2Templates",
+     "importPath": "fastapi.templating",
+     "description": "fastapi.templating",
+     "isExtraImport": true,
+     "detail": "fastapi.templating",
+     "documentation": {}
+   },
+   {
+     "label": "CORSMiddleware",
+     "importPath": "fastapi.middleware.cors",
+     "description": "fastapi.middleware.cors",
+     "isExtraImport": true,
+     "detail": "fastapi.middleware.cors",
+     "documentation": {}
+   },
+   {
+     "label": "routes",
+     "importPath": "app.routers",
+     "description": "app.routers",
+     "isExtraImport": true,
+     "detail": "app.routers",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "routers.V1.voice.voice_router",
+     "description": "routers.V1.voice.voice_router",
+     "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\n@router.post(\"/transcribe\")\nasync def transcribe_audio():\n    return",
+     "detail": "routers.V1.voice.voice_router",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "routers.V1.v1_routers",
+     "description": "routers.V1.v1_routers",
+     "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router()",
+     "detail": "routers.V1.v1_routers",
+     "documentation": {}
+   },
+   {
+     "label": "router",
+     "kind": 5,
+     "importPath": "routers.routes",
+     "description": "routers.routes",
+     "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
+     "detail": "routers.routes",
+     "documentation": {}
+   },
+   {
+     "label": "app",
+     "kind": 5,
+     "importPath": "main",
+     "description": "main",
+     "peekOfCode": "app = FastAPI(\n    title=\"Mother Tongue Voice Matcher\",\n    version=\"0.0.5\",\n    servers=[{\n        \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n    }],\n    root_path=\"/api/v1\",\n    root_path_in_servers=False,\n)\n# cors policy",
+     "detail": "main",
+     "documentation": {}
+   },
+   {
+     "label": "origins",
+     "kind": 5,
+     "importPath": "main",
+     "description": "main",
+     "peekOfCode": "origins = [\n    \"http://localhost\",\n    \"http://localhost:8080\",\n    \"http://localhost:3000\",\n    \"http://localhost:5173\",\n    \"http://127.0.0.1\",\n    \"http://127.0.0.1:8080\",\n    \"http://127.0.0.1:3000\",\n    \"http://127.0.0.1:5173\",\n]",
+     "detail": "main",
+     "documentation": {}
+   },
+   {
+     "label": "templates",
+     "kind": 5,
+     "importPath": "main",
+     "description": "main",
+     "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\n@app.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n    \"\"\"set the root to show a html welcome page\"\"\"\n    return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
+     "detail": "main",
+     "documentation": {}
+   }
+ ]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (141 Bytes).
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.07 kB).
 
app/__pycache__/matcher.cpython-311.pyc ADDED
Binary file (1.92 kB).
 
app/__pycache__/mfcc.cpython-311.pyc ADDED
Binary file (2.67 kB).
 
app/__pycache__/transcriber.cpython-311.pyc ADDED
Binary file (1.72 kB).
 
app/main.py ADDED
@@ -0,0 +1,56 @@
+ """ main api file """
+
+ from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+ from fastapi import FastAPI, Request
+ from fastapi.middleware.cors import CORSMiddleware
+ from app.routers import routes
+
+ """ initialize app with openapi configurations """
+ app = FastAPI(
+     title="Mother Tongue Voice Matcher",
+     version="0.0.5",
+     servers=[{
+         "url": "http://127.0.0.1:8000/api/v1", "description": "Local Server"
+     }],
+     root_path="/api/v1",
+     root_path_in_servers=False,
+ )
+
+
+ # cors policy
+ origins = [
+     "http://localhost",
+     "http://localhost:8080",
+     "http://localhost:3000",
+     "http://localhost:5173",
+     "http://127.0.0.1",
+     "http://127.0.0.1:8080",
+     "http://127.0.0.1:3000",
+     "http://127.0.0.1:5173",
+ ]
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # mount the static folder
+ app.mount("/static", StaticFiles(directory="app/static"), name="static")
+
+ # mount the templates folder
+ templates = Jinja2Templates(directory="app/templates")
+
+
+ @app.get("/", response_class=HTMLResponse, include_in_schema=False)
+ async def root(request: Request):
+     """set the root to show a html welcome page"""
+     return templates.TemplateResponse(request=request, name="index.html")
+
+
+ # include all the other api endpoints
+ app.include_router(routes.router)
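A quick smoke test for the wiring above, sketched with FastAPI's TestClient (not part of this commit; assumes fastapi and httpx are installed, and note that importing app.main transitively imports app.transcriber, which loads the Whisper model at import time, so the first run is slow):

from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)

# the root route renders app/templates/index.html
response = client.get("/")
assert response.status_code == 200
assert "Backend Server" in response.text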
app/matcher.py ADDED
@@ -0,0 +1,46 @@
+ import difflib
+ from fuzzywuzzy import fuzz
+
+
+ # Custom phonetic matching function
+ def phonetic_match(word1, word2):
+     """
+     Compares two words based on their phonetic similarity.
+     """
+     return fuzz.ratio(word1, word2)
+
+
+ # Custom sequence matching function
+ def sequence_match(a, b):
+     """
+     Uses sequence matching to compare two sequences of words.
+     """
+     return difflib.SequenceMatcher(None, a, b).ratio()
+
+
+ # Main function to compare texts with percentage match
+ def compare_texts(text1, text2):
+     """
+     Compares two texts using phonetic matching and sequence matching,
+     returning a percentage match score.
+     """
+     words1 = text1.lower().split()
+     words2 = text2.lower().split()
+
+     total_matches = len(words1)
+     mismatches = 0
+
+     for word1, word2 in zip(words1, words2):
+         if word1 != word2:
+             mismatches += 1
+             if phonetic_match(word1, word2) < 80:
+                 # Use sequence matching only if phonetic is low
+                 if sequence_match(word1, word2) < 0.8:
+                     mismatches += 1  # Penalty for bad sequence match
+
+     accuracy = 1 - (mismatches / total_matches)
+     return accuracy * 100  # Convert to percentage
+
+
+ def match(original, transcription):
+     return compare_texts(original, transcription)
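To make the scoring concrete, a small illustrative run (hypothetical Shona phrases; fuzzywuzzy must be installed):

from app.matcher import match

# identical texts produce no mismatches: 100.0
print(match("mhoro shamwari", "mhoro shamwari"))

# one of two words differs, but it is phonetically close (fuzz.ratio above 80),
# so it is penalised only once: (1 - 1/2) * 100 = 50.0
print(match("mhoro shamwari", "mhoro shamwar"))

Note that a word failing both the phonetic and the sequence check is counted twice, so very poor matches can score below zero.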
app/mfcc.py ADDED
@@ -0,0 +1,49 @@
+ import librosa
+ from transformers import AutoFeatureExtractor, Wav2Vec2BertModel
+ import soundfile as sf
+ from sklearn.metrics.pairwise import cosine_similarity
+ import numpy as np
+
+
+ # Model and feature extractor (same as before)
+ model_id = "facebook/w2v-bert-2.0"
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+ model = Wav2Vec2BertModel.from_pretrained(model_id)
+
+
+ def load_and_resample_audio(file_path, target_sample_rate=16000):
+     audio_input, sample_rate = sf.read(file_path)
+     if sample_rate != target_sample_rate:
+         audio_input = librosa.resample(
+             audio_input, orig_sr=sample_rate, target_sr=target_sample_rate
+         )
+     return audio_input, target_sample_rate  # rate after optional resampling
+
+
+ def calculate_mfcc(audio_data, sample_rate):
+     mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)
+     mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension
+     return mfccs_scaled
+
+
+ def calculate_similarity(mfccs1, mfccs2):
+     similarity = cosine_similarity(
+         mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))
+     return similarity[0][0]
+
+
+ def mfcc_similarty_check(original: str, recorded: str):
+     correct_pronunciation_audio, _ = load_and_resample_audio(original)
+     user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)
+
+     # Extract MFCCs from audio data
+     correct_mfccs = calculate_mfcc(correct_pronunciation_audio, sample_rate)
+     user_mfccs = calculate_mfcc(user_pronunciation_audio, sample_rate)
+
+     distance = np.linalg.norm(correct_mfccs.flatten() - user_mfccs.flatten())
+
+     # Calculate cosine similarity using MFCCs
+     similarity_score = calculate_similarity(correct_mfccs, user_mfccs)
+     accuracy_percentage = similarity_score * 100
+
+     return distance, accuracy_percentage
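A usage sketch for the checker above (file names are hypothetical; any audio readable by soundfile works, and the module downloads the w2v-bert-2.0 checkpoint at import time even though only librosa's MFCCs feed the scores):

from app.mfcc import mfcc_similarty_check

# lower distance and higher percentage mean closer pronunciations
distance, accuracy = mfcc_similarty_check("reference.wav", "attempt.wav")
print(f"Euclidean distance: {distance:.2f}")
print(f"Cosine similarity: {accuracy:.1f}%")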
app/routers/V1/__init__.py ADDED
File without changes
app/routers/V1/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (171 Bytes).
 
app/routers/V1/__pycache__/v1_routers.cpython-311.pyc ADDED
Binary file (526 Bytes).
 
app/routers/V1/v1_routers.py ADDED
@@ -0,0 +1,14 @@
+ """
+ v1 routes file
+ all the v1 routes like auth
+ profile... will be included here
+ """
+
+ from fastapi import APIRouter
+ from app.routers.V1.voice import voice_router
+
+ """ initialize the router """
+ router = APIRouter()
+
+ """ include auth routes """
+ router.include_router(voice_router.router)
app/routers/V1/voice/__init__.py ADDED
File without changes
app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (158 Bytes).
 
app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc ADDED
Binary file (3.17 kB).
 
app/routers/V1/voice/voice_router.py ADDED
@@ -0,0 +1,68 @@
+ from fastapi import APIRouter, UploadFile, File, Body, HTTPException, status
+ from fastapi.responses import JSONResponse
+ from typing import Annotated
+ import time
+ import os
+ from app.transcriber import get_transcription
+ from app.matcher import match
+ from app.mfcc import mfcc_similarty_check
+
+
+ """ initialize the router """
+ router = APIRouter(prefix="/voice", tags=["Voice"])
+
+
+ @router.post("/transcribe")
+ async def transcribe_audio(
+     file: Annotated[UploadFile, File()], matcher_text: Annotated[str, Body()]
+ ):
+     try:
+         # Validate file type
+         if not file.filename.endswith(".wav"):
+             raise HTTPException(
+                 status_code=status.HTTP_400_BAD_REQUEST,
+                 detail="Invalid file type. Please upload a wav file.",
+             )
+
+         # Read file bytes
+         file_bytes = await file.read()
+         filename = f"audio_{int(time.time())}.wav"
+
+         # Save the file temporarily
+         with open(filename, "wb") as buffer:
+             buffer.write(file_bytes)
+
+         try:
+             text = get_transcription(filename)
+             percent = match(matcher_text, text)
+             if int(percent) > 50:
+                 # NOTE: currently compares the uploaded file with itself
+                 Euclidean, Cosine = mfcc_similarty_check(filename, filename)
+                 return JSONResponse(
+                     {
+                         "transcription": text,
+                         "percent": percent,
+                         "Cosine": float(Cosine),  # numpy scalar -> JSON-safe float
+                         "Euclidean": float(Euclidean),
+                     }
+                 )
+             else:
+                 return JSONResponse(
+                     {
+                         "transcription": text,
+                         "percent": percent,
+                     }
+                 )
+         finally:
+             # Clean up the temporary file
+             os.remove(filename)
+
+     except HTTPException:
+         # let the deliberate 400 above reach the client instead of becoming a 500
+         raise
+     except Exception as e:
+         print(e)
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail="Unable to process the audio. Please try again later.",
+         )
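A client-side sketch for exercising this endpoint (assumes the requests package and a server started locally on port 8000; because the handler mixes File() and Body() parameters, matcher_text travels as a multipart form field; behind a proxy that supplies the configured root_path, the URL would gain an /api/v1 prefix):

import requests

# hypothetical recording; the endpoint rejects anything but .wav
with open("attempt.wav", "rb") as f:
    response = requests.post(
        "http://127.0.0.1:8000/voice/transcribe",
        files={"file": ("attempt.wav", f, "audio/wav")},
        data={"matcher_text": "mhoro shamwari"},
    )
print(response.status_code, response.json())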
app/routers/__init__.py ADDED
File without changes
app/routers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (168 Bytes).
 
app/routers/__pycache__/routes.cpython-311.pyc ADDED
Binary file (512 Bytes).
 
app/routers/routes.py ADDED
@@ -0,0 +1,13 @@
+ """
+ main routes file
+ all the v1 and v2... routes will go here
+ """
+
+ from fastapi import APIRouter
+ from app.routers.V1 import v1_routers
+
+ """ initialize the router """
+ router = APIRouter()
+
+ """ include the v1 routes here """
+ router.include_router(v1_routers.router)
app/static/main.css ADDED
File without changes
app/templates/index.html ADDED
@@ -0,0 +1,323 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8" />
+     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+     <title>Server Status</title>
+     <style>
+       @import url("https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;1,500&display=swap");
+       body {
+         background: linear-gradient(#3800e7, #8a15ff);
+         height: 100vh;
+         font-size: calc(14px + (26 - 14) * ((100vw - 300px) / (1600 - 300)));
+         font-family: "DM Mono", monospace;
+         font-weight: 300;
+         overflow: hidden;
+         color: white;
+         text-align: center;
+       }
+       h1 {
+         font-size: 3em;
+         margin-bottom: 0.2em;
+       }
+       h2 {
+         font-size: 2em;
+       }
+       .main {
+         height: 100vh;
+         display: flex;
+         flex-direction: column;
+         flex-wrap: wrap;
+         position: relative;
+         justify-content: center;
+         align-items: center;
+       }
+       .main:before,
+       .main:after {
+         content: "";
+         display: block;
+         position: absolute;
+         z-index: -3;
+       }
+       .main:before {
+         right: 0;
+         bottom: -19em;
+         height: 30em;
+         width: 30em;
+         border-radius: 30em;
+         background: linear-gradient(#3800e7, #8a15ff);
+         align-self: flex-end;
+         animation: gradient-fade 8s ease-in-out 3s infinite alternate;
+       }
+       .main:after {
+         top: 0;
+         left: 30em;
+         height: 10em;
+         width: 10em;
+         border-radius: 10em;
+         background: linear-gradient(#3800e7, #8a15ff);
+         animation: gradient-fade-alt 6s ease-in-out 3s infinite alternate;
+       }
+       .main__text-wrapper {
+         position: relative;
+         padding: 2em;
+       }
+       .main__text-wrapper:before,
+       .main__text-wrapper:after {
+         content: "";
+         display: block;
+         position: absolute;
+       }
+       .main__text-wrapper:before {
+         z-index: -1;
+         top: -3em;
+         right: -3em;
+         width: 13em;
+         height: 13em;
+         opacity: 0.7;
+         border-radius: 13em;
+         background: linear-gradient(#15e0ff, #8a15ff);
+         animation: rotation 7s linear infinite;
+       }
+       .main__text-wrapper:after {
+         z-index: -1;
+         bottom: -20em;
+         width: 20em;
+         height: 20em;
+         border-radius: 20em;
+         background: linear-gradient(#d000c5, #8a15ff);
+         animation: rotation 7s linear infinite;
+       }
+       .arrow {
+         z-index: 1000;
+         opacity: 0.5;
+         position: absolute;
+       }
+       .arrow--top {
+         top: 0;
+         left: -5em;
+       }
+       .arrow--bottom {
+         bottom: 0;
+         right: 3em;
+       }
+       .circle {
+         transform: translate(50%, -50%) rotate(0deg);
+         transform-origin: center;
+       }
+       .circle--ltblue {
+         height: 20em;
+         width: 20em;
+         border-radius: 20em;
+         background: linear-gradient(#15e0ff, #3800e7);
+       }
+       .backdrop {
+         position: absolute;
+         width: 100vw;
+         height: 100vh;
+         display: block;
+         background-color: pink;
+       }
+       .dotted-circle {
+         position: absolute;
+         top: 0;
+         right: 0;
+         opacity: 0.3;
+         animation: rotation 38s linear infinite;
+       }
+       .draw-in {
+         stroke-dasharray: 1000;
+         stroke-dashoffset: 10;
+         animation: draw 15s ease-in-out alternate infinite;
+       }
+       @keyframes draw {
+         from {
+           stroke-dashoffset: 1000;
+         }
+         to {
+           stroke-dashoffset: 0;
+         }
+       }
+       .item-to {
+         animation-duration: 10s;
+         animation-iteration-count: infinite;
+         transform-origin: bottom;
+       }
+       .bounce-1 {
+         animation-name: bounce-1;
+         animation-timing-function: ease;
+       }
+       .bounce-2 {
+         animation-name: bounce-2;
+         animation-timing-function: ease;
+       }
+       .bounce-3 {
+         animation-name: bounce-3;
+         animation-timing-function: ease;
+       }
+       @keyframes bounce-1 {
+         0% {
+           transform: translateY(0);
+         }
+         50% {
+           transform: translateY(50px);
+         }
+         100% {
+           transform: translateY(0);
+         }
+       }
+       @keyframes bounce-2 {
+         0% {
+           transform: translateY(0);
+         }
+         50% {
+           transform: translateY(-30px);
+         }
+         100% {
+           transform: translateY(0);
+         }
+       }
+       @keyframes bounce-3 {
+         0% {
+           transform: translateY(0);
+         }
+         50% {
+           transform: translateY(30px);
+         }
+         100% {
+           transform: translateY(0);
+         }
+       }
+       @keyframes rotation {
+         from {
+           transform: rotate(0deg);
+         }
+         to {
+           transform: rotate(360deg);
+         }
+       }
+       @keyframes gradient-fade {
+         from {
+           transform: translate(10%, -10%) rotate(0deg);
+         }
+         to {
+           transform: translate(50%, -50%) rotate(360deg);
+         }
+       }
+       @keyframes gradient-fade-alt {
+         from {
+           transform: translate(-20%, 20%) rotate(0deg);
+         }
+         to {
+           transform: translate(-60%, 60%) rotate(360deg);
+         }
+       }
+     </style>
+   </head>
+   <body>
+     <div class="arrow arrow--top">
+       <svg
+         xmlns="http://www.w3.org/2000/svg"
+         width="270.11"
+         height="649.9"
+         overflow="visible"
+       >
+         <style>
+           .geo-arrow {
+             fill: none;
+             stroke: #fff;
+             stroke-width: 2;
+             stroke-miterlimit: 10;
+           }
+         </style>
+         <g class="item-to bounce-1">
+           <path
+             class="geo-arrow draw-in"
+             d="M135.06 142.564L267.995 275.5 135.06 408.434 2.125 275.499z"
+           />
+         </g>
+         <circle
+           class="geo-arrow item-to bounce-2"
+           cx="194.65"
+           cy="69.54"
+           r="7.96"
+         />
+         <circle class="geo-arrow draw-in" cx="194.65" cy="39.5" r="7.96" />
+         <circle
+           class="geo-arrow item-to bounce-3"
+           cx="194.65"
+           cy="9.46"
+           r="7.96"
+         />
+         <g class="geo-arrow item-to bounce-2">
+           <path
+             class="st0 draw-in"
+             d="M181.21 619.5l13.27 27 13.27-27zM194.48 644.5v-552"
+           />
+         </g>
+       </svg>
+     </div>
+     <div class="arrow arrow--bottom">
+       <svg
+         xmlns="http://www.w3.org/2000/svg"
+         width="31.35"
+         height="649.9"
+         overflow="visible"
+       >
+         <style>
+           .geo-arrow {
+             fill: none;
+             stroke: #fff;
+             stroke-width: 2;
+             stroke-miterlimit: 10;
+           }
+         </style>
+         <g class="item-to bounce-1">
+           <circle
+             class="geo-arrow item-to bounce-3"
+             cx="15.5"
+             cy="580.36"
+             r="7.96"
+           />
+           <circle class="geo-arrow draw-in" cx="15.5" cy="610.4" r="7.96" />
+           <circle
+             class="geo-arrow item-to bounce-2"
+             cx="15.5"
+             cy="640.44"
+             r="7.96"
+           />
+           <g class="item-to bounce-2">
+             <path
+               class="geo-arrow draw-in"
+               d="M28.94 30.4l-13.26-27-13.27 27zM15.68 5.4v552"
+             />
+           </g>
+         </g>
+       </svg>
+     </div>
+     <div class="main">
+       <div class="main__text-wrapper">
+         <h1 class="main__title">Backend Server</h1>
+         <h2>Up and Running</h2>
+         <svg
+           xmlns="http://www.w3.org/2000/svg"
+           class="dotted-circle"
+           width="352"
+           height="352"
+           overflow="visible"
+         >
+           <circle
+             cx="176"
+             cy="176"
+             r="174"
+             fill="none"
+             stroke="#fff"
+             stroke-width="2"
+             stroke-miterlimit="10"
+             stroke-dasharray="12.921,11.9271"
+           />
+         </svg>
+       </div>
+     </div>
+   </body>
+ </html>
app/transcriber.py ADDED
@@ -0,0 +1,41 @@
+ import torch
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ from datasets import load_dataset
+
+
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ model_id = "openai/whisper-large-v3"
+
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
+     model_id,
+     torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+ )
+
+ model.to(device)
+
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     max_new_tokens=128,
+     chunk_length_s=30,
+     batch_size=16,
+     return_timestamps=True,
+     torch_dtype=torch_dtype,
+     device=device,
+ )
+
+ dataset = load_dataset(
+     "distil-whisper/librispeech_long", "clean", split="validation")
+
+ sample = dataset[0]["audio"]  # example clip; unused by get_transcription below
+
+
+ def get_transcription(file: str):
+     result = pipe(file, generate_kwargs={"language": "shona"})
+     return result["text"]
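Usage sketch (hypothetical path; the first call downloads the whisper-large-v3 weights, and the module-level load_dataset call fetches an example dataset at import time as well):

from app.transcriber import get_transcription

# the pipeline accepts a path to an audio file; language is pinned to Shona
text = get_transcription("attempt.wav")
print(text)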
requirements.txt ADDED
Binary file (3.56 kB).