Spaces:
Runtime error
Runtime error
text fix
Browse files
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/main.cpython-310.pyc +0 -0
- app/__pycache__/matcher.cpython-310.pyc +0 -0
- app/__pycache__/mfcc.cpython-310.pyc +0 -0
- app/__pycache__/string_processor.cpython-310.pyc +0 -0
- app/__pycache__/transcriber.cpython-310.pyc +0 -0
- app/main.py +2 -0
- app/matcher.py +0 -2
- app/routers/V1/__pycache__/__init__.cpython-310.pyc +0 -0
- app/routers/V1/__pycache__/v1_routers.cpython-310.pyc +0 -0
- app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc +0 -0
- app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc +0 -0
- app/routers/V1/voice/voice_router.py +3 -2
- app/routers/__pycache__/__init__.cpython-310.pyc +0 -0
- app/routers/__pycache__/routes.cpython-310.pyc +0 -0
- app/string_processor.py +18 -14
app/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (152 Bytes). View file
|
|
app/__pycache__/main.cpython-310.pyc
ADDED
Binary file (1.61 kB). View file
|
|
app/__pycache__/matcher.cpython-310.pyc
ADDED
Binary file (807 Bytes). View file
|
|
app/__pycache__/mfcc.cpython-310.pyc
ADDED
Binary file (1.63 kB). View file
|
|
app/__pycache__/string_processor.cpython-310.pyc
ADDED
Binary file (657 Bytes). View file
|
|
app/__pycache__/transcriber.cpython-310.pyc
ADDED
Binary file (1.16 kB). View file
|
|
app/main.py
CHANGED
@@ -15,6 +15,8 @@ app = FastAPI(
|
|
15 |
{
|
16 |
"url": "http://127.0.0.1:8000/api/v1",
|
17 |
"description": "Local Server",
|
|
|
|
|
18 |
"url": "https://r3vibe-mother-tongue.hf.space/api/v1",
|
19 |
"description": "Huggingface Server",
|
20 |
}
|
|
|
15 |
{
|
16 |
"url": "http://127.0.0.1:8000/api/v1",
|
17 |
"description": "Local Server",
|
18 |
+
},
|
19 |
+
{
|
20 |
"url": "https://r3vibe-mother-tongue.hf.space/api/v1",
|
21 |
"description": "Huggingface Server",
|
22 |
}
|
app/matcher.py
CHANGED
@@ -18,8 +18,6 @@ def sequence_match(a, b):
|
|
18 |
return difflib.SequenceMatcher(None, a, b).ratio()
|
19 |
|
20 |
|
21 |
-
|
22 |
-
|
23 |
def match(original, transcription):
|
24 |
sequence = sequence_match(original, transcription)
|
25 |
phonetic = phonetic_match(original, transcription)
|
|
|
18 |
return difflib.SequenceMatcher(None, a, b).ratio()
|
19 |
|
20 |
|
|
|
|
|
21 |
def match(original, transcription):
|
22 |
sequence = sequence_match(original, transcription)
|
23 |
phonetic = phonetic_match(original, transcription)
|
app/routers/V1/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (163 Bytes). View file
|
|
app/routers/V1/__pycache__/v1_routers.cpython-310.pyc
ADDED
Binary file (411 Bytes). View file
|
|
app/routers/V1/voice/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (169 Bytes). View file
|
|
app/routers/V1/voice/__pycache__/voice_router.cpython-310.pyc
ADDED
Binary file (1.94 kB). View file
|
|
app/routers/V1/voice/voice_router.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
from app.transcriber import get_transcription
|
7 |
from app.matcher import match
|
8 |
from app.mfcc import mfcc_similarty_check
|
9 |
-
from app.string_processor import
|
10 |
|
11 |
|
12 |
""" initialize the router """
|
@@ -51,7 +51,8 @@ async def transcribe_audio(
|
|
51 |
|
52 |
try:
|
53 |
text = get_transcription(filename_recorded)
|
54 |
-
|
|
|
55 |
Euclidean, Cosine = mfcc_similarty_check(
|
56 |
filename_original, filename_recorded
|
57 |
)
|
|
|
6 |
from app.transcriber import get_transcription
|
7 |
from app.matcher import match
|
8 |
from app.mfcc import mfcc_similarty_check
|
9 |
+
from app.string_processor import clean_transcription
|
10 |
|
11 |
|
12 |
""" initialize the router """
|
|
|
51 |
|
52 |
try:
|
53 |
text = get_transcription(filename_recorded)
|
54 |
+
text = clean_transcription(text)
|
55 |
+
sequence, phonetic = match(matcher_text, text)
|
56 |
Euclidean, Cosine = mfcc_similarty_check(
|
57 |
filename_original, filename_recorded
|
58 |
)
|
app/routers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (160 Bytes). View file
|
|
app/routers/__pycache__/routes.cpython-310.pyc
ADDED
Binary file (378 Bytes). View file
|
|
app/string_processor.py
CHANGED
@@ -1,18 +1,22 @@
|
|
1 |
-
import
|
2 |
import re
|
3 |
|
4 |
|
5 |
-
def
|
6 |
-
#
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
# Step 2: Remove all punctuation (including full stops and commas)
|
10 |
-
text = text.translate(str.maketrans("", "", string.punctuation))
|
11 |
-
|
12 |
-
# Step 3: Extract sentences (assuming you want to keep the text as a whole sentence)
|
13 |
-
sentences = re.split(r"(?<=[.!?]) +", text)
|
14 |
-
|
15 |
-
# Combine the sentences back into a single string without punctuation
|
16 |
-
processed_text = " ".join(sentences)
|
17 |
-
|
18 |
-
return processed_text
|
|
|
1 |
+
import unicodedata
|
2 |
import re
|
3 |
|
4 |
|
5 |
+
def clean_transcription(text):
|
6 |
+
# Normalize the text to NFKD form
|
7 |
+
normalized_text = unicodedata.normalize('NFKD', text)
|
8 |
+
|
9 |
+
# Remove diacritics
|
10 |
+
cleaned_text = ''.join([c for c in normalized_text if not unicodedata.combining(c)])
|
11 |
+
|
12 |
+
# Explicitly remove the leading ʻ character and any other specific characters
|
13 |
+
cleaned_text = cleaned_text.replace('ʻ', '')
|
14 |
+
|
15 |
+
# Remove any remaining special characters (if any)
|
16 |
+
cleaned_text = re.sub(r'[^\w\s]', '', cleaned_text)
|
17 |
+
|
18 |
+
# Ensure the text is stripped of any unwanted leading or trailing whitespace
|
19 |
+
cleaned_text = cleaned_text.strip()
|
20 |
+
|
21 |
+
return cleaned_text
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|