Kartikeyssj2 commited on
Commit
10dd4bf
1 Parent(s): 2f6faa5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +155 -138
main.py CHANGED
@@ -1,173 +1,190 @@
1
- import re
2
- import requests
3
- import pyarrow as pa
4
- import librosa
5
- import torch
6
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
7
- from fastapi import FastAPI, File, UploadFile
8
- import warnings
9
- from starlette.formparsers import MultiPartParser
10
- import io
11
- import random
12
- import tempfile
13
- import os
14
- import numba
15
- import soundfile as sf
16
- import asyncio
17
-
18
- MultiPartParser.max_file_size = 200 * 1024 * 1024
19
-
20
- # Initialize FastAPI app
21
- app = FastAPI()
22
-
23
- # Load Wav2Vec2 tokenizer and model
24
- tokenizer = Wav2Vec2Tokenizer.from_pretrained("./models/tokenizer")
25
- model = Wav2Vec2ForCTC.from_pretrained("./models/model")
26
-
27
-
28
- # Function to download English word list
29
- def download_word_list():
30
- print("Downloading English word list...")
31
- url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
32
- response = requests.get(url)
33
- words = set(response.text.split())
34
- print("Word list downloaded.")
35
- return words
36
-
37
- english_words = download_word_list()
38
-
39
- # Function to count correctly spelled words in text
40
- def count_spelled_words(text, word_list):
41
- print("Counting spelled words...")
42
- # Split the text into words
43
- words = re.findall(r'\b\w+\b', text.lower())
44
-
45
- correct = sum(1 for word in words if word in word_list)
46
- incorrect = len(words) - correct
47
-
48
- print("Spelling check complete.")
49
- return incorrect, correct
50
-
51
- # Function to apply spell check to an item (assuming it's a dictionary)
52
- def apply_spell_check(item, word_list):
53
- print("Applying spell check...")
54
- if isinstance(item, dict):
55
- # This is a single item
56
- text = item['transcription']
57
- incorrect, correct = count_spelled_words(text, word_list)
58
- item['incorrect_words'] = incorrect
59
- item['correct_words'] = correct
60
- print("Spell check applied to single item.")
61
- return item
62
- else:
63
- # This is likely a batch
64
- texts = item['transcription']
65
- results = [count_spelled_words(text, word_list) for text in texts]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- incorrect_counts, correct_counts = zip(*results)
68
 
69
- item = item.append_column('incorrect_words', pa.array(incorrect_counts))
70
- item = item.append_column('correct_words', pa.array(correct_counts))
71
 
72
- print("Spell check applied to batch of items.")
73
- return item
74
 
75
- # FastAPI routes
76
- @app.get('/')
77
- async def root():
78
- return "Welcome to the pronunciation scoring API!"
79
 
80
- @app.post('/check_post')
81
- async def rnc(number):
82
- return {
83
- "your value:" , number
84
- }
85
 
86
- @app.get('/check_get')
87
- async def get_rnc():
88
- return random.randint(0 , 10)
89
 
90
 
91
- @app.post('/fluency_score')
92
- async def fluency_scoring(file: UploadFile = File(...)):
93
- audio_array, sample_rate = librosa.load(file.file, sr=16000)
94
- print(audio_array)
95
- return audio_array[:5]
96
 
97
 
98
- @app.post('/pronunciation_score')
99
- async def pronunciation_scoring(file: UploadFile = File(...)):
100
- print("loading the file")
101
- url = "https://speech-processing-6.onrender.com/process_audio"
102
- files = {'file': await file.read()}
103
 
104
- print("file loaded")
105
 
106
- # print(files)
107
 
108
- print("making a POST request on speech processor")
109
 
110
- # Make the POST request
111
- response = requests.post(url, files=files)
112
 
113
- audio = response.json().get('audio_array')
114
 
115
- print("audio:" , audio[:5])
116
 
117
 
118
 
119
- print("length of the audio array:" , len(audio))
120
 
121
- print("*" * 100)
122
 
123
- # Tokenization
124
- print("Tokenizing audio...")
125
- input_values = tokenizer(
126
- audio,
127
- return_tensors="pt",
128
- padding="max_length",
129
- max_length= 386380,
130
- truncation=True
131
- ).input_values
132
 
133
- print(input_values.shape)
134
 
135
- print("Tokenization complete. Shape of input_values:", input_values.shape)
136
 
137
- return "tokenization successful"
138
 
139
- # Perform inference
140
- print("Performing inference with Wav2Vec2 model...")
141
 
142
- logits = model(input_values).logits
143
 
144
- print("Inference complete. Shape of logits:", logits.shape)
145
 
146
- # Get predictions
147
- print("Getting predictions...")
148
- prediction = torch.argmax(logits, dim=-1)
149
- print("Prediction shape:", prediction.shape)
150
 
151
- # Decode predictions
152
- print("Decoding predictions...")
153
- transcription = tokenizer.batch_decode(prediction)[0]
154
 
155
- # Convert transcription to lowercase
156
- transcription = transcription.lower()
157
 
158
- print("Decoded transcription:", transcription)
159
 
160
- incorrect, correct = count_spelled_words(transcription, english_words)
161
- print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct)
162
 
163
- # Calculate pronunciation score
164
- fraction = correct / (incorrect + correct)
165
- score = round(fraction * 100, 2)
166
- print("Pronunciation score for", transcription, ":", score)
167
 
168
- print("Pronunciation scoring process complete.")
169
 
170
- return {
171
- "transcription": transcription,
172
- "pronunciation_score": score
173
- }
 
1
+ import soundfile as sf
2
+ import numpy as np
3
+
4
@app.post('/fluency_score')
async def fluency_scoring(file: UploadFile = File(...)):
    """Decode an uploaded audio file and return its first five samples.

    Reads the upload with soundfile, down-mixes multi-channel audio to
    mono, resamples to 16 kHz when needed, and returns the first five
    float samples as a JSON-serializable list (a smoke-test response,
    not an actual fluency score yet).

    NOTE(review): `app`, `UploadFile`, `File`, and `librosa` are only
    defined in the commented-out section at the bottom of this file —
    those imports (and `app = FastAPI()`) must be restored for this
    module to import at all. Confirm and uncomment.
    """
    # SoundFile.read() yields float32 samples; shape is (frames,) for
    # mono or (frames, channels) for multi-channel input.
    with sf.SoundFile(file.file, 'r') as sound_file:
        audio_array = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Down-mix to mono: librosa.resample would otherwise operate on a
    # 2-D (frames, channels) array along the wrong axis.
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)

    if sample_rate != 16000:
        # librosa >= 0.10 requires orig_sr/target_sr as keyword
        # arguments; the old positional form raises TypeError.
        audio_array = librosa.resample(
            audio_array, orig_sr=sample_rate, target_sr=16000
        )

    print(audio_array)
    # .tolist() converts numpy float32 values into plain floats that
    # FastAPI can serialize to JSON.
    return audio_array[:5].tolist()
16
+
17
+
18
+ # import re
19
+ # import requests
20
+ # import pyarrow as pa
21
+ # import librosa
22
+ # import torch
23
+ # from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
24
+ # from fastapi import FastAPI, File, UploadFile
25
+ # import warnings
26
+ # from starlette.formparsers import MultiPartParser
27
+ # import io
28
+ # import random
29
+ # import tempfile
30
+ # import os
31
+ # import numba
32
+ # import soundfile as sf
33
+ # import asyncio
34
+
35
+ # MultiPartParser.max_file_size = 200 * 1024 * 1024
36
+
37
+ # # Initialize FastAPI app
38
+ # app = FastAPI()
39
+
40
+ # # Load Wav2Vec2 tokenizer and model
41
+ # tokenizer = Wav2Vec2Tokenizer.from_pretrained("./models/tokenizer")
42
+ # model = Wav2Vec2ForCTC.from_pretrained("./models/model")
43
+
44
+
45
+ # # Function to download English word list
46
+ # def download_word_list():
47
+ # print("Downloading English word list...")
48
+ # url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
49
+ # response = requests.get(url)
50
+ # words = set(response.text.split())
51
+ # print("Word list downloaded.")
52
+ # return words
53
+
54
+ # english_words = download_word_list()
55
+
56
+ # # Function to count correctly spelled words in text
57
+ # def count_spelled_words(text, word_list):
58
+ # print("Counting spelled words...")
59
+ # # Split the text into words
60
+ # words = re.findall(r'\b\w+\b', text.lower())
61
+
62
+ # correct = sum(1 for word in words if word in word_list)
63
+ # incorrect = len(words) - correct
64
+
65
+ # print("Spelling check complete.")
66
+ # return incorrect, correct
67
+
68
+ # # Function to apply spell check to an item (assuming it's a dictionary)
69
+ # def apply_spell_check(item, word_list):
70
+ # print("Applying spell check...")
71
+ # if isinstance(item, dict):
72
+ # # This is a single item
73
+ # text = item['transcription']
74
+ # incorrect, correct = count_spelled_words(text, word_list)
75
+ # item['incorrect_words'] = incorrect
76
+ # item['correct_words'] = correct
77
+ # print("Spell check applied to single item.")
78
+ # return item
79
+ # else:
80
+ # # This is likely a batch
81
+ # texts = item['transcription']
82
+ # results = [count_spelled_words(text, word_list) for text in texts]
83
 
84
+ # incorrect_counts, correct_counts = zip(*results)
85
 
86
+ # item = item.append_column('incorrect_words', pa.array(incorrect_counts))
87
+ # item = item.append_column('correct_words', pa.array(correct_counts))
88
 
89
+ # print("Spell check applied to batch of items.")
90
+ # return item
91
 
92
+ # # FastAPI routes
93
+ # @app.get('/')
94
+ # async def root():
95
+ # return "Welcome to the pronunciation scoring API!"
96
 
97
+ # @app.post('/check_post')
98
+ # async def rnc(number):
99
+ # return {
100
+ # "your value:" , number
101
+ # }
102
 
103
+ # @app.get('/check_get')
104
+ # async def get_rnc():
105
+ # return random.randint(0 , 10)
106
 
107
 
108
+ # @app.post('/fluency_score')
109
+ # async def fluency_scoring(file: UploadFile = File(...)):
110
+ # audio_array, sample_rate = librosa.load(file.file, sr=16000)
111
+ # print(audio_array)
112
+ # return audio_array[:5]
113
 
114
 
115
+ # @app.post('/pronunciation_score')
116
+ # async def pronunciation_scoring(file: UploadFile = File(...)):
117
+ # print("loading the file")
118
+ # url = "https://speech-processing-6.onrender.com/process_audio"
119
+ # files = {'file': await file.read()}
120
 
121
+ # print("file loaded")
122
 
123
+ # # print(files)
124
 
125
+ # print("making a POST request on speech processor")
126
 
127
+ # # Make the POST request
128
+ # response = requests.post(url, files=files)
129
 
130
+ # audio = response.json().get('audio_array')
131
 
132
+ # print("audio:" , audio[:5])
133
 
134
 
135
 
136
+ # print("length of the audio array:" , len(audio))
137
 
138
+ # print("*" * 100)
139
 
140
+ # # Tokenization
141
+ # print("Tokenizing audio...")
142
+ # input_values = tokenizer(
143
+ # audio,
144
+ # return_tensors="pt",
145
+ # padding="max_length",
146
+ # max_length= 386380,
147
+ # truncation=True
148
+ # ).input_values
149
 
150
+ # print(input_values.shape)
151
 
152
+ # print("Tokenization complete. Shape of input_values:", input_values.shape)
153
 
154
+ # return "tokenization successful"
155
 
156
+ # # Perform inference
157
+ # print("Performing inference with Wav2Vec2 model...")
158
 
159
+ # logits = model(input_values).logits
160
 
161
+ # print("Inference complete. Shape of logits:", logits.shape)
162
 
163
+ # # Get predictions
164
+ # print("Getting predictions...")
165
+ # prediction = torch.argmax(logits, dim=-1)
166
+ # print("Prediction shape:", prediction.shape)
167
 
168
+ # # Decode predictions
169
+ # print("Decoding predictions...")
170
+ # transcription = tokenizer.batch_decode(prediction)[0]
171
 
172
+ # # Convert transcription to lowercase
173
+ # transcription = transcription.lower()
174
 
175
+ # print("Decoded transcription:", transcription)
176
 
177
+ # incorrect, correct = count_spelled_words(transcription, english_words)
178
+ # print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct)
179
 
180
+ # # Calculate pronunciation score
181
+ # fraction = correct / (incorrect + correct)
182
+ # score = round(fraction * 100, 2)
183
+ # print("Pronunciation score for", transcription, ":", score)
184
 
185
+ # print("Pronunciation scoring process complete.")
186
 
187
+ # return {
188
+ # "transcription": transcription,
189
+ # "pronunciation_score": score
190
+ # }