Spaces:

Kartikeyssj2
/

pronunciation-scoring

Build error

File size: 4,910 Bytes

427bb16
 
 
 
 
 
 
 
 
 
7dde9f0
4006ad9
 
c3b89d9
 
c34b637
c3b89d9
427bb16
 
 
 
 
 
7365efc
 
 
427bb16
 
 
 
 
 
 
 
 
 
c14d52f
427bb16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
882da5c
 
 
 
 
 
02e008b
882da5c
 
 
f2601c1
2f6faa5
 
 
 
 
 
 
f2601c1
2f6faa5
2e81661
bf1ce78
f89dd94
bf1ce78
2e81661
152c4f7
1eff474
d7b0c66
 
91d4d7a
bf1ce78
990903e
91d4d7a
ee147ec
1b19461
b32a33b
cbb980c
 
d466784
813004d
0cf878c
d466784
5efbe11
 
427bb16
5f74423
 
 
 
9cc016d
5f74423
 
 
 
 
5efbe11
 
9cc016d
 
427bb16
 
3cc7981
 
 
5efbe11
 
427bb16
 
 
5efbe11
 
427bb16
 
 
 
 
 
 
 
5efbe11
427bb16
 
 
 
 
7365efc
427bb16

import re
import requests
import pyarrow as pa
import librosa
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from fastapi import FastAPI, File, UploadFile
import warnings
from starlette.formparsers import MultiPartParser
import io
import random
import tempfile
import os
import numba
import soundfile as sf 
import asyncio

# Override the `max_file_size` class attribute of Starlette's multipart parser
# with 200 MiB (200 * 1024 * 1024 bytes).
# NOTE(review): presumably intended to accommodate large audio uploads; in
# Starlette this attribute looks like an in-memory spooling threshold rather
# than a hard upload limit -- confirm against the installed Starlette version.
MultiPartParser.max_file_size = 200 * 1024 * 1024

# Initialize FastAPI app
app = FastAPI()

# Load Wav2Vec2 tokenizer and model
# Both are loaded once, at import time, from directories relative to the
# current working directory; the pronunciation endpoint uses these shared
# module-level objects.
tokenizer = Wav2Vec2Tokenizer.from_pretrained("./models/tokenizer")
model = Wav2Vec2ForCTC.from_pretrained("./models/model")


# Function to download English word list
def download_word_list():
    """Download the dwyl ``words_alpha`` word list and return it as a set.

    Returns:
        A set containing every whitespace-separated token of the downloaded
        text file.

    Raises:
        requests.RequestException: If the connection fails, the request times
            out, or the server answers with an HTTP error status.
    """
    print("Downloading English word list...")
    url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx: otherwise the body of an error page would be
    # split into "words" and silently used as the dictionary.
    response.raise_for_status()
    words = set(response.text.split())
    print("Word list downloaded.")
    return words

# Built once at import time (this triggers the network download above) and
# referenced by the pronunciation scoring endpoint as its dictionary.
english_words = download_word_list()

# Function to count correctly spelled words in text
def count_spelled_words(text, word_list):
    """Count how many words of ``text`` are absent from / present in ``word_list``.

    The text is lower-cased and tokenised with the regex ``\\b\\w+\\b``; each
    token is then looked up in ``word_list`` with the ``in`` operator.

    Returns:
        A ``(incorrect, correct)`` tuple of integer counts.
    """
    print("Counting spelled words...")
    # Tokenise the lower-cased text into runs of word characters.
    tokens = re.findall(r'\b\w+\b', text.lower())

    known = 0
    for token in tokens:
        if token in word_list:
            known += 1
    unknown = len(tokens) - known

    print("Spelling check complete.")
    return unknown, known

# Function to apply spell check to an item (assuming it's a dictionary)
def apply_spell_check(item, word_list):
    """Annotate ``item`` with misspelled / correctly spelled word counts.

    Args:
        item: Either a ``dict`` holding a single ``'transcription'`` string, or
            a batch object indexable by ``'transcription'`` that offers
            ``append_column`` (presumably a ``pyarrow.Table`` -- verify
            against the caller).
        word_list: Container of known words, queried with ``in``.

    Returns:
        For a dict, the same dict with ``'incorrect_words'`` and
        ``'correct_words'`` set. For a batch, the object returned by
        ``append_column`` after adding those two columns.
    """
    print("Applying spell check...")
    if isinstance(item, dict):
        # This is a single item
        text = item['transcription']
        incorrect, correct = count_spelled_words(text, word_list)
        item['incorrect_words'] = incorrect
        item['correct_words'] = correct
        print("Spell check applied to single item.")
        return item

    # This is likely a batch.
    # Iterating a pyarrow column yields Scalar objects, which have no
    # ``.lower()``; unwrap them to Python values. Plain strings pass through.
    texts = [
        text.as_py() if hasattr(text, 'as_py') else text
        for text in item['transcription']
    ]
    results = [count_spelled_words(text, word_list) for text in texts]

    # Build the columns explicitly instead of ``zip(*results)``, which raises
    # ValueError on unpacking when the batch is empty.
    incorrect_counts = [incorrect for incorrect, _ in results]
    correct_counts = [correct for _, correct in results]

    # An explicit integer type keeps the column type stable even for an empty
    # batch (an untyped empty array would be inferred as a null column).
    count_type = pa.int64()
    item = item.append_column('incorrect_words', pa.array(incorrect_counts, type=count_type))
    item = item.append_column('correct_words', pa.array(correct_counts, type=count_type))

    print("Spell check applied to batch of items.")
    return item

# FastAPI routes
@app.get('/')
async def root():
    # Landing route: answers with a fixed greeting string.
    greeting = "Welcome to the pronunciation scoring API!"
    return greeting

@app.post('/check_post')
async def rnc(number):
    """Echo the ``number`` request parameter back to the caller.

    The previous body, ``{"your value:" , number}``, was a *set* literal (a
    comma where a colon was intended), so the response was an unordered JSON
    array instead of a key/value object.

    Returns:
        A dict mapping ``"your value"`` to the received ``number``.
    """
    return {
        "your value": number
    }

@app.get('/check_get')
async def get_rnc():
    # Simple GET check: answers with a random integer between 0 and 10,
    # both ends included.
    lower_bound, upper_bound = 0, 10
    return random.randint(lower_bound, upper_bound)


@app.post('/fluency_score')
async def fluency_scoring(file: UploadFile = File(...)):
    """Decode the uploaded audio and return its first five samples.

    Args:
        file: Uploaded audio file; its underlying file object is handed to
            ``librosa.load`` with a requested sample rate of 16000.

    Returns:
        A list with the first five decoded samples as plain Python numbers.
    """
    # The sample rate returned by librosa is not needed here.
    audio_array, _ = librosa.load(file.file, sr=16000)
    print(audio_array)
    # A NumPy array cannot be serialised by FastAPI's JSON encoder; convert
    # the slice to a plain list so the response can be encoded.
    return audio_array[:5].tolist()
    

@app.post('/pronunciation_score')
async def pronunciation_scoring(file: UploadFile = File(...)):
    """Transcribe an uploaded recording and score it by dictionary hits.

    The upload is forwarded to an external speech-processing service, which is
    expected to answer with JSON containing an ``audio_array`` entry. That
    array is fed through the module-level Wav2Vec2 tokenizer and model, the
    per-frame argmax of the logits is decoded to text, and the score is the
    percentage of transcribed words found in ``english_words``.

    Args:
        file: Uploaded audio file.

    Returns:
        A dict with ``"transcription"`` (lower-cased text) and
        ``"pronunciation_score"`` (0-100, rounded to two decimals; 0.0 when
        no words were transcribed).

    Raises:
        HTTPException: 502 if the speech processor cannot be reached, answers
            with an error status or invalid JSON, or returns no audio data.
    """
    # Local import: the file's top-level fastapi import does not bring
    # HTTPException into scope.
    from fastapi import HTTPException

    print("loading the file")
    url = "https://speech-processing-6.onrender.com/process_audio"
    files = {'file': await file.read()}
    print("file loaded")

    print("making a POST request on speech processor")
    # NOTE(review): requests is a blocking client, so the event loop is
    # stalled while this call runs; consider offloading it to a worker thread.
    # The generous timeout is a guess at the upstream's worst-case latency --
    # tune as needed.
    try:
        response = requests.post(url, files=files, timeout=120)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Speech processor request failed.",
        ) from exc

    # Guard against a payload that is not an object or lacks the samples;
    # previously this crashed with a TypeError on ``audio[:5]``.
    audio = payload.get('audio_array') if isinstance(payload, dict) else None
    if not audio:
        raise HTTPException(
            status_code=502,
            detail="Speech processor returned no audio data.",
        )

    print("audio:" , audio[:5])
    print("length of the audio array:" , len(audio))
    print("*" * 100)

    # Tokenization
    print("Tokenizing audio...")
    input_values = tokenizer(
        audio,
        return_tensors="pt",
        padding="max_length",
        max_length=386380,
        truncation=True,
    ).input_values
    print("Tokenization complete. Shape of input_values:", input_values.shape)

    # A leftover debugging ``return "tokenization successful"`` used to sit
    # here and made everything below unreachable; it has been removed so the
    # endpoint actually returns a score.

    # Perform inference
    print("Performing inference with Wav2Vec2 model...")
    # Gradients are not needed for inference; skipping them saves memory.
    with torch.no_grad():
        logits = model(input_values).logits
    print("Inference complete. Shape of logits:", logits.shape)

    # Get predictions
    print("Getting predictions...")
    prediction = torch.argmax(logits, dim=-1)
    print("Prediction shape:", prediction.shape)

    # Decode predictions
    print("Decoding predictions...")
    transcription = tokenizer.batch_decode(prediction)[0]

    # Convert transcription to lowercase
    transcription = transcription.lower()
    print("Decoded transcription:", transcription)

    incorrect, correct = count_spelled_words(transcription, english_words)
    print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct)

    # Calculate pronunciation score; an empty transcription has no words, so
    # avoid dividing by zero and report a score of 0.
    total_words = incorrect + correct
    fraction = correct / total_words if total_words else 0.0
    score = round(fraction * 100, 2)
    print("Pronunciation score for", transcription, ":", score)

    print("Pronunciation scoring process complete.")

    return {
        "transcription": transcription,
        "pronunciation_score": score
    }