seba3y committed on
Commit
36b6d5c
1 Parent(s): 718233c

Delete logic.py

Browse files
Files changed (1) hide show
  1. logic.py +0 -73
logic.py DELETED
@@ -1,73 +0,0 @@
1
- from phonemizer.separator import Separator
2
- from phonemizer import phonemize
3
- # from phonemizer.backend.espeak.wrapper import EspeakWrapper
4
- from Levenshtein import distance as levenshtein_distance
5
- from scoring import calculate_fluency_and_pronunciation
6
-
7
- import whisper
8
- import torch
9
-
10
# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# The Whisper checkpoint is loaded once at import time and shared by transcribe().
model = whisper.load_model("base.en", device=device)

# Separator configuration handed to phonemize() by text2phoneme().
separator = Separator(phone=None, word='')
14
-
15
- # EspeakWrapper.set_library(r"C:\Program Files\eSpeak NG\libespeak-ng.dll")
16
-
17
def transcribe(audio):
    """Transcribe ``audio`` with the module-level Whisper ``model``.

    Args:
        audio: Input forwarded unchanged to ``model.transcribe`` (callers in
            this file pass a path to a recording).

    Returns:
        dict: Only the ``'language'`` and ``'text'`` entries of the Whisper
        result; every other field of the result is dropped.
    """
    decode_options = {
        'word_timestamps': False,
        'no_speech_threshold': 0.4,
        'compression_ratio_threshold': 2,
        'temperature': 0,
    }
    output = model.transcribe(audio, **decode_options)
    return {field: output[field] for field in ('language', 'text')}
20
-
21
def text2phoneme(text):
    """Phonemize ``text`` word by word using the espeak backend (en-us).

    The text is lower-cased and split on whitespace first, so ``phonemize``
    receives a list of words rather than one string.

    Args:
        text: The string to convert.

    Returns:
        Whatever ``phonemize`` returns for the list of words, using the
        module-level ``separator`` with stress marks and ties disabled.
    """
    words = text.lower().split()
    return phonemize(
        words,
        backend='espeak',
        separator=separator,
        strip=True,
        with_stress=False,
        tie=False,
        language='en-us',
    )
23
-
24
def rate_pronunciation(expected_phonemes, actual_phonemes):
    """Grade every entry of ``actual_phonemes`` against nearby ``expected_phonemes``.

    For the entry at position ``i`` the candidates are the entries of
    ``expected_phonemes`` at indices ``i - 1`` through ``i + 2`` (clipped to
    the valid range). The smallest Levenshtein distance to any candidate
    decides the grade.

    Args:
        expected_phonemes: Sequence of phoneme strings to search in.
        actual_phonemes: Sequence of phoneme strings to grade; the result has
            one grade per entry of this sequence.

    Returns:
        list[int]: ``3`` for an exact match inside the window, ``2`` when the
        best distance is at most 40% of the graded entry's length, ``1``
        otherwise (including when no candidate exists for that position).
    """
    total_expected = len(expected_phonemes)
    grades = []
    for position, candidate in enumerate(actual_phonemes):
        closest = float('inf')
        if position <= total_expected:
            first = max(0, position - 1)
            stop = position + min(3, total_expected - position)
            for idx in range(first, stop):
                gap = levenshtein_distance(expected_phonemes[idx], candidate)
                if gap >= closest:
                    continue
                closest = gap
                if closest == 0:
                    # A perfect match cannot be beaten; stop scanning.
                    break

        # Allowed edit distance scales with the length of the graded entry.
        tolerance = len(candidate) * 0.40
        if closest == 0:
            grade = 3
        elif closest <= tolerance:
            grade = 2
        else:
            grade = 1
        grades.append(grade)
    return grades
46
-
47
-
48
-
49
-
50
def Speaker_speech_analysis(audio_path, text):
    """Transcribe a recording and grade it word by word against ``text``.

    The recording is transcribed, both the transcription and ``text`` are
    phonemized, and each phonemized word of ``text`` is graded against the
    phonemized transcription with ``rate_pronunciation``. The transcription
    is also printed to stdout.

    Args:
        audio_path: Recording passed to ``transcribe`` and to
            ``calculate_fluency_and_pronunciation``.
        text: The reference sentence the speaker was meant to say.

    Returns:
        The mapping returned by ``calculate_fluency_and_pronunciation`` with
        an added ``'word_scores'`` entry: a list of ``(word, grade)`` tuples
        pairing the whitespace-split words of ``text`` with their grades.
    """
    spoken_text = transcribe(audio_path)['text']
    print(spoken_text)

    spoken_phonemes = text2phoneme(spoken_text)
    reference_phonemes = text2phoneme(text)
    grades = rate_pronunciation(spoken_phonemes, reference_phonemes)

    reference_words = text.split()
    report = calculate_fluency_and_pronunciation(
        audio_path,
        len(spoken_text.split()),
        grades,
        len(reference_words),
    )
    report['word_scores'] = list(zip(reference_words, grades))
    return report
61
-
62
if __name__ == '__main__':
    # Manual smoke test: phonemize a fixed sentence, transcribe a local
    # recording, and print the per-word grades of the sentence against it.
    recording = r'user_recording.wav'
    base_phonemes = text2phoneme('i have ADHD ')

    heard = transcribe(recording)['text']
    print(heard)
    heard_phonemes = text2phoneme(heard)

    print('base:', base_phonemes)
    print('predicted:', heard_phonemes)
    print(rate_pronunciation(heard_phonemes, base_phonemes))