DrishtiSharma committed on
Commit
043c07c
1 Parent(s): 2f04e25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -11,18 +11,31 @@ def load_and_fix_data(input_file, model_sampling_rate):
11
  speech = librosa.resample(speech, sample_rate, model_sampling_rate)
12
  return speech
13
 
14
-
15
- feature_extractor = AutoFeatureExtractor.from_pretrained("jonatasgrosman/wav2vec2-xls-r-1b-spanish")
 
16
  sampling_rate = feature_extractor.sampling_rate
17
-
18
- asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-xls-r-1b-spanish")
19
-
20
-
21
- def predict_and_ctc_lm_decode(input_file):
22
- speech = load_and_fix_data(input_file, sampling_rate)
23
- transcribed_text = asr(speech, chunk_length_s=10, stride_length_s=1)["text"]
24
- pipe2 = pipeline("text-classification", model = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
25
- sexism_detection = pipe2(transcribed_text)[0]['label']
 
 
 
 
 
 
 
 
 
 
 
 
26
  if sexism_detection == "LABEL_0":
27
  return "The input audio contains NON-SEXIST language"
28
  else:
@@ -45,7 +58,7 @@ Pre-trained Model used for Sexism Detection : [hackathon-pln-es/twitter_sexismo-
45
 
46
 
47
  gr.Interface(
48
- predict_and_ctc_lm_decode,
49
  inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")],
50
  #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
51
  outputs=[gr.outputs.Textbox(label="Predicción")],
@@ -54,4 +67,4 @@ gr.Interface(
54
  description=description,
55
  layout="horizontal",
56
  theme="huggingface",
57
- ).launch(enable_queue=True, cache_examples=True)
 
11
  speech = librosa.resample(speech, sample_rate, model_sampling_rate)
12
  return speech
13
 
14
# Speech recognition: one Spanish wav2vec2 checkpoint supplies both the
# feature extractor (read only for its sampling rate) and the ASR pipeline.
model_name1 = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name1)
sampling_rate = feature_extractor.sampling_rate
asr = pipeline("automatic-speech-recognition", model=model_name1)

# Text classification: pipeline applied to the transcribed text.
model_name2 = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
classifier = pipeline("text-classification", model=model_name2)
23
+
24
# Speech-to-text conversion
def speech_to_text(input_file):
    """Transcribe the audio file at ``input_file`` and return the text.

    The audio is loaded with ``load_and_fix_data`` using the module-level
    ``sampling_rate`` and then passed to the module-level ``asr`` pipeline
    with 15-second chunks and a 1-second stride.

    Args:
        input_file: Path of the audio file to transcribe.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    # The loader defined in this file is ``load_and_fix_data``; calling
    # ``load_fix_data`` raised NameError on every request.
    speech = load_and_fix_data(input_file, sampling_rate)
    return asr(speech, chunk_length_s=15, stride_length_s=1)["text"]
29
+
30
# Sexism detection on already-transcribed text
def sexism_detection(transcribed_text):
    """Return the label of the first classifier result for ``transcribed_text``.

    The text is passed to the module-level ``classifier`` pipeline and the
    ``"label"`` entry of the first returned item is handed back unchanged.
    """
    top_prediction = classifier(transcribed_text)[0]
    return top_prediction["label"]
34
+
35
+ #Defining a function which transcribes the Spanish audio and returns a message based on the sexism classification of the transcription
36
+ def asr_and_sexism_detection(input_file):
37
+ transcribed_text = speech_to_text(input_text)
38
+ sexism_detection = sexism_detection(transcribed_text)
39
  if sexism_detection == "LABEL_0":
40
  return "The input audio contains NON-SEXIST language"
41
  else:
 
58
 
59
 
60
  gr.Interface(
61
+ asr_and_sexism_detection,
62
  inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")],
63
  #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
64
  outputs=[gr.outputs.Textbox(label="Predicción")],
 
67
  description=description,
68
  layout="horizontal",
69
  theme="huggingface",
70
+ ).launch(enable_queue=True)