Update app.py
Browse files
app.py
CHANGED
@@ -2,29 +2,9 @@ import os
|
|
2 |
import openai
|
3 |
import gradio as gr
|
4 |
from transformers import pipeline
|
5 |
-
from IPython.display import Audio
|
6 |
-
import os
|
7 |
-
import re
|
8 |
-
import glob
|
9 |
-
import json
|
10 |
-
import tempfile
|
11 |
-
import math
|
12 |
-
import torch
|
13 |
-
from torch import nn
|
14 |
-
from torch.nn import functional as F
|
15 |
-
from torch.utils.data import DataLoader
|
16 |
-
import numpy as np
|
17 |
-
import commons
|
18 |
-
import utils
|
19 |
-
import argparse
|
20 |
-
import subprocess
|
21 |
-
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
|
22 |
-
from models import SynthesizerTrn
|
23 |
-
from scipy.io.wavfile import write
|
24 |
|
25 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
26 |
-
|
27 |
-
pipe = pipeline(model="seeafricatz/kiaziboraasr")
|
28 |
|
29 |
def transcribe(audio):
|
30 |
text = pipe(audio)["text"]
|
@@ -36,7 +16,7 @@ def generate_response(transcribed_text):
|
|
36 |
messages=[
|
37 |
{
|
38 |
"role": "system",
|
39 |
-
"content": "All your answers should be in Swahili only,
|
40 |
},
|
41 |
{
|
42 |
"role": "user",
|
@@ -44,23 +24,86 @@ def generate_response(transcribed_text):
|
|
44 |
},
|
45 |
{
|
46 |
"role": "assistant",
|
47 |
-
"content": "
|
48 |
},
|
49 |
{
|
50 |
"role": "user",
|
51 |
-
"content": "
|
52 |
},
|
53 |
{
|
54 |
"role": "assistant",
|
55 |
-
"content": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
},
|
57 |
{
|
58 |
"role": "user",
|
59 |
-
"content": "
|
60 |
},
|
61 |
{
|
62 |
"role": "assistant",
|
63 |
-
"content": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
},
|
65 |
{
|
66 |
"role": "user",
|
@@ -70,175 +113,34 @@ def generate_response(transcribed_text):
|
|
70 |
)
|
71 |
return response['choices'][0]['message']['content']
|
72 |
|
73 |
-
# ... rest of your code ...
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
import os
|
78 |
-
import subprocess
|
79 |
-
import locale
|
80 |
-
locale.getpreferredencoding = lambda: "UTF-8"
|
81 |
-
|
82 |
-
def download(lang, tgt_dir="./"):
    """Download and unpack the MMS TTS checkpoint archive for ``lang``.

    Args:
        lang: Language code; the archive fetched is ``{lang}.tar.gz``.
        tgt_dir: Directory that receives the archive and its extracted
            contents. Created if it does not exist.

    Returns:
        ``os.path.join(tgt_dir, lang)``. This assumes the archive unpacks
        into a top-level directory named after ``lang``.

    Raises:
        subprocess.CalledProcessError: If ``wget`` or ``tar`` exits non-zero.
    """
    lang_fn = os.path.join(tgt_dir, lang + '.tar.gz')
    lang_dir = os.path.join(tgt_dir, lang)
    os.makedirs(tgt_dir, exist_ok=True)
    print(f"Download model for language: {lang}")
    # Argument lists (no shell) keep `lang`/`tgt_dir` from being parsed as
    # shell syntax; separate calls make a failed download stop the process
    # instead of being masked by the exit status of a later command.
    subprocess.check_output(
        ["wget", f"https://dl.fbaipublicfiles.com/mms/tts/{lang}.tar.gz", "-O", lang_fn]
    )
    # Extract inside tgt_dir so lang_dir is valid even when tgt_dir is not
    # the current working directory.
    subprocess.check_output(["tar", "zxvf", lang_fn, "-C", tgt_dir])
    print(f"Model checkpoints in {lang_dir}: {os.listdir(lang_dir)}")
    return lang_dir
|
92 |
-
|
93 |
-
LANG = "swh"
|
94 |
-
ckpt_dir = download(LANG)
|
95 |
-
|
96 |
-
|
97 |
-
def preprocess_char(text, lang=None):
    """Apply language-specific character treatment to ``text``.

    Args:
        text: Input string.
        lang: Language code. Only ``'ron'`` triggers a substitution; any
            other value (including ``None``) leaves ``text`` unchanged.

    Returns:
        The text with the substitution applied when ``lang == 'ron'``.
    """
    if lang == 'ron':
        text = text.replace("ț", "ţ")
    return text
|
105 |
-
|
106 |
-
class TextMapper(object):
    """Map text to the integer symbol IDs defined by a vocabulary file."""

    def __init__(self, vocab_file):
        """Load the vocabulary, one symbol per line, from ``vocab_file``.

        Args:
            vocab_file: Path to a UTF-8 text file with one symbol per line.

        Raises:
            ValueError: If the vocabulary has no single-space symbol.
        """
        # The context manager closes the file once the symbols are read.
        with open(vocab_file, encoding="utf-8") as f:
            self.symbols = [x.replace("\n", "") for x in f]
        self.SPACE_ID = self.symbols.index(" ")
        self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}
        self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}

    def text_to_sequence(self, text, cleaner_names):
        '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
        Args:
            text: string to convert to a sequence
            cleaner_names: accepted for interface compatibility; not used here
        Returns:
            List of integers corresponding to the symbols in the text
        Raises:
            KeyError: if the stripped text contains a symbol missing from the vocabulary
        '''
        return [self._symbol_to_id[symbol] for symbol in text.strip()]

    def uromanize(self, text, uroman_pl):
        """Run ``text`` through the uroman Perl script and return its first output line.

        Args:
            text: Text to romanize.
            uroman_pl: Path to ``uroman.pl``.

        Returns:
            The first line of the script's output with whitespace runs
            collapsed to single spaces and the ends stripped; an empty string
            when the script prints nothing.

        Raises:
            subprocess.CalledProcessError: If the Perl process exits non-zero.
        """
        iso = "xxx"
        # Text goes through stdin/stdout pipes with an argument list, so no
        # shell command is assembled from the script path and no temporary
        # files are needed; check=True surfaces a failed run.
        completed = subprocess.run(
            ["perl", uroman_pl, "-l", iso],
            input=text.encode("utf-8"),
            stdout=subprocess.PIPE,
            check=True,
        )
        first_line = completed.stdout.decode("utf-8").split("\n", 1)[0]
        return re.sub(r"\s+", " ", first_line).strip()

    def get_text(self, text, hps):
        """Convert ``text`` to a ``torch.LongTensor`` of symbol IDs.

        Args:
            text: Text whose symbols are all present in the vocabulary.
            hps: Hyper-parameter object; reads ``hps.data.text_cleaners`` and
                ``hps.data.add_blank``.

        Returns:
            A ``torch.LongTensor`` of IDs.
        """
        text_norm = self.text_to_sequence(text, hps.data.text_cleaners)
        if hps.data.add_blank:
            # presumably inserts ID 0 between symbols; confirm in commons.intersperse
            text_norm = commons.intersperse(text_norm, 0)
        text_norm = torch.LongTensor(text_norm)
        return text_norm

    def filter_oov(self, text):
        """Drop every character of ``text`` that is not in the vocabulary.

        Prints the filtered text and returns it.
        """
        val_chars = self._symbol_to_id
        txt_filt = "".join(ch for ch in text if ch in val_chars)
        print(f"text after filtering OOV: {txt_filt}")
        return txt_filt
|
158 |
-
|
159 |
-
def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):
    """Normalize ``txt`` so it only contains symbols known to ``text_mapper``.

    Steps: language-specific character treatment, optional uroman
    romanization, lower-casing, then removal of out-of-vocabulary characters.

    Args:
        txt: Raw input text.
        text_mapper: Object providing ``uromanize(text, uroman_pl)`` and
            ``filter_oov(text)``.
        hps: Hyper-parameter object; romanization is applied when
            ``hps.data.training_files`` ends with the ``uroman`` extension.
        uroman_dir: Existing uroman checkout. When ``None`` and romanization
            is needed, the repository is cloned into a temporary directory.
        lang: Language code forwarded to ``preprocess_char``.

    Returns:
        The processed text.

    Raises:
        subprocess.CalledProcessError: If cloning uroman fails.
    """
    txt = preprocess_char(txt, lang=lang)
    is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
    if is_uroman:
        with tempfile.TemporaryDirectory() as tmp_dir:
            if uroman_dir is None:
                # HTTPS clone works without SSH keys, unlike the git@ URL;
                # the argument list avoids going through a shell.
                cmd = ["git", "clone", "https://github.com/isi-nlp/uroman.git", tmp_dir]
                print(" ".join(cmd))
                subprocess.check_output(cmd)
                uroman_dir = tmp_dir
            uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl")
            print("uromanize")
            # Must run inside the `with`: the temporary clone is deleted on exit.
            txt = text_mapper.uromanize(txt, uroman_pl)
            print(f"uroman text: {txt}")
    txt = txt.lower()
    txt = text_mapper.filter_oov(txt)
    return txt
|
176 |
-
|
177 |
-
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Run inference with {device}")
vocab_file = f"{ckpt_dir}/vocab.txt"
config_file = f"{ckpt_dir}/config.json"
# Explicit exception rather than `assert`, which is removed under `python -O`.
if not os.path.isfile(config_file):
    raise FileNotFoundError(f"{config_file} doesn't exist")
hps = utils.get_hparams_from_file(config_file)
text_mapper = TextMapper(vocab_file)
# Build the synthesizer from the vocabulary size and the checkpoint's config.
net_g = SynthesizerTrn(
    len(text_mapper.symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
net_g.to(device)
_ = net_g.eval()

g_pth = f"{ckpt_dir}/G_100000.pth"
print(f"load {g_pth}")

# Load the generator weights; no optimizer is passed (None).
_ = utils.load_checkpoint(g_pth, net_g, None)
|
200 |
-
|
201 |
-
|
202 |
-
import torch
|
203 |
-
from scipy.io.wavfile import write
|
204 |
|
205 |
def inference(text):
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
noise_scale_w=0.8, length_scale=1.0
|
216 |
-
)[0][0,0].cpu().float().numpy()
|
217 |
-
|
218 |
-
# Saving the generated audio to a file
|
219 |
-
output_file = "tts_output.wav"
|
220 |
-
write(output_file, hps.data.sampling_rate, hyp)
|
221 |
-
|
222 |
-
return output_file
|
223 |
-
|
224 |
-
|
225 |
|
226 |
def process_audio_and_respond(audio):
|
227 |
-
|
228 |
-
response_text = generate_response(
|
229 |
-
|
230 |
-
return response_text,
|
231 |
|
232 |
demo = gr.Interface(
|
233 |
process_audio_and_respond,
|
234 |
gr.inputs.Audio(source="microphone", type="filepath", label="Bonyeza kitufe cha kurekodi na uliza swali lako"),
|
235 |
-
[gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"),
|
236 |
-
|
237 |
-
description="Uliza Swali kuhusu haki za
|
238 |
theme="compact",
|
239 |
layout="vertical",
|
240 |
allow_flagging=False,
|
241 |
live=True,
|
242 |
)
|
243 |
|
244 |
-
demo.launch()
|
|
|
2 |
import openai
|
3 |
import gradio as gr
|
4 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
7 |
+
pipe = pipeline(model="Ussen/whisper-medium-swc-drc-kat-1")
|
|
|
8 |
|
9 |
def transcribe(audio):
|
10 |
text = pipe(audio)["text"]
|
|
|
16 |
messages=[
|
17 |
{
|
18 |
"role": "system",
|
19 |
+
"content": "All your answers should be in Congolese Swahili only, understand Swahili and France so here we start. Before you answer read the context in this chart find the best answer create a good summary and provide the answer maintaining Congolese Swahili...Mimi ni Haki des femmes AI bot iliyoundwa na Core23Lab. Mimi ni msaidizi wa sauti ya haki ya kumiliki ardhi kwa wanawake nchini DRC. Nikusaidie vipi!"
|
20 |
},
|
21 |
{
|
22 |
"role": "user",
|
|
|
24 |
},
|
25 |
{
|
26 |
"role": "assistant",
|
27 |
+
"content": "Mimi ni Haki des femmes AI bot iliyoundwa na Core23Lab. Mimi ni msaidizi wa sauti ya haki ya kumiliki ardhi kwa wanawake nchini DRC. Nikusaidie vipi"
|
28 |
},
|
29 |
{
|
30 |
"role": "user",
|
31 |
+
"content": "haki za ardhi za wanawake Congo?"
|
32 |
},
|
33 |
{
|
34 |
"role": "assistant",
|
35 |
+
"content": """Sheria au loi ile inangaria bulongo mu jamuri ya kidemokrasia ya kongo ni sheria namba 73-021 ya le 20/07/1973, ile iribadirishwa na
|
36 |
+
ku ongezwa na sheria namba 80-008 ya le 18/07/1980, ile ina semezea pa régime foncier et immobilier na régime des suretés.
|
37 |
+
Iyi sheria ayangarie paka regime juridique ya kutumikia bulongo, lakini inanaria pia « regime general des biens »,
|
38 |
+
kwanzia ku article 1 nakufikia ku article 52 na « regime des suretes » kwanzia ku article 245 nakufikia ku article 396.
|
39 |
+
Lakini tangu 2012, na kuzariwa kwa sheria ya Ohada mu jamuri ya kidemokrasia ya kongo, kipande kia sheria ya bulongo kile
|
40 |
+
kinangaria regime des sûretés kinakombozwa na acte unisaforme ya Ohada ile inasemea regime des sûretés.
|
41 |
+
193 articles bule ya mu loi foncière inasemea regime foncier et immobilier (kwanzia ku article 53 nafikia ku article 244) ;
|
42 |
+
njo ile ma articles ile inawekwa mu disposition ya bale banatumikiaka bulongo (population), bakubwa ba inchi bale banagariaka
|
43 |
+
mambo ya bulongo (autorités foncières et cadastrales) na ba batshunga sheria (ba avocats na ba magistrats). """
|
44 |
},
|
45 |
{
|
46 |
"role": "user",
|
47 |
+
"content": "Sheria za Ardhi nchini Congo"
|
48 |
},
|
49 |
{
|
50 |
"role": "assistant",
|
51 |
+
"content": """Sheria aiandike kukatazwa ata kumoya kufwatana na sexe ao bingine nju ya haki yaku tumikia domaine foncier. Lakini, banamuke kiloko njo beko napata ma mpango nju ya kukosa kujua ma procedures,
|
52 |
+
certificat ya enregistrement, kukosa kwa makuta, incapacité juridique ya mwanamuke mwenye kuolewa, obligation ya autorisation ya bwana yake.
|
53 |
+
Sheria nᴼ11/022 ya 24/12/2011 ile ina semea ma principes ya musingi ile inangaria agriculture, ina semea accès équitable ya bulongo ile banamuke bakumigini banekalaka ba victimes sana.
|
54 |
+
Ku migini (milieux ruraux), kupata kwa ma mpango kuna fwatana ma kizazi na chefu wa clan. Lakini, kutumikiya kwa loi nᴼ73 ya le 20/07/1973 vile ili badishwa ina semea regime general,
|
55 |
+
regime foncier, regime ya suretes, ina semea pa swali ya ma bulongo ya ku rimiapo na kuvukiapo yaku migini.
|
56 |
+
Mwanamuke mwenye kuolewa eko na haki ya ku piana ma npango ile ilikiya ya bwana yake kama bwana anakufa.
|
57 |
+
Ata kama bantu abadjue mana, kuolewa kuna tshionganisha kupata kwama mpango njo effet ya kuya mwenye kuolewa kuna saidia kupata mpango nakuyi tumikia. Badjane beko na avantage kupita bale bayaolewa.
|
58 |
+
Kwa ukweli, nju ya kukosa kufunda banamuke bakumigini abadjue ma procedure yakupata ma pango na aba djue mufano yaku revendiquer na ku tshunga ma haki yabo.
|
59 |
+
Regime ya ndoa ni kunji ya ma sheria ile iko na weka ma haki na mapashwa ya ba bibi na bwana tangu mwanzo ya ndoa yabo.
|
60 |
+
Ma regime tatu ya ndoa mu jamuri ya kidemokrasia ya Kongo ni :
|
61 |
+
1. Regime ya ku kabulamana bintu : mu ile regime, akuna kintu kile kinawekwa pamoja. Kila mwana ndoa eko mwenye kintu kiake kile aripata kabla ndoa,
|
62 |
+
na wakati banakuya mu ndoa.
|
63 |
+
Faida :
|
64 |
+
• Iyi regime iko na kabula muzuri bintu bia wanandoa nju yakusaidia kuriti bintu mu djamaa kutoka kubazazi.
|
65 |
+
|
66 |
+
• Iyi inashauriwa wakati bintu bia wanandoa ao ma jamaa iko ya mana.
|
67 |
+
|
68 |
+
• Ba créanciers ya ba bibi ao bwana abawezi kubeba bintu bia mwingine shisipo ye moya kwitshikia kuripa ndeni ya mwingine ;
|
69 |
+
njo maana ba entrepreneurs ao bama kaji ya risques ba tshakulaka ile regime.
|
70 |
+
Bubaya :
|
71 |
+
• Kutshakula regime kama iyi aina garantie ya ulinzi kumbele ya ba creanciers (bale beko na dayi ndeni) nju banezi ku nyanganya bintu bia ba conjoints kama ndeni
|
72 |
+
iribewa nju yakutengeneza nyumba ao kuripa masomo ya batoto (ni pamoya nama impôts ao ma charges sociales yasipokulipwa).
|
73 |
+
|
74 |
+
• Ma akounti (comptes) ile barifungula nakuweka pamoya makutamu mu ndoa inezi kuleta shida wakati wakwatshana na ita lomba kuonesha kule kuritoya ile makuta.
|
75 |
+
|
76 |
+
• Wakati wa kifo, ule anabakiya anapashwatu ku tegemea paka pabintu biake nju ya ku garantir maisha yake ya kumbele nju kama akuna testament, ata patatu paka ¼ ya biote bile barikuya bia ule anakufwa.
|
77 |
+
2. Regime ya kutshangia bile wanandoa bana pata mu bukweri (communauté des biens réduits aux acquêts) :
|
78 |
+
Mu iyi regime, banandoa beko naweka pamoya bintu bile bari pata mu ndoa (na ma ndeni ile bibi ao bwana aribeba). Ni kanuni ya kusaidiana mu ndoa).
|
79 |
+
Faida:
|
80 |
+
• Wakati wa kifo, ule ana bakiya ata beba kipande ya bintu pashipo kuripia.
|
81 |
+
|
82 |
+
• Wakati yakwatshana, biko teke teke ku kabula bintu nju bintu biote bile biriuziwa mu ndoa ni bia kila muntu (kipande kipande).
|
83 |
+
Bubaya:
|
84 |
+
• Kama Bibi ao bwana eko na ndeni, mupinda ndeni anezi ku beba bintu bia mwenye ndeni ao bia mwingine.
|
85 |
+
|
86 |
+
• Kama mumoja ana uzisha kintu kiake na anauza bintu bingine, bile bintu ni paka biake ye moya. Lakini, inafai ule mwenye kuzisha oneshe kwa ukweli kule kunatokea makuta;
|
87 |
+
lakini bamingi abafwanyake vile. Vile, familia ya mwanandoa moya inezi kuogopa kumupa kintu nju kile kintu kishingie mu ile regime ya communautee.
|
88 |
+
|
89 |
+
• Mu wakati ya kwatshana, ma account iko teke teke kufwanya lakini iyi oneshaketu kwa ukweli makuta ya kila muntu.
|
90 |
+
|
91 |
+
• Kutshangia kwa bintu pamoya kunezi ku leta mambo nju ma decisions ya maana (kuzisha bintu, etc…) kunapashwa kufanishikwa paka na masikilizano ya ba banandoa bote bawiri.
|
92 |
+
3. Regime ya communaute universelle: mu iyi regime bintu bile ba bibi na bwana baripata kabla ya ndoa na katika ndoa binawekwa pamoya pasipo kwangalia siku ile bariipata na mufano bariipata.
|
93 |
+
Faida:
|
94 |
+
• Ile regime iko na saidia kupitia conjoint (bibi ao bwana) bintu pasipo kuripa makuta ya succession.
|
95 |
+
|
96 |
+
• Mu wakati ya kifo, batoto abataripa droit ya succession, paka ku lufu ya muzazi wa piri.
|
97 |
+
Bubaya:
|
98 |
+
• Batoto batapata abattement successoraux paka mala moya,
|
99 |
+
|
100 |
+
• Ma ndeni anikwa ya bote bawiri ata kama ni moya ari kopa kabla ya ao mu ndoa,
|
101 |
+
|
102 |
+
• Ma bintu ile banandoa bari pata mu ndoa inakwa yabo bote bawiri ila kama kuko musikilizano ingine mu contrat yabo ya ndoa.
|
103 |
+
|
104 |
+
• Wakati ya kwatshana, bintu bita kaburiwa kipande kipande pashipo kwangaria kule biritokea ao namuna bariipata,
|
105 |
+
|
106 |
+
• Risque ya bukonvi kama kuna kuya batoto ba kitanda kiakwanza."""
|
107 |
},
|
108 |
{
|
109 |
"role": "user",
|
|
|
113 |
)
|
114 |
return response['choices'][0]['message']['content']
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
def inference(text):
    """Synthesize speech for ``text`` with the OpenAI text-to-speech endpoint.

    Args:
        text: Text to speak.

    Returns:
        Path of a temporary ``.mp3`` file holding the generated audio, or
        ``None`` when ``text`` is empty or whitespace only.

    Raises:
        urllib.error.URLError: If the HTTP request fails (``HTTPError`` for
            non-2xx responses).
    """
    import json
    import tempfile
    import urllib.request

    # Nothing to speak: skip the API call and let the caller show no audio.
    if not text or not text.strip():
        return None

    # The speech endpoint takes the text in the `input` field and answers
    # with the raw audio bytes (MP3 by default), not with a URL.
    payload = json.dumps(
        {"model": "tts-1-hd", "voice": "alloy", "input": text}
    ).encode("utf-8")
    request = urllib.request.Request(
        "https://api.openai.com/v1/audio/speech",
        data=payload,
        headers={
            "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=60) as reply:
        audio_bytes = reply.read()

    # delete=False: the file must outlive this function so the UI can read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
        out.write(audio_bytes)
    return out.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
def process_audio_and_respond(audio):
    """Transcribe a recording, generate a reply, and synthesize it as audio.

    Args:
        audio: File path of the recorded question, or ``None`` when there is
            no recording.

    Returns:
        A ``(response_text, audio)`` pair for the text and audio outputs;
        ``("", None)`` when ``audio`` is ``None``.
    """
    # With no recording there is nothing to transcribe; return empty outputs
    # instead of passing None into the speech-recognition pipeline.
    if audio is None:
        return "", None
    text = transcribe(audio)
    response_text = generate_response(text)
    audio_url = inference(response_text)
    return response_text, audio_url
|
133 |
|
134 |
# Voice interface: one microphone recording in, the reply as text and audio out.
demo = gr.Interface(
    fn=process_audio_and_respond,
    inputs=gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Bonyeza kitufe cha kurekodi na uliza swali lako",
    ),
    outputs=[
        gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"),
        gr.outputs.Audio(label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)"),
    ],
    description="Uliza Swali kuhusu haki and sheria za udongo",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)

# Start the Gradio server.
demo.launch()
|