Core23 committed on
Commit
ff52370
1 Parent(s): 2f4b5ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -185
app.py CHANGED
@@ -2,29 +2,9 @@ import os
2
  import openai
3
  import gradio as gr
4
  from transformers import pipeline
5
- from IPython.display import Audio
6
- import os
7
- import re
8
- import glob
9
- import json
10
- import tempfile
11
- import math
12
- import torch
13
- from torch import nn
14
- from torch.nn import functional as F
15
- from torch.utils.data import DataLoader
16
- import numpy as np
17
- import commons
18
- import utils
19
- import argparse
20
- import subprocess
21
- from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
22
- from models import SynthesizerTrn
23
- from scipy.io.wavfile import write
24
 
25
  openai.api_key = os.getenv("OPENAI_API_KEY")
26
-
27
- pipe = pipeline(model="seeafricatz/kiaziboraasr")
28
 
29
  def transcribe(audio):
30
  text = pipe(audio)["text"]
@@ -36,7 +16,7 @@ def generate_response(transcribed_text):
36
  messages=[
37
  {
38
  "role": "system",
39
- "content": "All your answers should be in Swahili only, users understand Swahili only so here we start... Wewe ni mtaalamu wa haki za ardhi za wanawake nchini Kongo na utajibu maswali yote kwa Kiswahili tu!"
40
  },
41
  {
42
  "role": "user",
@@ -44,23 +24,86 @@ def generate_response(transcribed_text):
44
  },
45
  {
46
  "role": "assistant",
47
- "content": "Salama, je una swali lolote kuhusu haki za ardhi za wanawake nchini Kongo?"
48
  },
49
  {
50
  "role": "user",
51
- "content": "nini maana ya haki za ardhi za wanawake?"
52
  },
53
  {
54
  "role": "assistant",
55
- "content": "Haki za ardhi za wanawake zinamaanisha haki za wanawake kumiliki, kutumia, na kudhibiti ardhi. Katika muktadha wa Kongo, haki hizi zinaweza kuathiriwa na mila, sheria, na mizozo ya ardhi."
 
 
 
 
 
 
 
 
56
  },
57
  {
58
  "role": "user",
59
- "content": "nini matumizi ya haki za ardhi za wanawake?"
60
  },
61
  {
62
  "role": "assistant",
63
- "content": "Haki za ardhi za wanawake zina umuhimu mkubwa kwa kuwawezesha wanawake kiuchumi, kuimarisha usalama wa chakula, na kuchangia katika maendeleo ya jamii na taifa kwa ujumla. Kwa mfano, wanawake wenye haki za ardhi wanaweza kupata mikopo, kuendeleza ardhi, na kutoa mchango muhimu katika uchumi wa familia na jamii."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  },
65
  {
66
  "role": "user",
@@ -70,175 +113,34 @@ def generate_response(transcribed_text):
70
  )
71
  return response['choices'][0]['message']['content']
72
 
73
- # ... rest of your code ...
74
-
75
-
76
-
77
- import os
78
- import subprocess
79
- import locale
80
- locale.getpreferredencoding = lambda: "UTF-8"
81
-
82
- def download(lang, tgt_dir="./"):
83
- lang_fn, lang_dir = os.path.join(tgt_dir, lang+'.tar.gz'), os.path.join(tgt_dir, lang)
84
- cmd = ";".join([
85
- f"wget https://dl.fbaipublicfiles.com/mms/tts/{lang}.tar.gz -O {lang_fn}",
86
- f"tar zxvf {lang_fn}"
87
- ])
88
- print(f"Download model for language: {lang}")
89
- subprocess.check_output(cmd, shell=True)
90
- print(f"Model checkpoints in {lang_dir}: {os.listdir(lang_dir)}")
91
- return lang_dir
92
-
93
- LANG = "swh"
94
- ckpt_dir = download(LANG)
95
-
96
-
97
- def preprocess_char(text, lang=None):
98
- """
99
- Special treatment of characters in certain languages
100
- """
101
- print(lang)
102
- if lang == 'ron':
103
- text = text.replace("ț", "ţ")
104
- return text
105
-
106
- class TextMapper(object):
107
- def __init__(self, vocab_file):
108
- self.symbols = [x.replace("\n", "") for x in open(vocab_file, encoding="utf-8").readlines()]
109
- self.SPACE_ID = self.symbols.index(" ")
110
- self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}
111
- self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}
112
-
113
- def text_to_sequence(self, text, cleaner_names):
114
- '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
115
- Args:
116
- text: string to convert to a sequence
117
- cleaner_names: names of the cleaner functions to run the text through
118
- Returns:
119
- List of integers corresponding to the symbols in the text
120
- '''
121
- sequence = []
122
- clean_text = text.strip()
123
- for symbol in clean_text:
124
- symbol_id = self._symbol_to_id[symbol]
125
- sequence += [symbol_id]
126
- return sequence
127
-
128
- def uromanize(self, text, uroman_pl):
129
- iso = "xxx"
130
- with tempfile.NamedTemporaryFile() as tf, \
131
- tempfile.NamedTemporaryFile() as tf2:
132
- with open(tf.name, "w") as f:
133
- f.write("\n".join([text]))
134
- cmd = f"perl " + uroman_pl
135
- cmd += f" -l {iso} "
136
- cmd += f" < {tf.name} > {tf2.name}"
137
- os.system(cmd)
138
- outtexts = []
139
- with open(tf2.name) as f:
140
- for line in f:
141
- line = re.sub(r"\s+", " ", line).strip()
142
- outtexts.append(line)
143
- outtext = outtexts[0]
144
- return outtext
145
-
146
- def get_text(self, text, hps):
147
- text_norm = self.text_to_sequence(text, hps.data.text_cleaners)
148
- if hps.data.add_blank:
149
- text_norm = commons.intersperse(text_norm, 0)
150
- text_norm = torch.LongTensor(text_norm)
151
- return text_norm
152
-
153
- def filter_oov(self, text):
154
- val_chars = self._symbol_to_id
155
- txt_filt = "".join(list(filter(lambda x: x in val_chars, text)))
156
- print(f"text after filtering OOV: {txt_filt}")
157
- return txt_filt
158
-
159
- def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):
160
- txt = preprocess_char(txt, lang=lang)
161
- is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
162
- if is_uroman:
163
- with tempfile.TemporaryDirectory() as tmp_dir:
164
- if uroman_dir is None:
165
- cmd = f"git clone git@github.com:isi-nlp/uroman.git {tmp_dir}"
166
- print(cmd)
167
- subprocess.check_output(cmd, shell=True)
168
- uroman_dir = tmp_dir
169
- uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl")
170
- print(f"uromanize")
171
- txt = text_mapper.uromanize(txt, uroman_pl)
172
- print(f"uroman text: {txt}")
173
- txt = txt.lower()
174
- txt = text_mapper.filter_oov(txt)
175
- return txt
176
-
177
- if torch.cuda.is_available():
178
- device = torch.device("cuda")
179
- else:
180
- device = torch.device("cpu")
181
-
182
- print(f"Run inference with {device}")
183
- vocab_file = f"{ckpt_dir}/vocab.txt"
184
- config_file = f"{ckpt_dir}/config.json"
185
- assert os.path.isfile(config_file), f"{config_file} doesn't exist"
186
- hps = utils.get_hparams_from_file(config_file)
187
- text_mapper = TextMapper(vocab_file)
188
- net_g = SynthesizerTrn(
189
- len(text_mapper.symbols),
190
- hps.data.filter_length // 2 + 1,
191
- hps.train.segment_size // hps.data.hop_length,
192
- **hps.model)
193
- net_g.to(device)
194
- _ = net_g.eval()
195
-
196
- g_pth = f"{ckpt_dir}/G_100000.pth"
197
- print(f"load {g_pth}")
198
-
199
- _ = utils.load_checkpoint(g_pth, net_g, None)
200
-
201
-
202
- import torch
203
- from scipy.io.wavfile import write
204
 
205
  def inference(text):
206
- # Preprocessing the text
207
- text = preprocess_text(text, text_mapper, hps, lang=LANG)
208
- stn_tst = text_mapper.get_text(text, hps)
209
-
210
- with torch.no_grad():
211
- x_tst = stn_tst.unsqueeze(0).to(device)
212
- x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)
213
- hyp = net_g.infer(
214
- x_tst, x_tst_lengths, noise_scale=.667,
215
- noise_scale_w=0.8, length_scale=1.0
216
- )[0][0,0].cpu().float().numpy()
217
-
218
- # Saving the generated audio to a file
219
- output_file = "tts_output.wav"
220
- write(output_file, hps.data.sampling_rate, hyp)
221
-
222
- return output_file
223
-
224
-
225
 
226
  def process_audio_and_respond(audio):
227
- transcribed_text = transcribe(audio)
228
- response_text = generate_response(transcribed_text)
229
- output_file = inference(response_text)
230
- return response_text, output_file
231
 
232
  demo = gr.Interface(
233
  process_audio_and_respond,
234
  gr.inputs.Audio(source="microphone", type="filepath", label="Bonyeza kitufe cha kurekodi na uliza swali lako"),
235
- [gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"), gr.outputs.Audio(type="filepath", label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)")],
236
- title="Haki",
237
- description="Uliza Swali kuhusu haki za ardhi",
238
  theme="compact",
239
  layout="vertical",
240
  allow_flagging=False,
241
  live=True,
242
  )
243
 
244
- demo.launch()
 
2
  import openai
3
  import gradio as gr
4
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  openai.api_key = os.getenv("OPENAI_API_KEY")
7
+ pipe = pipeline(model="Ussen/whisper-medium-swc-drc-kat-1")
 
8
 
9
  def transcribe(audio):
10
  text = pipe(audio)["text"]
 
16
  messages=[
17
  {
18
  "role": "system",
19
+ "content": "All your answers should be in Congolese Swahili only, understand Swahili and France so here we start. Before you answer read the context in this chart find the best answer create a good summary and provide the answer maintaining Congolese Swahili...Mimi ni Haki des femmes AI bot iliyoundwa na Core23Lab. Mimi ni msaidizi wa sauti ya haki ya kumiliki ardhi kwa wanawake nchini DRC. Nikusaidie vipi!"
20
  },
21
  {
22
  "role": "user",
 
24
  },
25
  {
26
  "role": "assistant",
27
+ "content": "Mimi ni Haki des femmes AI bot iliyoundwa na Core23Lab. Mimi ni msaidizi wa sauti ya haki ya kumiliki ardhi kwa wanawake nchini DRC. Nikusaidie vipi"
28
  },
29
  {
30
  "role": "user",
31
+ "content": "haki za ardhi za wanawake Congo?"
32
  },
33
  {
34
  "role": "assistant",
35
+ "content": """Sheria au loi ile inangaria bulongo mu jamuri ya kidemokrasia ya kongo ni sheria namba 73-021 ya le 20/07/1973, ile iribadirishwa na
36
+ ku ongezwa na sheria namba 80-008 ya le 18/07/1980, ile ina semezea pa régime foncier et immobilier na régime des suretés.
37
+ Iyi sheria ayangarie paka regime juridique ya kutumikia bulongo, lakini inanaria pia « regime general des biens »,
38
+ kwanzia ku article 1 nakufikia ku article 52 na « regime des suretes » kwanzia ku article 245 nakufikia ku article 396.
39
+ Lakini tangu 2012, na kuzariwa kwa sheria ya Ohada mu jamuri ya kidemokrasia ya kongo, kipande kia sheria ya bulongo kile
40
+ kinangaria regime des sûretés kinakombozwa na acte unisaforme ya Ohada ile inasemea regime des sûretés.
41
+ 193 articles bule ya mu loi foncière inasemea regime foncier et immobilier (kwanzia ku article 53 nafikia ku article 244) ;
42
+ njo ile ma articles ile inawekwa mu disposition ya bale banatumikiaka bulongo (population), bakubwa ba inchi bale banagariaka
43
+ mambo ya bulongo (autorités foncières et cadastrales) na ba batshunga sheria (ba avocats na ba magistrats). """
44
  },
45
  {
46
  "role": "user",
47
+ "content": "Sheria za Ardhi nchini Congo"
48
  },
49
  {
50
  "role": "assistant",
51
+ "content": """Sheria aiandike kukatazwa ata kumoya kufwatana na sexe ao bingine nju ya haki yaku tumikia domaine foncier. Lakini, banamuke kiloko njo beko napata ma mpango nju ya kukosa kujua ma procedures,
52
+ certificat ya enregistrement, kukosa kwa makuta, incapacité juridique ya mwanamuke mwenye kuolewa, obligation ya autorisation ya bwana yake.
53
+ Sheria nᴼ11/022 ya 24/12/2011 ile ina semea ma principes ya musingi ile inangaria agriculture, ina semea accès équitable ya bulongo ile banamuke bakumigini banekalaka ba victimes sana.
54
+ Ku migini (milieux ruraux), kupata kwa ma mpango kuna fwatana ma kizazi na chefu wa clan. Lakini, kutumikiya kwa loi nᴼ73 ya le 20/07/1973 vile ili badishwa ina semea regime general,
55
+ regime foncier, regime ya suretes, ina semea pa swali ya ma bulongo ya ku rimiapo na kuvukiapo yaku migini.
56
+ Mwanamuke mwenye kuolewa eko na haki ya ku piana ma npango ile ilikiya ya bwana yake kama bwana anakufa.
57
+ Ata kama bantu abadjue mana, kuolewa kuna tshionganisha kupata kwama mpango njo effet ya kuya mwenye kuolewa kuna saidia kupata mpango nakuyi tumikia. Badjane beko na avantage kupita bale bayaolewa.
58
+ Kwa ukweli, nju ya kukosa kufunda banamuke bakumigini abadjue ma procedure yakupata ma pango na aba djue mufano yaku revendiquer na ku tshunga ma haki yabo.
59
+ Regime ya ndoa ni kunji ya ma sheria ile iko na weka ma haki na mapashwa ya ba bibi na bwana tangu mwanzo ya ndoa yabo.
60
+ Ma regime tatu ya ndoa mu jamuri ya kidemokrasia ya Kongo ni :
61
+ 1. Regime ya ku kabulamana bintu : mu ile regime, akuna kintu kile kinawekwa pamoja. Kila mwana ndoa eko mwenye kintu kiake kile aripata kabla ndoa,
62
+ na wakati banakuya mu ndoa.
63
+ Faida :
64
+ • Iyi regime iko na kabula muzuri bintu bia wanandoa nju yakusaidia kuriti bintu mu djamaa kutoka kubazazi.
65
+
66
+ • Iyi inashauriwa wakati bintu bia wanandoa ao ma jamaa iko ya mana.
67
+
68
+ • Ba créanciers ya ba bibi ao bwana abawezi kubeba bintu bia mwingine shisipo ye moya kwitshikia kuripa ndeni ya mwingine ;
69
+ njo maana ba entrepreneurs ao bama kaji ya risques ba tshakulaka ile regime.
70
+ Bubaya :
71
+ • Kutshakula regime kama iyi aina garantie ya ulinzi kumbele ya ba creanciers (bale beko na dayi ndeni) nju banezi ku nyanganya bintu bia ba conjoints kama ndeni
72
+ iribewa nju yakutengeneza nyumba ao kuripa masomo ya batoto (ni pamoya nama impôts ao ma charges sociales yasipokulipwa).
73
+
74
+ • Ma akounti (comptes) ile barifungula nakuweka pamoya makutamu mu ndoa inezi kuleta shida wakati wakwatshana na ita lomba kuonesha kule kuritoya ile makuta.
75
+
76
+ • Wakati wa kifo, ule anabakiya anapashwatu ku tegemea paka pabintu biake nju ya ku garantir maisha yake ya kumbele nju kama akuna testament, ata patatu paka ¼ ya biote bile barikuya bia ule anakufwa.
77
+ 2. Regime ya kutshangia bile wanandoa bana pata mu bukweri (communauté des biens réduits aux acquêts) :
78
+ Mu iyi regime, banandoa beko naweka pamoya bintu bile bari pata mu ndoa (na ma ndeni ile bibi ao bwana aribeba). Ni kanuni ya kusaidiana mu ndoa).
79
+ Faida:
80
+ • Wakati wa kifo, ule ana bakiya ata beba kipande ya bintu pashipo kuripia.
81
+
82
+ • Wakati yakwatshana, biko teke teke ku kabula bintu nju bintu biote bile biriuziwa mu ndoa ni bia kila muntu (kipande kipande).
83
+ Bubaya:
84
+ • Kama Bibi ao bwana eko na ndeni, mupinda ndeni anezi ku beba bintu bia mwenye ndeni ao bia mwingine.
85
+
86
+ • Kama mumoja ana uzisha kintu kiake na anauza bintu bingine, bile bintu ni paka biake ye moya. Lakini, inafai ule mwenye kuzisha oneshe kwa ukweli kule kunatokea makuta;
87
+ lakini bamingi abafwanyake vile. Vile, familia ya mwanandoa moya inezi kuogopa kumupa kintu nju kile kintu kishingie mu ile regime ya communautee.
88
+
89
+ • Mu wakati ya kwatshana, ma account iko teke teke kufwanya lakini iyi oneshaketu kwa ukweli makuta ya kila muntu.
90
+
91
+ • Kutshangia kwa bintu pamoya kunezi ku leta mambo nju ma decisions ya maana (kuzisha bintu, etc…) kunapashwa kufanishikwa paka na masikilizano ya ba banandoa bote bawiri.
92
+ 3. Regime ya communaute universelle: mu iyi regime bintu bile ba bibi na bwana baripata kabla ya ndoa na katika ndoa binawekwa pamoya pasipo kwangalia siku ile bariipata na mufano bariipata.
93
+ Faida:
94
+ • Ile regime iko na saidia kupitia conjoint (bibi ao bwana) bintu pasipo kuripa makuta ya succession.
95
+
96
+ • Mu wakati ya kifo, batoto abataripa droit ya succession, paka ku lufu ya muzazi wa piri.
97
+ Bubaya:
98
+ • Batoto batapata abattement successoraux paka mala moya,
99
+
100
+ • Ma ndeni anikwa ya bote bawiri ata kama ni moya ari kopa kabla ya ao mu ndoa,
101
+
102
+ • Ma bintu ile banandoa bari pata mu ndoa inakwa yabo bote bawiri ila kama kuko musikilizano ingine mu contrat yabo ya ndoa.
103
+
104
+ • Wakati ya kwatshana, bintu bita kaburiwa kipande kipande pashipo kwangaria kule biritokea ao namuna bariipata,
105
+
106
+ • Risque ya bukonvi kama kuna kuya batoto ba kitanda kiakwanza."""
107
  },
108
  {
109
  "role": "user",
 
113
  )
114
  return response['choices'][0]['message']['content']
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def inference(text):
118
+ response = openai.Audio.create(
119
+ model="tts-1-hd",
120
+ voice="alloy",
121
+ text=text
122
+ )
123
+ # OpenAI API returns the temp audio URL which contains the audio file
124
+ # You can download it via requests or use the URL directly as needed
125
+ audio_url = response['url']
126
+ return audio_url
 
 
 
 
 
 
 
 
 
 
127
 
128
  def process_audio_and_respond(audio):
129
+ text = transcribe(audio)
130
+ response_text = generate_response(text)
131
+ audio_url = inference(response_text)
132
+ return response_text, audio_url
133
 
134
  demo = gr.Interface(
135
  process_audio_and_respond,
136
  gr.inputs.Audio(source="microphone", type="filepath", label="Bonyeza kitufe cha kurekodi na uliza swali lako"),
137
+ [gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"),
138
+ gr.outputs.Audio(label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)")],
139
+ description="Uliza Swali kuhusu haki and sheria za udongo",
140
  theme="compact",
141
  layout="vertical",
142
  allow_flagging=False,
143
  live=True,
144
  )
145
 
146
+ demo.launch()