Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,77 @@ import gradio as gr
|
|
8 |
import google.auth
|
9 |
from google.auth.transport.requests import Request
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
|
13 |
if not dictionary_path:
|
@@ -103,6 +174,9 @@ def clean_hindi_transliterated_text(text):
|
|
103 |
return result_text
|
104 |
|
105 |
|
|
|
|
|
|
|
106 |
def dubpro_english_transliteration(text, call_gpt):
|
107 |
if call_gpt:
|
108 |
headers = {
|
@@ -127,39 +201,40 @@ def dubpro_english_transliteration(text, call_gpt):
|
|
127 |
time.sleep(0.5)
|
128 |
return clean(resp.json())
|
129 |
else:
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
# response_content =
|
162 |
-
|
|
|
163 |
|
164 |
|
165 |
def generate_rephrases_gemini(text, language, problem):
|
|
|
8 |
import google.auth
|
9 |
from google.auth.transport.requests import Request
|
10 |
|
11 |
+
import google.generativeai as genai
|
12 |
+
|
13 |
+
# Configure the Gemini SDK with the key read from the GEMINI_API_KEY
# environment variable; os.environ.get yields None when the variable is unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
|
14 |
+
|
15 |
+
def upload_to_gemini(path, mime_type=None):
    """Upload the file at ``path`` via ``genai.upload_file`` and return the result.

    Args:
        path: Location of the file to upload.
        mime_type: Optional MIME type, forwarded unchanged to the SDK call.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    # Report the display name and URI exposed by the returned handle.
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded
|
19 |
+
|
20 |
+
# Generation options handed to the GenerativeModel constructor below.
# NOTE(review): max_output_tokens (1_048_576) looks like an input-context size
# rather than an output cap -- confirm against the model's output token limit.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=1_048_576,
    response_mime_type="text/plain",
)
|
27 |
+
|
28 |
+
# One entry per harm category, each with its threshold set to "BLOCK_NONE".
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]
|
46 |
+
|
47 |
+
# Module-level model handle, built from the generation and safety settings
# defined earlier in this module plus a fixed system instruction.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config=generation_config,
    safety_settings=safety_settings,
    system_instruction="Act as a language model trained on a specific style of writing that incorporates both Roman and Devanagari script",
)
|
53 |
+
|
54 |
+
# Upload the examples file when this module is imported; the path is relative,
# so it is resolved against the process's working directory.
transliteration_example_file = upload_to_gemini(
    path="ai_exp_json.txt",
    mime_type="text/plain",
)
|
57 |
+
|
58 |
+
# Start a chat whose history is a single user turn: two instruction strings
# followed by the uploaded examples file.
chat_session = model.start_chat(
    history=[
        dict(
            role="user",
            parts=[
                "Given a sentence in Roman written English and a set of pre-defined patterns, transliterate only specific words to Devanagari script while maintaining a desired ratio between Roman and Devanagari words. Your task is to transliterate only a subset of words while maintaining the overall meaning and sentence structure.\n",
                'Based on a provided English sentence and a desired transliteration ratio, use your knowledge of this unique style to select words for transliteration that enhance the overall message and aesthetic. I will provide you with training examples to understand the preferred approach.\nGo through the examples in the file in following JSON format: [{"English": xxx, "Transliteration"}]." and Develop a system that can intelligently choose which English words to transliterate into Devanagari in a sentence, aiming for a specific ratio between the two scripts. With the help of examples in Json format file, design a system that can learn the optimal ratio and transliteration pattern.',
                transliteration_example_file,
            ],
        ),
    ],
)
|
70 |
+
|
71 |
+
|
72 |
+
def _parse_transliteration_reply(raw_reply):
    """Decode the model's JSON reply and return its ``"transliterate"`` value."""
    # The reply may be wrapped in a Markdown code fence; drop the fence markers
    # and raw newlines so that only the JSON document remains.
    payload = raw_reply.replace("```json", "").replace("```", "").replace("\n", "")
    data = json.loads(payload)
    # The prompt mentions "the list of sentences", so a one-element list is a
    # plausible reply shape; unwrap it instead of failing on the key lookup.
    if isinstance(data, list) and data:
        data = data[0]
    return data["transliterate"]


def generate_transliteration_gemini_15_pro(text):
    """Transliterate ``text`` into mixed Roman/Devanagari script via the chat session.

    Sends a prompt containing ``text`` to the module-level ``chat_session``,
    parses the JSON reply, and passes its ``"transliterate"`` value through
    ``clean_hindi_transliterated_text``.

    Args:
        text: The English sentence to transliterate (a ``str``).

    Returns:
        Whatever ``clean_hindi_transliterated_text`` returns for the
        transliterated sentence.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON after cleanup.
        KeyError: If the decoded reply has no ``"transliterate"`` key.

    NOTE(review): the shared module-level ``chat_session`` is reused for every
    call; presumably each exchange is added to its history -- confirm that
    this accumulation is intended.
    """
    prompt = (
        'Given an English sentences: \n```' + text + '\n```\nTransliterate English sentences into a mix of Roman and Devanagari script, following a predefined pattern or learning from provided examples above without explain anything.\nReturn output in JSON in following format for the list of sentences: {"text": xxx, "transliterate": xxx}'
    )
    # Bind the reply: the original discarded it and then read an undefined
    # ``response`` name, raising NameError on every call.
    response = chat_session.send_message(prompt)
    return clean_hindi_transliterated_text(_parse_transliteration_reply(response.text))
|
80 |
+
|
81 |
+
|
82 |
|
83 |
def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
|
84 |
if not dictionary_path:
|
|
|
174 |
return result_text
|
175 |
|
176 |
|
177 |
+
|
178 |
+
|
179 |
+
|
180 |
def dubpro_english_transliteration(text, call_gpt):
|
181 |
if call_gpt:
|
182 |
headers = {
|
|
|
201 |
time.sleep(0.5)
|
202 |
return clean(resp.json())
|
203 |
else:
|
204 |
+
return generate_transliteration_gemini_15_pro(text)
|
205 |
+
# API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
|
206 |
+
# BEARER_TOKEN = get_google_token()
|
207 |
+
# headers = {
|
208 |
+
# "Authorization": f"Bearer {BEARER_TOKEN}",
|
209 |
+
# "Content-Type": "application/json",
|
210 |
+
# }
|
211 |
+
# payload = {
|
212 |
+
# "contents": [
|
213 |
+
# {
|
214 |
+
# "parts": [{"text": f"input: {text}"}],
|
215 |
+
# "role": "user",
|
216 |
+
# }
|
217 |
+
# ],
|
218 |
+
# "generationConfig": {
|
219 |
+
# "maxOutputTokens": 8192,
|
220 |
+
# "temperature": 0.85,
|
221 |
+
# },
|
222 |
+
# "safetySettings": [
|
223 |
+
# {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
|
224 |
+
# {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
|
225 |
+
# {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
|
226 |
+
# {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
|
227 |
+
# ],
|
228 |
+
# }
|
229 |
+
# result = requests.post(
|
230 |
+
# url=API_URL,
|
231 |
+
# headers=headers,
|
232 |
+
# json=payload
|
233 |
+
# )
|
234 |
+
# response = result.json()
|
235 |
+
# response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
|
236 |
+
# # response_content = transliterate_first_word(response_content)
|
237 |
+
# return response_content
|
238 |
|
239 |
|
240 |
def generate_rephrases_gemini(text, language, problem):
|