deepsync committed on
Commit
d5eb5d2
1 Parent(s): 0976736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -33
app.py CHANGED
@@ -8,6 +8,77 @@ import gradio as gr
8
  import google.auth
9
  from google.auth.transport.requests import Request
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
13
  if not dictionary_path:
@@ -103,6 +174,9 @@ def clean_hindi_transliterated_text(text):
103
  return result_text
104
 
105
 
 
 
 
106
  def dubpro_english_transliteration(text, call_gpt):
107
  if call_gpt:
108
  headers = {
@@ -127,39 +201,40 @@ def dubpro_english_transliteration(text, call_gpt):
127
  time.sleep(0.5)
128
  return clean(resp.json())
129
  else:
130
- API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
131
- BEARER_TOKEN = get_google_token()
132
- headers = {
133
- "Authorization": f"Bearer {BEARER_TOKEN}",
134
- "Content-Type": "application/json",
135
- }
136
- payload = {
137
- "contents": [
138
- {
139
- "parts": [{"text": f"input: {text}"}],
140
- "role": "user",
141
- }
142
- ],
143
- "generationConfig": {
144
- "maxOutputTokens": 8192,
145
- "temperature": 0.85,
146
- },
147
- "safetySettings": [
148
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
149
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
150
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
151
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
152
- ],
153
- }
154
- result = requests.post(
155
- url=API_URL,
156
- headers=headers,
157
- json=payload
158
- )
159
- response = result.json()
160
- response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
161
- # response_content = transliterate_first_word(response_content)
162
- return response_content
 
163
 
164
 
165
  def generate_rephrases_gemini(text, language, problem):
 
8
  import google.auth
9
  from google.auth.transport.requests import Request
10
 
11
+ import google.generativeai as genai
12
+
13
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
14
+
15
def upload_to_gemini(path, mime_type=None):
    """Upload the file at ``path`` via ``genai.upload_file`` and return the result.

    Args:
        path: Location of the file to upload.
        mime_type: Optional MIME type forwarded unchanged to
            ``genai.upload_file``.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    # Echo the upload's display name and URI to stdout.
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded
19
+
20
+ generation_config = {
21
+ "temperature": 1,
22
+ "top_p": 0.95,
23
+ "top_k": 64,
24
+ "max_output_tokens": 1_048_576,
25
+ "response_mime_type": "text/plain",
26
+ }
27
+
28
+ safety_settings = [
29
+ {
30
+ "category": "HARM_CATEGORY_HARASSMENT",
31
+ "threshold": "BLOCK_NONE",
32
+ },
33
+ {
34
+ "category": "HARM_CATEGORY_HATE_SPEECH",
35
+ "threshold": "BLOCK_NONE",
36
+ },
37
+ {
38
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
39
+ "threshold": "BLOCK_NONE",
40
+ },
41
+ {
42
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
43
+ "threshold": "BLOCK_NONE",
44
+ },
45
+ ]
46
+
47
+ model = genai.GenerativeModel(
48
+ model_name="gemini-1.5-pro-latest",
49
+ safety_settings=safety_settings,
50
+ generation_config=generation_config,
51
+ system_instruction="Act as a language model trained on a specific style of writing that incorporates both Roman and Devanagari script",
52
+ )
53
+
54
+ transliteration_example_file = upload_to_gemini(
55
+ "ai_exp_json.txt", mime_type="text/plain"
56
+ )
57
+
58
+ chat_session = model.start_chat(
59
+ history=[
60
+ {
61
+ "role": "user",
62
+ "parts": [
63
+ "Given a sentence in Roman written English and a set of pre-defined patterns, transliterate only specific words to Devanagari script while maintaining a desired ratio between Roman and Devanagari words. Your task is to transliterate only a subset of words while maintaining the overall meaning and sentence structure.\n",
64
+ 'Based on a provided English sentence and a desired transliteration ratio, use your knowledge of this unique style to select words for transliteration that enhance the overall message and aesthetic. I will provide you with training examples to understand the preferred approach.\nGo through the examples in the file in following JSON format: [{"English": xxx, "Transliteration"}]." and Develop a system that can intelligently choose which English words to transliterate into Devanagari in a sentence, aiming for a specific ratio between the two scripts. With the help of examples in Json format file, design a system that can learn the optimal ratio and transliteration pattern.',
65
+ transliteration_example_file,
66
+ ],
67
+ },
68
+ ]
69
+ )
70
+
71
+
72
def generate_transliteration_gemini_15_pro(text):
    """Transliterate an English sentence into mixed Roman/Devanagari text.

    Sends ``text`` to the module-level ``chat_session`` and parses the JSON
    object found in the reply.

    Args:
        text: The English sentence to transliterate.

    Returns:
        The reply's ``"transliterate"`` value after it has been passed through
        ``clean_hindi_transliterated_text``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON once the Markdown
            code fences and newlines have been stripped.
        KeyError: If the parsed object has no ``"transliterate"`` key.
        TypeError: If the parsed reply is neither an object nor a non-empty
            list of objects.
    """
    prompt = (
        'Given an English sentences: \n```' + text + '\n```\nTransliterate English sentences into a mix of Roman and Devanagari script, following a predefined pattern or learning from provided examples above without explain anything.\nReturn output in JSON in following format for the list of sentences: {"text": xxx, "transliterate": xxx}'
    )

    # The reply must be captured: it was previously discarded, so the
    # ``response.text`` lookup below failed with NameError.
    # NOTE(review): chat_session is shared module-level state; confirm that
    # reusing one session across unrelated requests is intended.
    response = chat_session.send_message(prompt)

    # Strip the Markdown fences the model tends to wrap JSON in, and flatten
    # the payload to a single line before parsing.
    raw_payload = response.text.replace("```json", "").replace("```", "").replace("\n", "")
    data = json.loads(raw_payload)

    # The prompt mentions a "list of sentences", so the model may answer with
    # a one-element list instead of a bare object; accept both shapes.
    if isinstance(data, list) and data:
        data = data[0]

    return clean_hindi_transliterated_text(data["transliterate"])
80
+
81
+
82
 
83
  def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
84
  if not dictionary_path:
 
174
  return result_text
175
 
176
 
177
+
178
+
179
+
180
  def dubpro_english_transliteration(text, call_gpt):
181
  if call_gpt:
182
  headers = {
 
201
  time.sleep(0.5)
202
  return clean(resp.json())
203
  else:
204
+ return generate_transliteration_gemini_15_pro(text)
205
+ # API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
206
+ # BEARER_TOKEN = get_google_token()
207
+ # headers = {
208
+ # "Authorization": f"Bearer {BEARER_TOKEN}",
209
+ # "Content-Type": "application/json",
210
+ # }
211
+ # payload = {
212
+ # "contents": [
213
+ # {
214
+ # "parts": [{"text": f"input: {text}"}],
215
+ # "role": "user",
216
+ # }
217
+ # ],
218
+ # "generationConfig": {
219
+ # "maxOutputTokens": 8192,
220
+ # "temperature": 0.85,
221
+ # },
222
+ # "safetySettings": [
223
+ # {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
224
+ # {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
225
+ # {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
226
+ # {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
227
+ # ],
228
+ # }
229
+ # result = requests.post(
230
+ # url=API_URL,
231
+ # headers=headers,
232
+ # json=payload
233
+ # )
234
+ # response = result.json()
235
+ # response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
236
+ # # response_content = transliterate_first_word(response_content)
237
+ # return response_content
238
 
239
 
240
  def generate_rephrases_gemini(text, language, problem):