Spaces:

deepsync
/

english-rephraser-transliterator

Running

App Files Files Community

deepsync committed on May 20

Commit

d5eb5d2

•

1 Parent(s): 0976736

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -33

app.py CHANGED Viewed

@@ -8,6 +8,77 @@ import gradio as gr
 import google.auth
 from google.auth.transport.requests import Request
 def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
     if not dictionary_path:
@@ -103,6 +174,9 @@ def clean_hindi_transliterated_text(text):
     return result_text
 def dubpro_english_transliteration(text, call_gpt):
     if call_gpt:
         headers = {
@@ -127,39 +201,40 @@ def dubpro_english_transliteration(text, call_gpt):
             time.sleep(0.5)
         return clean(resp.json())
     else:
-        API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
-        BEARER_TOKEN = get_google_token()
-        headers = {
-            "Authorization": f"Bearer {BEARER_TOKEN}",
-            "Content-Type": "application/json",
-        }
-        payload = {
-            "contents": [
-                {
-                    "parts": [{"text": f"input: {text}"}],
-                    "role": "user",
-                }
-            ],
-            "generationConfig": {
-                "maxOutputTokens": 8192,
-                "temperature": 0.85,
-            },
-            "safetySettings": [
-                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
-                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
-                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
-                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
-            ],
-        }
-        result = requests.post(
-            url=API_URL,
-            headers=headers,
-            json=payload
-        )
-        response = result.json()
-        response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
-        # response_content = transliterate_first_word(response_content)
-        return response_content
 def generate_rephrases_gemini(text, language, problem):

 import google.auth
 from google.auth.transport.requests import Request
+import google.generativeai as genai
+genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))  # API key is read from the GEMINI_API_KEY environment variable (None if unset)
+def upload_to_gemini(path, mime_type=None):
+    file = genai.upload_file(path, mime_type=mime_type)
+    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
+    return file
+generation_config = {  # generation parameters handed to genai.GenerativeModel below
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 64,
+    "max_output_tokens": 1_048_576,  # NOTE(review): very large output cap (the removed REST payload used 8192) - confirm the model accepts this value
+    "response_mime_type": "text/plain",
+}
+safety_settings = [  # every category listed here is set to the BLOCK_NONE threshold
+    {
+        "category": "HARM_CATEGORY_HARASSMENT",
+        "threshold": "BLOCK_NONE",
+    },
+    {
+        "category": "HARM_CATEGORY_HATE_SPEECH",
+        "threshold": "BLOCK_NONE",
+    },
+    {
+        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+        "threshold": "BLOCK_NONE",
+    },
+    {
+        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+        "threshold": "BLOCK_NONE",
+    },
+]
+model = genai.GenerativeModel(  # model handle built from the two settings objects defined above
+    model_name="gemini-1.5-pro-latest",
+    safety_settings=safety_settings,
+    generation_config=generation_config,
+    system_instruction="Act as a language model trained on a specific style of writing that incorporates both Roman and Devanagari script",
+)
+transliteration_example_file = upload_to_gemini(  # executed at import time: uploads the examples file once per process
+    "ai_exp_json.txt", mime_type="text/plain"
+)
+chat_session = model.start_chat(  # module-level chat session seeded with the task description and the examples file
+    history=[
+        {
+            "role": "user",
+            "parts": [
+                "Given a sentence in Roman written English and a set of pre-defined patterns, transliterate only specific words to Devanagari script while maintaining a desired ratio between Roman and Devanagari words. Your task is to transliterate only a subset of words while maintaining the overall meaning and sentence structure.\n",
+                'Based on a provided English sentence and a desired transliteration ratio, use your knowledge of this unique style to select words for transliteration that enhance the overall message and aesthetic. I will provide you with training examples to understand the preferred approach.\nGo through the examples in the file in following JSON format: [{"English": xxx, "Transliteration"}]." and Develop a system that can intelligently choose which English words to transliterate into Devanagari in a sentence, aiming for a specific ratio between the two scripts. With the help of examples in Json format file, design a system that can learn the optimal ratio and transliteration pattern.',
+                transliteration_example_file,  # the uploaded examples file is passed as a message part
+            ],
+        },
+    ]
+)
+def generate_transliteration_gemini_15_pro(text):
+    texts = [text]
+    chat_session.send_message(
+        'Given an English sentences: \n```' +  "\n".join(texts) + '\n```\nTransliterate English sentences into a mix of Roman and Devanagari script, following a predefined pattern or learning from provided examples above without explain anything.\nReturn output in JSON in following format for the list of sentences: {"text": xxx, "transliterate": xxx}'
+    )
+    clean_text = lambda res: res.replace("```json", "").replace("```", "").replace("\n", "")
+    data = json.loads(clean_text(response.text))
+    return clean_hindi_transliterated_text(data["transliterate"])
 def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
     if not dictionary_path:
     return result_text
 def dubpro_english_transliteration(text, call_gpt):
     if call_gpt:
         headers = {
             time.sleep(0.5)
         return clean(resp.json())
     else:
+        return generate_transliteration_gemini_15_pro(text)
+        # API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
+        # BEARER_TOKEN = get_google_token()
+        # headers = {
+        #     "Authorization": f"Bearer {BEARER_TOKEN}",
+        #     "Content-Type": "application/json",
+        # }
+        # payload = {
+        #     "contents": [
+        #         {
+        #             "parts": [{"text": f"input: {text}"}],
+        #             "role": "user",
+        #         }
+        #     ],
+        #     "generationConfig": {
+        #         "maxOutputTokens": 8192,
+        #         "temperature": 0.85,
+        #     },
+        #     "safetySettings": [
+        #         {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+        #         {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+        #         {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+        #         {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
+        #     ],
+        # }
+        # result = requests.post(
+        #     url=API_URL,
+        #     headers=headers,
+        #     json=payload
+        # )
+        # response = result.json()
+        # response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
+        # # response_content = transliterate_first_word(response_content)
+        # return response_content
 def generate_rephrases_gemini(text, language, problem):