"""Transliteration helpers backed by Gemini, OpenAI and Google Input Tools."""

import json
import os
import re
import time

import google.auth
import google.generativeai as genai
import gradio as gr
import requests
from google.auth.transport.requests import Request

genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))


def upload_to_gemini(path, mime_type=None):
    """Upload the file at ``path`` through ``genai.upload_file`` and return it.

    Args:
        path: Path of the file to upload.
        mime_type: Optional MIME type forwarded to ``genai.upload_file``.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    file = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
    return file


# Sampling parameters handed to the Gemini model below.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 1_048_576,
    "response_mime_type": "text/plain",
}

# Every listed harm category is configured with the BLOCK_NONE threshold.
safety_settings = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    safety_settings=safety_settings,
    generation_config=generation_config,
    system_instruction="Act as a language model trained on a specific style of writing that incorporates both Roman and Devanagari script",
)

# Example pairs uploaded once at import time and referenced by the chat history.
transliteration_example_file = upload_to_gemini(
    "ai_exp_json.txt", mime_type="text/plain"
)

# A single module-level chat session: every call to
# generate_transliteration_gemini_15_pro sends its message through it.
chat_session = model.start_chat(
    history=[
        {
            "role": "user",
            "parts": [
                "Given a sentence in Roman written English and a set of pre-defined patterns, transliterate only specific words to Devanagari script while maintaining a desired ratio between Roman and Devanagari words. Your task is to transliterate only a subset of words while maintaining the overall meaning and sentence structure.\n",
                (
                    'Based on a provided English sentence and a desired transliteration ratio, use your knowledge of this unique style to select words for transliteration that enhance the overall message and aesthetic. '
                    'I will provide you with training examples to understand the preferred approach.\nGo through the examples in the file in following JSON format: [{"English": xxx, "Transliteration"}]." and Develop a system that can intelligently choose which English words to transliterate into Devanagari in a sentence, aiming for a specific ratio between the two scripts. With the help of examples in Json format file, design a system that can learn the optimal ratio and transliteration pattern.'
                ),
                transliteration_example_file,
            ],
        },
    ]
)


def generate_transliteration_gemini_15_pro(text):
    """Transliterate ``text`` with the shared Gemini chat session.

    The model is asked for a JSON object with ``text`` and ``transliterate``
    keys; the ``transliterate`` value is post-processed by
    ``clean_hindi_transliterated_text`` and returned.

    Args:
        text: English sentence to transliterate.

    Returns:
        The cleaned transliteration string.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON once the code
            fences and newlines have been removed.
        KeyError: If the decoded object has no ``transliterate`` key.
    """
    # The reply must be captured: it was previously discarded, so reading
    # ``response.text`` below raised NameError.
    response = chat_session.send_message(
        'Given an English sentences: \n```' + text + '\n```\nTransliterate English sentences into a mix of Roman and Devanagari script, following a predefined pattern or learning from provided examples above without explain anything.\nReturn output in JSON in following format for the list of sentences: {"text": xxx, "transliterate": xxx}'
    )
    # Drop Markdown code fences and newlines so the payload parses as JSON.
    raw_json = (
        response.text.replace("```json", "").replace("```", "").replace("\n", "")
    )
    data = json.loads(raw_json)
    return clean_hindi_transliterated_text(data["transliterate"])


def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
    """Replace whitespace-separated tokens of ``text`` using a dictionary file.

    Each non-blank line of the file holds three ``|``-separated fields:
    ``initial|incorrect|correct``. A token equal to the lookup key, optionally
    followed by ``.``, ``?`` or ``,``, is replaced by ``correct`` with the same
    trailing mark. Tokens are re-joined with single spaces.

    Args:
        text: Text to rewrite.
        dictionary_path: Path of the dictionary file. A falsy value disables
            the lookup and ``text`` is returned unchanged.
        initial_lookup: When true the ``initial`` field is the lookup key,
            otherwise the ``incorrect`` field is.

    Returns:
        The rewritten text.

    Raises:
        ValueError: If a non-blank line does not contain exactly three fields.
    """
    if not dictionary_path:
        return text

    # The dictionary contains Devanagari text, so the encoding is pinned
    # instead of relying on the platform default. Blank lines are skipped.
    with open(dictionary_path, encoding="utf-8") as f:
        entries = [line.split("|") for line in f.read().splitlines() if line.strip()]

    # Only the table for the requested pass is built.
    replacements = {}
    for initial, incorrect, correct in entries:
        key = initial if initial_lookup else incorrect
        replacements[key] = correct
        for mark in (".", "?", ","):
            replacements[key + mark] = correct + mark

    print(f"Original [{initial_lookup}]: ", text)
    new_text = " ".join(replacements.get(token, token) for token in text.split())
    print(f"New [{initial_lookup}]: ", new_text)
    return new_text


def get_google_token():
    """Return an access token for the service account in ``GCP_FINETUNE_KEY``.

    The environment variable is parsed as JSON and passed to
    ``google.auth.load_credentials_from_dict`` with the cloud-platform and
    generative-language tuning scopes; the credentials are refreshed before
    the token is read.

    Returns:
        The ``token`` attribute of the refreshed credentials.
    """
    credentials, _project = google.auth.load_credentials_from_dict(
        json.loads(os.environ.get('GCP_FINETUNE_KEY')),
        scopes=[
            "https://www.googleapis.com/auth/cloud-platform",
            "https://www.googleapis.com/auth/generative-language.tuning",
        ],
    )
    credentials.refresh(Request())
    return credentials.token


def transliterate_first_word(text):
    """Transliterate only the first word of ``text`` via Google Input Tools.

    Args:
        text: Input sentence.

    Returns:
        ``text`` unchanged when it has no words or when its first word is not
        alphanumeric; otherwise the transliterated first word, a space, and
        the remainder of the sentence.
    """
    words = text.split(maxsplit=1)
    if not words:
        # Empty or whitespace-only input: nothing to transliterate.
        return text

    first_word = words[0]
    rest = words[1] if len(words) > 1 else ""
    if not first_word.isalnum():
        return text

    url = "https://inputtools.google.com/request"
    params = {
        "text": first_word,
        "num": 1,
        "itc": "hi-t-i0-und",
        "cp": 0,
        "cs": 1,
        "ie": "utf-8",
        "app": "demopage",
    }
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, params=params, timeout=10)
    # NOTE(review): assumes the candidate list sits at [1][0][1] of the
    # decoded reply, as the original indexing did; confirm against the service.
    candidates = response.json()[1][0][1]
    return f"{candidates[0]} {rest}"


def clean(result):
    """Extract and tidy the transliteration from a chat-completion payload.

    Args:
        result: Decoded JSON body; the text is read from
            ``result["choices"][0]["message"]["content"]``.

    Returns:
        The last line of the reply with bracketed/parenthesised spans, quotes
        and backticks removed, passed through
        ``clean_hindi_transliterated_text``.
    """
    text = result["choices"][0]["message"]["content"]
    # Remove "(...)" and "[...]" spans.
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "").replace("`", "")
    # Keep only the last line. Newlines are stripped first so that a reply
    # ending in "\n" does not collapse to an empty string.
    text = text.strip("\n").split("\n")[-1]
    return clean_hindi_transliterated_text(text)


def clean_hindi_transliterated_text(text):
    """Normalise a transliteration and apply the final dictionary pass.

    Backticks, the ``output:`` prefix, braces, the ``'text'`` key and colons
    are removed, three Devanagari signs are substituted, surrounding
    whitespace and quotes are stripped, and the result is passed to
    ``update_text_from_dictionary`` with ``initial_lookup=False``.

    Args:
        text: Raw transliterated text.

    Returns:
        The cleaned text.
    """
    updates = [
        ('ऑ', 'औ'),
        ('ॉ', 'ौ'),
        ('ॅ', 'े'),
        ("{", ""),
        ("}", ""),
        ("'text'", ""),
        (":", ""),
    ]
    text = text.replace('`', '').replace("output:", "")
    for old, new in updates:
        text = text.replace(old, new)
    final_text = text.strip().strip("'").strip('"')
    return update_text_from_dictionary(final_text, initial_lookup=False)
# Settings for the OpenAI chat-completions call made by
# dubpro_english_transliteration.
_OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
_OPENAI_MAX_ATTEMPTS = 5
_OPENAI_RETRY_DELAY_SECONDS = 0.5
_HTTP_TIMEOUT_SECONDS = 120


def _request_openai_chat_completion(prompt):
    """POST ``prompt`` to the OpenAI chat endpoint, retrying failed attempts.

    Returns the decoded JSON body of the first HTTP 200 response. Raises
    ``RuntimeError`` once ``_OPENAI_MAX_ATTEMPTS`` attempts have failed.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
    }
    payload = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": prompt}],
    }
    last_error = ""
    for attempt in range(1, _OPENAI_MAX_ATTEMPTS + 1):
        try:
            resp = requests.post(
                _OPENAI_CHAT_URL,
                headers=headers,
                json=payload,
                timeout=_HTTP_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            last_error = str(exc)
        else:
            if resp.status_code == 200:
                return resp.json()
            last_error = resp.text
        print(last_error)
        if attempt < _OPENAI_MAX_ATTEMPTS:
            time.sleep(_OPENAI_RETRY_DELAY_SECONDS)
    raise RuntimeError(
        f"OpenAI request failed after {_OPENAI_MAX_ATTEMPTS} attempts: {last_error}"
    )


def dubpro_english_transliteration(text, call_gpt):
    """Transliterate English ``text`` to Hindi.

    Args:
        text: English text to transliterate.
        call_gpt: When truthy, ``text`` first goes through the initial
            dictionary pass and is then sent to the OpenAI ``gpt-4`` chat
            model; otherwise the Gemini chat session is used.

    Returns:
        The cleaned transliteration.

    Raises:
        RuntimeError: If the OpenAI request keeps failing.
    """
    if not call_gpt:
        return generate_transliteration_gemini_15_pro(text)

    text = update_text_from_dictionary(text, initial_lookup=True)
    prompt = f"Given the English text, transliterate it to Hindi, without translation. Return only the transliterated text, without any instruction or messages. Text: `{text}`\nOutput: "
    # The previous loop ("while resp is None") stopped after the first POST
    # even on failure, so an error body reached clean() and raised KeyError.
    return clean(_request_openai_chat_completion(prompt))

    # NOTE: legacy implementation that called the fine-tuned Gemini endpoint,
    # kept for reference only.
    # API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
    # BEARER_TOKEN = get_google_token()
    # headers = {
    #     "Authorization": f"Bearer {BEARER_TOKEN}",
    #     "Content-Type": "application/json",
    # }
    # payload = {
    #     "contents": [
    #         {
    #             "parts": [{"text": f"input: {text}"}],
    #             "role": "user",
    #         }
    #     ],
    #     "generationConfig": {
    #         "maxOutputTokens": 8192,
    #         "temperature": 0.85,
    #     },
    #     "safetySettings": [
    #         {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    #         {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    #         {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    #         {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    #     ],
    # }
    # result = requests.post(
    #     url=API_URL,
    #     headers=headers,
    #     json=payload
    # )
    # response = result.json()
    # response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
    # # response_content = transliterate_first_word(response_content)
    # return response_content


def generate_rephrases_gemini(text, language, problem):
    """Rephrase ``text`` so that it takes slightly more or less time to speak.

    Args:
        text: Sentence to rephrase.
        language: ``"English"`` or ``"Hindi"``; selects the prompt.
        problem: ``"Gap"`` asks for a rephrase that takes more time to speak;
            any other value asks for one that takes less.

    Returns:
        A ``(rephrased_text, word_count_summary)`` tuple.

    Raises:
        ValueError: If ``language`` is neither ``"English"`` nor ``"Hindi"``.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    speak = "more" if problem == "Gap" else "less"
    if language == "English":
        prompt = f"You are an English and Hindi language expert, please rephrase a sentence that has been translated from Hindi to English so that it takes little {speak} time to speak."
    elif language == "Hindi":
        prompt = f"You are a hindi language expert please rephrase the below line without summary so that it takes little {speak} time to speak in hinglish manner."
    else:
        # ``prompt`` used to stay unbound here and fail later with
        # UnboundLocalError.
        raise ValueError(f"Unsupported language: {language!r}")

    api_url = os.environ.get("GEMINI_REPHRASER_API")
    headers = {
        "Authorization": f"Bearer {get_google_token()}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {"text": f"input: {text}"},
                    {"text": "output: "},
                ],
                "role": "user",
            }
        ],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.85,
            "candidateCount": 1,
        },
        "safetySettings": [
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ],
    }
    result = requests.post(
        url=api_url, headers=headers, json=payload, timeout=_HTTP_TIMEOUT_SECONDS
    )
    # Fail on an HTTP error instead of a KeyError while indexing the body.
    result.raise_for_status()
    response = result.json()
    output_text = response["candidates"][0]["content"]["parts"][0]["text"]

    # Remove hyphens and surrounding whitespace from every returned line.
    texts = "\n".join(
        line.replace("-", "").strip() for line in output_text.split("\n")
    )
    # texts = dubpro_english_transliteration(texts)
    wc = f"Original Word Count: {len(text.split())}\nRephrased Word Count: {len(texts.split())}"
    return texts, wc


# NOTE(review): the Row/Column nesting below was reconstructed from a
# flattened source; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Translator Assistance Tools")

    with gr.Tab("Transliteration"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input text", info="Please enter English text.")
                full_transliteration = gr.Checkbox(label="Full transliteration", value=True)
            output_text = gr.Textbox(label="Output text")
        transliterate = gr.Button("Submit")
        # The checkbox value is passed as ``call_gpt``.
        transliterate.click(
            dubpro_english_transliteration,
            [input_text, full_transliteration],
            output_text,
        )

    with gr.Tab("Rephraser Tool"):
        with gr.Row():
            rephrase_text = gr.Textbox(label="Input text", info="Please enter text.")
            language = gr.Dropdown(["English", "Hindi"], value="Hindi")
            solving_for = gr.Dropdown(["Gap", "Overflow"], value="Overflow", label="Solving for:")
        with gr.Row():
            word_count = gr.Textbox(label="Word count")
            rephrased_text = gr.Textbox(label="Output text")
        rephrase = gr.Button("Submit")
        # Output order matches the (texts, wc) tuple returned by the handler.
        rephrase.click(
            generate_rephrases_gemini,
            [rephrase_text, language, solving_for],
            [rephrased_text, word_count],
        )

# Login credentials are read from the USERNAME and PASSWORD environment variables.
demo.launch(auth=(os.environ.get("USERNAME"), os.environ.get("PASSWORD")))