"""Gradio demo: English-to-Hindi transliteration and a Gemini rephraser."""

import os
import re
import json
import time
import requests
import gradio as gr
import google.auth
from google.auth.transport.requests import Request

# Upper bound (seconds) for every outbound HTTP call so a stalled remote
# service cannot hang a UI request forever.
_REQUEST_TIMEOUT_SECONDS = 60

# Retry policy for the OpenAI chat-completions call.
_OPENAI_MAX_ATTEMPTS = 5
_OPENAI_RETRY_DELAY_SECONDS = 0.5

# Punctuation variants registered for every dictionary entry, so that a word
# immediately followed by one of these marks is still replaced.
_DICTIONARY_SUFFIXES = ("", ".", "?", ",")

# Safety settings sent with every Gemini request.
_GEMINI_SAFETY_SETTINGS = [
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]


def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
    """Replace whitespace-separated words of ``text`` using a dictionary file.

    Each non-blank line of the file must hold three ``|``-separated fields:
    ``initial|incorrect|correct``. Words equal to the lookup key (optionally
    followed by ``.``, ``?`` or ``,``) are replaced by ``correct`` with the
    same trailing mark.

    Args:
        text: Text whose words should be replaced.
        dictionary_path: Path of the dictionary file. A falsy value disables
            replacement and returns ``text`` unchanged.
        initial_lookup: When true the ``initial`` field is the lookup key,
            otherwise the ``incorrect`` field is.

    Returns:
        The text with matching words replaced, re-joined with single spaces.

    Raises:
        OSError: If the dictionary file cannot be opened.
        ValueError: If a non-blank line does not have exactly three fields.
    """
    if not dictionary_path:
        # Fixed: the original returned the undefined name ``texts`` here.
        return text

    # The dictionary contains Devanagari text; do not depend on the platform
    # default encoding.
    with open(dictionary_path, encoding="utf-8") as f:
        lines = f.read().splitlines()

    replacements = {}
    for line in lines:
        if not line.strip():
            # A blank (e.g. trailing) line would otherwise break the unpack.
            continue
        initial, incorrect, correct = line.split("|")
        key = initial if initial_lookup else incorrect
        for suffix in _DICTIONARY_SUFFIXES:
            replacements[key + suffix] = correct + suffix

    print(f"Original [{initial_lookup}]: ", text)
    new_text = " ".join(replacements.get(word, word) for word in text.split())
    # Fixed: the original printed the untouched ``text`` a second time.
    print(f"New [{initial_lookup}]: ", new_text)
    return new_text


def get_google_token():
    """Return a fresh Google OAuth access token.

    The service-account JSON is read from the ``GCP_FINETUNE_KEY``
    environment variable.

    Raises:
        RuntimeError: If ``GCP_FINETUNE_KEY`` is not set.
    """
    raw_key = os.environ.get('GCP_FINETUNE_KEY')
    if not raw_key:
        # The original failed with an opaque TypeError from json.loads(None).
        raise RuntimeError("Environment variable GCP_FINETUNE_KEY is not set")

    credentials, _project = google.auth.load_credentials_from_dict(
        json.loads(raw_key),
        scopes=[
            "https://www.googleapis.com/auth/cloud-platform",
            "https://www.googleapis.com/auth/generative-language.tuning",
        ],
    )
    credentials.refresh(Request())
    return credentials.token


def transliterate_first_word(text):
    """Transliterate only the first word of ``text`` via Google Input Tools.

    The text is returned unchanged when it is empty/whitespace-only or when
    its first word is not purely alphanumeric (``str.isalnum``).

    Args:
        text: Text whose first whitespace-separated word is transliterated.

    Returns:
        The first candidate returned by the service followed by the rest of
        the text, separated by one space.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
    """
    pieces = text.split(maxsplit=1)
    if not pieces:
        # Fixed: the original raised IndexError on empty input.
        return text
    first_word = pieces[0]
    rest = pieces[1] if len(pieces) > 1 else ""
    if not first_word.isalnum():
        return text

    url = "https://inputtools.google.com/request"
    params = {
        "text": first_word,
        "num": 1,  # a single candidate is enough
        "itc": "hi-t-i0-und",  # presumably the Hindi transliteration scheme - confirm
        "cp": 0,
        "cs": 1,
        "ie": "utf-8",
        "app": "demopage",
    }
    response = requests.get(url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    # NOTE(review): the payload is indexed as [1][0][1] -> list of candidates;
    # the exact response schema is not visible here.
    candidates = response.json()[1][0][1]
    first_word_transliterated = candidates[0]
    # Avoid a trailing space when the input was a single word.
    return f"{first_word_transliterated} {rest}" if rest else first_word_transliterated


def clean(result):
    """Extract and clean the transliteration from a chat-completions reply.

    Parenthesised and bracketed spans, double quotes and backticks are
    removed; when several lines remain only the last one is kept. The
    result is then passed through ``clean_hindi_transliterated_text``.

    Args:
        result: Decoded JSON body containing ``choices[0].message.content``.

    Returns:
        The cleaned transliterated text.
    """
    text = result["choices"][0]['message']["content"]
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "").replace('`', "")
    # Fixed: the original tested the newline-stripped text but split the
    # unstripped one, so a reply ending in "\n" produced an empty string.
    stripped = text.strip("\n")
    if "\n" in stripped:
        text = stripped.split("\n")[-1]
    return clean_hindi_transliterated_text(text)


def clean_hindi_transliterated_text(text):
    """Normalise model output and apply the dictionary replacements.

    Removes backticks, ``output:``, braces, ``'text'`` and colons, swaps a
    few Devanagari vowel forms, trims surrounding whitespace and quotes, and
    finally runs ``update_text_from_dictionary`` with its default arguments.

    Args:
        text: Raw transliterated text.

    Returns:
        The normalised text after dictionary replacement.
    """
    updates = [
        ('ऑ', 'औ'),
        ('ॉ', 'ौ'),
        ('ॅ', 'े'),
        ("{", ""),
        ("}", ""),
        ("'text'", ""),
        (":", ""),
    ]
    text = text.replace('`', '').replace("output:", "")
    for old, new in updates:
        text = text.replace(old, new)
    final_text = text.strip().strip("'").strip('"')
    return update_text_from_dictionary(final_text)


def _call_gemini(api_url_env, parts, generation_config):
    """POST a single-turn request to a Gemini endpoint and return its text.

    Args:
        api_url_env: Name of the environment variable holding the endpoint URL.
        parts: List of ``{"text": ...}`` parts forming the user message.
        generation_config: Value sent as ``generationConfig``.

    Returns:
        The text of the first part of the first candidate.

    Raises:
        RuntimeError: If the endpoint environment variable is not set.
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
    """
    api_url = os.environ.get(api_url_env)
    if not api_url:
        raise RuntimeError(f"Environment variable {api_url_env} is not set")

    headers = {
        "Authorization": f"Bearer {get_google_token()}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [{"parts": parts, "role": "user"}],
        "generationConfig": generation_config,
        "safetySettings": _GEMINI_SAFETY_SETTINGS,
    }
    result = requests.post(
        url=api_url,
        headers=headers,
        json=payload,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    result.raise_for_status()
    return result.json()["candidates"][0]["content"]["parts"][0]["text"]


def dubpro_english_transliteration(text, call_gpt):
    """Transliterate English text into Hindi script.

    Args:
        text: English text to transliterate.
        call_gpt: When true, use the OpenAI ``gpt-4`` chat model and post-process
            the reply with ``clean``. Otherwise call the endpoint named by
            ``GEMINI_FINETUNED_HINDI_ENG_API`` and transliterate the first
            word of its reply with ``transliterate_first_word``.

    Returns:
        The transliterated text.

    Raises:
        RuntimeError: If OpenAI never answers with HTTP 200 within the retry
            budget, or a required environment variable is missing.
        requests.RequestException: On network failure or timeout.
    """
    if call_gpt:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
        }
        prompt = (
            "Given the English text, transliterate it to Hindi, without translation. "
            "Return only the transliterated text, without any instruction or messages. "
            f"Text: `{text}`\nOutput: "
        )
        messages = [{"role": "user", "content": prompt}]

        # Fixed: the original ``while resp is None`` loop ended after the very
        # first request (resp was never None again), so a failed call was
        # handed to ``clean`` instead of being retried.
        for attempt in range(1, _OPENAI_MAX_ATTEMPTS + 1):
            resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json={"model": "gpt-4", "messages": messages},
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            if resp.status_code == 200:
                return clean(resp.json())
            print(resp.text)
            if attempt < _OPENAI_MAX_ATTEMPTS:
                time.sleep(_OPENAI_RETRY_DELAY_SECONDS)
        raise RuntimeError(
            f"OpenAI request failed after {_OPENAI_MAX_ATTEMPTS} attempts "
            f"(last status {resp.status_code})"
        )

    raw_text = _call_gemini(
        "GEMINI_FINETUNED_HINDI_ENG_API",
        [{"text": f"input: {text}"}],
        {"maxOutputTokens": 8192, "temperature": 0.85},
    )
    # Strip the wrapper text the fine-tuned model may emit around the answer.
    response_content = (
        raw_text.replace("output:", "")
        .strip()
        .replace("'text':", "")
        .replace("{", "")
        .replace("}", "")
        .strip()
        .strip("'")
        .strip('"')
    )
    return transliterate_first_word(response_content)


def generate_rephrases_gemini(text, language, problem):
    """Ask Gemini to rephrase ``text`` so it takes more or less time to speak.

    Args:
        text: Sentence to rephrase.
        language: ``"English"`` or ``"Hindi"``; selects the instruction prompt.
        problem: ``"Gap"`` asks for a longer rendition; any other value asks
            for a shorter one.

    Returns:
        A ``(rephrased_text, word_count_report)`` tuple. Hyphens are removed
        from the model output and every line is stripped.

    Raises:
        ValueError: If ``language`` is not supported.
        RuntimeError: If a required environment variable is missing.
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
    """
    speak = "more" if problem == "Gap" else "less"

    if language == "English":
        prompt = (
            "You are an English and Hindi language expert, please rephrase a "
            "sentence that has been translated from Hindi to English so that it "
            f"takes little {speak} time to speak."
        )
    elif language == "Hindi":
        prompt = (
            "You are a hindi language expert please rephrase the below line "
            "without summary so that it takes little "
            f"{speak} time to speak in hinglish manner."
        )
    else:
        # Fixed: the original left ``prompt`` unbound (UnboundLocalError).
        raise ValueError(f"Unsupported language: {language!r}")

    output_text = _call_gemini(
        "GEMINI_REPHRASER_API",
        [
            {"text": prompt},
            {"text": f"input: {text}"},
            {"text": "output: "},
        ],
        {"maxOutputTokens": 8192, "temperature": 0.85, "candidateCount": 1},
    )

    texts = "\n".join(
        line.replace("-", "").strip() for line in output_text.split("\n")
    )
    wc = (
        f"Original Word Count: {len(text.split())}\n"
        f"Rephrased Word Count: {len(texts.split())}"
    )
    return texts, wc


# NOTE(review): the original file's indentation was lost, so the nesting of
# the Row/Column containers below is reconstructed - confirm against the
# intended layout.
with gr.Blocks() as demo:
    gr.Markdown("English Transliteration Tool")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Input text", info="Please enter English text.")
            full_transliteration = gr.Checkbox(label="Full transliteration", value=True)
        output_text = gr.Textbox(label="Output text")
    transliterate = gr.Button("Submit")
    transliterate.click(
        dubpro_english_transliteration,
        [input_text, full_transliteration],
        output_text,
    )

    gr.Markdown("Rephraser Tool")
    with gr.Row():
        rephrase_text = gr.Textbox(label="Input text", info="Please enter text.")
        language = gr.Dropdown(["English", "Hindi"], value="Hindi")
        solving_for = gr.Dropdown(["Gap", "Overflow"], value="Overflow", label="Solving for:")
    with gr.Row():
        word_count = gr.Textbox(label="Word count")
        rephrased_text = gr.Textbox(label="Output text")
    rephrase = gr.Button("Submit")
    rephrase.click(
        generate_rephrases_gemini,
        [rephrase_text, language, solving_for],
        [rephrased_text, word_count],
    )

# Basic-auth credentials are read from the USERNAME / PASSWORD environment
# variables.
demo.launch(auth=(os.environ.get("USERNAME"), os.environ.get("PASSWORD")))