# NOTE: the next line is page residue from a web file viewer, not program code.
# deepsync's picture | Update app.py | 59c6335 verified | raw | history blame | 8.68 kB
import os
import re
import json
import time
import requests
import gradio as gr
import google.auth
from google.auth.transport.requests import Request
def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
    """Replace whole words of *text* using a pipe-separated dictionary file.

    Every non-blank line of the file must hold three ``|``-separated fields:
    ``initial|incorrect|correct``.  With ``initial_lookup=True`` words equal
    to ``initial`` are replaced by ``correct``; otherwise words equal to
    ``incorrect`` are.  A word immediately followed by one ``.``, ``?`` or
    ``,`` is matched as well and keeps that punctuation mark.

    Args:
        text: Input text; it is split on whitespace into words.
        dictionary_path: Path of the dictionary file.  A falsy value disables
            the lookup and returns ``text`` unchanged.
        initial_lookup: Selects which dictionary column is matched (see above).

    Returns:
        The (possibly replaced) words joined by single spaces.

    Raises:
        OSError: If the dictionary file cannot be opened.
        ValueError: If a non-blank line does not have exactly three fields.
    """
    if not dictionary_path:
        # Fixed: this used to return the undefined name `texts`.
        return text

    replacements = {}
    # The dictionary holds Devanagari text, so decode explicitly as UTF-8
    # rather than relying on the platform's default encoding.
    with open(dictionary_path, encoding="utf-8") as f:
        for line in f.read().splitlines():
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            initial, incorrect, correct = line.split("|")
            source = initial if initial_lookup else incorrect
            replacements[source] = correct
            for mark in (".", "?", ","):
                replacements[source + mark] = correct + mark

    print(f"Original [{initial_lookup}]: ", text)
    new_text = " ".join(replacements.get(word, word) for word in text.split())
    # Fixed: log the replaced text, not the input again.
    print(f"New [{initial_lookup}]: ", new_text)
    return new_text
def get_google_token():
    """Return a freshly refreshed Google access token.

    The credentials are built from the JSON document stored in the
    ``GCP_FINETUNE_KEY`` environment variable and are requested with the
    cloud-platform and generative-language tuning scopes.
    """
    key_info = json.loads(os.environ.get('GCP_FINETUNE_KEY'))
    requested_scopes = [
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/generative-language.tuning",
    ]
    creds, _project = google.auth.load_credentials_from_dict(
        key_info,
        scopes=requested_scopes,
    )
    # The token attribute is only read after an explicit refresh.
    creds.refresh(Request())
    return creds.token
def transliterate_first_word(text):
    """Transliterate only the first word of *text* via Google Input Tools.

    The first whitespace-separated word is sent to
    ``https://inputtools.google.com/request`` and replaced by the first
    candidate found at ``[1][0][1]`` of the JSON response; the remainder of
    the text is kept as is.

    Args:
        text: Input string.

    Returns:
        ``text`` unchanged when it is empty/whitespace-only or when its first
        word is not purely alphanumeric; otherwise the transliterated first
        word followed by the rest of the text.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
    """
    parts = text.split(maxsplit=1)
    if not parts:
        # Fixed: empty or whitespace-only input used to raise IndexError.
        return text

    first_word = parts[0]
    rest = parts[1] if len(parts) > 1 else ""
    if not first_word.isalnum():
        return text

    url = "https://inputtools.google.com/request"
    params = {
        "text": first_word,
        "num": 1,  # a single candidate is requested
        "itc": "hi-t-i0-und",  # presumably the Hindi transliteration scheme -- confirm
        "cp": 0,
        "cs": 1,
        "ie": "utf-8",
        "app": "demopage",
    }
    # A timeout keeps a stalled request from hanging the caller indefinitely.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    candidates = response.json()[1][0][1]
    first_word_transliterated = candidates[0]

    # Avoid a dangling trailing space when the input was a single word.
    if not rest:
        return first_word_transliterated
    return f"{first_word_transliterated} {rest}"
def clean(result):
    """Extract and sanitise the reply text of a chat-completion response.

    Reads ``result["choices"][0]["message"]["content"]``, removes
    parenthesised and bracketed spans, strips surrounding single quotes and
    drops double quotes and backticks.  For a multi-line reply only the last
    line is kept.  The result is passed through
    ``clean_hindi_transliterated_text``.

    Args:
        result: Decoded JSON body of the chat-completion response.

    Returns:
        The cleaned text returned by ``clean_hindi_transliterated_text``.
    """
    text = result["choices"][0]['message']["content"]
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "").replace('`', "")

    # Fixed: split the newline-stripped text. Splitting the raw text made a
    # reply ending in "\n" yield an empty last line, i.e. an empty result.
    trimmed = text.strip("\n")
    if "\n" in trimmed:
        text = trimmed.split("\n")[-1]
    return clean_hindi_transliterated_text(text)
def clean_hindi_transliterated_text(text):
    """Normalise transliterated text and apply the dictionary corrections.

    Removes backticks, the ``output:`` marker, braces, ``'text'`` and colons,
    maps three Devanagari characters to their replacements, trims whitespace
    and surrounding quotes, then runs ``update_text_from_dictionary`` with its
    default arguments.
    """
    # Applied strictly in this order; "output:" must go before the bare ":".
    substitutions = (
        ('`', ''),
        ("output:", ""),
        ('ऑ', 'औ'),
        ('ॉ', 'ौ'),
        ('ॅ', 'े'),
        ("{", ""),
        ("}", ""),
        ("'text'", ""),
        (":", ""),
    )
    cleaned = text
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    cleaned = cleaned.strip()
    cleaned = cleaned.strip("'")
    cleaned = cleaned.strip('"')
    return update_text_from_dictionary(cleaned)
def dubpro_english_transliteration(text, call_gpt):
    """Transliterate English *text* into Hindi script.

    Args:
        text: English text to transliterate.
        call_gpt: When truthy, the OpenAI chat-completions endpoint (model
            ``gpt-4``) is used and the reply is post-processed by ``clean``.
            Otherwise the endpoint named by the
            ``GEMINI_FINETUNED_HINDI_ENG_API`` environment variable is called
            and the first word of its reply is re-transliterated with
            ``transliterate_first_word``.

    Returns:
        The transliterated text.

    Raises:
        RuntimeError: If the OpenAI endpoint keeps answering with a non-200
            status after all retry attempts.
    """
    if call_gpt:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
        }
        prompt = f"Given the English text, transliterate it to Hindi, without translation. Return only the transliterated text, without any instruction or messages. Text: `{text}`\nOutput: "
        body = {
            "model": "gpt-4",
            "messages": [
                {"role": "user", "content": prompt}
            ],
        }

        # Fixed: the former `while resp is None` loop never retried, because
        # `resp` is non-None after the first request; a failed response was
        # handed to clean() and crashed on the missing "choices" key.
        # Retry a bounded number of times, then fail explicitly.
        max_attempts = 5
        resp = None
        for attempt in range(1, max_attempts + 1):
            resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=body,
            )
            if resp.status_code == 200:
                return clean(resp.json())
            print(resp.text)
            if attempt < max_attempts:
                time.sleep(0.5)
        raise RuntimeError(
            f"OpenAI request failed after {max_attempts} attempts "
            f"(last status {resp.status_code})"
        )

    API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
    BEARER_TOKEN = get_google_token()
    headers = {
        "Authorization": f"Bearer {BEARER_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {
                "parts": [{"text": f"input: {text}"}],
                "role": "user",
            }
        ],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.85,
        },
        "safetySettings": [
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ],
    }
    result = requests.post(
        url=API_URL,
        headers=headers,
        json=payload
    )
    response = result.json()
    raw_text = response['candidates'][0]['content']['parts'][0]['text']
    # Strip the wrapper the model may put around its answer
    # ("output:", "'text':", braces, surrounding quotes).
    response_content = (
        raw_text.replace("output:", "")
        .strip()
        .replace("'text':", "")
        .replace("{", "")
        .replace("}", "")
        .strip()
        .strip("'")
        .strip('"')
    )
    response_content = transliterate_first_word(response_content)
    return response_content
def generate_rephrases_gemini(text, language, problem):
    """Ask the Gemini rephraser endpoint for a longer or shorter rephrasing.

    Args:
        text: Sentence to rephrase.
        language: ``"English"`` or ``"Hindi"``; selects the instruction prompt.
        problem: ``"Gap"`` asks for a rephrasing that takes more time to
            speak; any other value asks for one that takes less.

    Returns:
        A ``(rephrased_text, word_count_summary)`` tuple.  Hyphens are removed
        from every line of the model output and each line is stripped.

    Raises:
        ValueError: If ``language`` is neither ``"English"`` nor ``"Hindi"``.
    """
    speak = "more" if problem == "Gap" else "less"

    # Validate before any network call. Previously an unsupported language
    # left `prompt` unbound and failed with UnboundLocalError after the
    # access token had already been fetched.
    if language == "English":
        prompt = f"You are an English and Hindi language expert, please rephrase a sentence that has been translated from Hindi to English so that it takes little {speak} time to speak."
    elif language == "Hindi":
        prompt = f"You are a hindi language expert please rephrase the below line without summary so that it takes little {speak} time to speak in hinglish manner."
    else:
        raise ValueError(
            f"Unsupported language: {language!r} (expected 'English' or 'Hindi')"
        )

    API_URL = os.environ.get("GEMINI_REPHRASER_API")
    BEARER_TOKEN = get_google_token()
    headers = {
        "Authorization": f"Bearer {BEARER_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {"text": f"input: {text}"},
                    {"text": "output: "},
                ],
                "role": "user",
            }
        ],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.85,
            "candidateCount": 1,
        },
        "safetySettings": [
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ],
    }
    result = requests.post(url=API_URL, headers=headers, json=payload)
    response = result.json()
    output_text = response["candidates"][0]["content"]["parts"][0]["text"]

    texts = "\n".join(
        line.replace("-", "").strip() for line in output_text.split("\n")
    )
    wc = f"Original Word Count: {len(text.split())}\nRephrased Word Count: {len(texts.split())}"
    return texts, wc
# Gradio UI: two tools on one page, each wired to one function defined above.
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a source whose indentation had been stripped -- confirm against the app.
with gr.Blocks() as demo:
    # Transliteration tool: the checkbox value is passed as `call_gpt`.
    gr.Markdown("English Transliteration Tool")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Input text", info="Please enter English text.")
            full_transliteration = gr.Checkbox(label="Full transliteration", value=True)
        output_text = gr.Textbox(label="Output text")
    transliterate = gr.Button("Submit")
    transliterate.click(
        fn=dubpro_english_transliteration,
        inputs=[input_text, full_transliteration],
        outputs=output_text,
    )

    # Rephraser tool: returns the rephrased text and a word-count summary.
    gr.Markdown("Rephraser Tool")
    with gr.Row():
        rephrase_text = gr.Textbox(label="Input text", info="Please enter text.")
        language = gr.Dropdown(choices=["English", "Hindi"], value="Hindi")
        solving_for = gr.Dropdown(choices=["Gap", "Overflow"], value="Overflow", label="Solving for:")
    with gr.Row():
        word_count = gr.Textbox(label="Word count")
        rephrased_text = gr.Textbox(label="Output text")
    rephrase = gr.Button("Submit")
    rephrase.click(
        fn=generate_rephrases_gemini,
        inputs=[rephrase_text, language, solving_for],
        outputs=[rephrased_text, word_count],
    )

# Login credentials are read from the USERNAME / PASSWORD environment variables.
demo.launch(
    auth=(os.environ.get("USERNAME"), os.environ.get("PASSWORD")),
)