Spaces:

deepsync
/

english-rephraser-transliterator

Sleeping

App Files Files Community

english-rephraser-transliterator / app.py

deepsync

Update app.py

59c6335 verified 7 months ago

raw

history blame

8.68 kB

	import os
	import re
	import json
	import time
	import requests
	import gradio as gr

	import google.auth
	from google.auth.transport.requests import Request


	def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
	    """Replace whitespace-separated words of ``text`` using a dictionary file.

	    Every usable line of the dictionary file holds exactly three fields, in
	    the order ``initial``, ``incorrect``, ``correct``. Fields are separated
	    by a pipe character; a pipe preceded by a backslash (the separator the
	    previous implementation split on) is accepted as well. Blank lines and
	    lines that do not yield three fields are skipped.

	    Args:
	        text: Input string. It is split on whitespace and the resulting
	            words are re-joined with single spaces.
	        dictionary_path: Path of the dictionary file. A falsy value disables
	            the lookup and ``text`` is returned unchanged.
	        initial_lookup: When true, words are matched against the ``initial``
	            field; otherwise against the ``incorrect`` field.

	    Returns:
	        The text with each matching word replaced by its ``correct`` form.
	        A word directly followed by ``.``, ``?`` or ``,`` is matched too and
	        keeps that punctuation mark.

	    Raises:
	        OSError: If the dictionary file cannot be opened.
	    """
	    if not dictionary_path:
	        return text

	    # Presumably the file holds Hindi text, so do not rely on the locale's
	    # default encoding when reading it.
	    with open(dictionary_path, encoding="utf-8") as f:
	        lines = f.read().splitlines()

	    replacements = {}
	    for line in lines:
	        fields = re.split(r"\\?\|", line)
	        if len(fields) != 3:
	            # Blank or malformed line: skip it instead of failing the unpack.
	            continue
	        initial, incorrect, correct = fields
	        source = initial if initial_lookup else incorrect
	        replacements[source] = correct
	        # Words are matched whole, so register the punctuated variants too.
	        for mark in ".?,":
	            replacements[source + mark] = correct + mark

	    print(f"Original [{initial_lookup}]: ", text)
	    new_text = " ".join(replacements.get(word, word) for word in text.split())
	    print(f"New [{initial_lookup}]: ", new_text)
	    return new_text


	def get_google_token():
	    """Return a freshly refreshed Google OAuth access token.

	    The credentials info is read as JSON from the ``GCP_FINETUNE_KEY``
	    environment variable and loaded with the cloud-platform and
	    generative-language tuning scopes.
	    """
	    key_info = json.loads(os.environ.get('GCP_FINETUNE_KEY'))
	    oauth_scopes = [
	        "https://www.googleapis.com/auth/cloud-platform",
	        "https://www.googleapis.com/auth/generative-language.tuning",
	    ]
	    creds, _project_id = google.auth.load_credentials_from_dict(key_info, scopes=oauth_scopes)
	    # The token attribute is read only after an explicit refresh.
	    creds.refresh(Request())
	    return creds.token


	def transliterate_first_word(text):
	    """Transliterate only the first word of ``text`` with Google Input Tools.

	    The first whitespace-separated word is sent to the Input Tools endpoint
	    with the ``hi-t-i0-und`` input-tool code and replaced by the first
	    suggestion returned. The rest of the text is left untouched.

	    Args:
	        text: Input string.

	    Returns:
	        ``text`` unchanged when it is empty or whitespace-only, when its
	        first word is not purely alphanumeric, or when the service returns
	        no usable suggestion. Otherwise the transliterated first word
	        followed by the remainder of the text (no trailing space is added
	        when there is no remainder).

	    Raises:
	        requests.RequestException: If the HTTP request fails or times out.
	    """
	    parts = text.split(maxsplit=1)
	    if not parts:
	        # Empty or whitespace-only input: there is no first word to look up.
	        return text
	    first_word = parts[0]
	    rest = parts[1] if len(parts) > 1 else ""
	    if not first_word.isalnum():
	        return text

	    url = "https://inputtools.google.com/request"
	    params = {
	        "text": first_word,
	        "num": 1,
	        "itc": "hi-t-i0-und",
	        "cp": 0,
	        "cs": 1,
	        "ie": "utf-8",
	        "app": "demopage",
	    }
	    # A timeout keeps a stalled connection from blocking the caller forever.
	    response = requests.get(url, params=params, timeout=10)
	    try:
	        # Same path into the JSON payload as before: [1][0][1] is the list
	        # of suggestions, of which only the first is used.
	        first_word_transliterated = response.json()[1][0][1][0]
	    except (IndexError, KeyError, TypeError):
	        # No suggestion in the payload: keep the text as it was.
	        return text
	    if not rest:
	        return first_word_transliterated
	    return f"{first_word_transliterated} {rest}"


	def clean(result):
	    """Extract the reply text from a chat-completions result and tidy it.

	    Args:
	        result: Parsed JSON response; the text is read from
	            ``result["choices"][0]["message"]["content"]``.

	    Returns:
	        The value of ``clean_hindi_transliterated_text`` applied to the reply
	        after removing parenthesised / bracketed asides, quote and backtick
	        characters, and everything but the last line of a multi-line reply.

	    Raises:
	        KeyError, IndexError: If ``result`` does not have the expected shape.
	    """
	    text = result["choices"][0]['message']["content"]
	    # Remove "(...)" and "[...]" asides. The previous pattern escaped the
	    # alternation bar and allowed at most one character inside the
	    # brackets, so it practically never matched.
	    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
	    text = text.strip("'").replace('"', "").replace('`', "")
	    # Strip surrounding newlines before splitting; otherwise a reply that
	    # ends with a newline would produce an empty "last line".
	    text = text.strip("\n")
	    if "\n" in text:
	        text = text.split("\n")[-1]
	    return clean_hindi_transliterated_text(text)


	def clean_hindi_transliterated_text(text):
	    """Normalise transliterated text and run it through the dictionary lookup.

	    Removes backticks, ``output:`` markers, braces, ``'text'`` and colons,
	    swaps three Devanagari characters for the replacements listed below,
	    trims whitespace and surrounding quotes, and finally returns the result
	    of ``update_text_from_dictionary`` on the cleaned string.
	    """
	    # Applied strictly in this order: "output:" has to be removed before the
	    # bare ":" rule would break it apart.
	    substitutions = (
	        ('`', ''),
	        ("output:", ""),
	        ('ऑ', 'औ'),
	        ('ॉ', 'ौ'),
	        ('ॅ', 'े'),
	        ("{", ""),
	        ("}", ""),
	        ("'text'", ""),
	        (":", ""),
	    )
	    cleaned = text
	    for old, new in substitutions:
	        cleaned = cleaned.replace(old, new)

	    # Whitespace first, then single quotes, then double quotes.
	    cleaned = cleaned.strip()
	    cleaned = cleaned.strip("'")
	    cleaned = cleaned.strip('"')
	    return update_text_from_dictionary(cleaned)


	def _request_gpt_transliteration(text, max_attempts=3, retry_delay=0.5):
	    """Call the OpenAI chat-completions API and return its parsed JSON reply."""
	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
	    }
	    prompt = f"Given the English text, transliterate it to Hindi, without translation. Return only the transliterated text, without any instruction or messages. Text: `{text}`\nOutput: "
	    payload = {
	        "model": "gpt-4",
	        "messages": [
	            {"role": "user", "content": prompt}
	        ],
	    }
	    last_error = ""
	    # Bounded retry: the earlier loop left after the first response whatever
	    # its status, so a failed call was handed on as if it had succeeded.
	    for attempt in range(1, max_attempts + 1):
	        resp = requests.post(
	            "https://api.openai.com/v1/chat/completions",
	            headers=headers,
	            json=payload,
	            timeout=60,
	        )
	        if resp.status_code == 200:
	            return resp.json()
	        last_error = resp.text
	        print(last_error)
	        if attempt < max_attempts:
	            time.sleep(retry_delay)
	    raise RuntimeError(
	        f"OpenAI chat completion failed after {max_attempts} attempts: {last_error}"
	    )


	def _request_gemini_transliteration(text):
	    """Call the endpoint named by GEMINI_FINETUNED_HINDI_ENG_API and return the cleaned reply."""
	    api_url = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
	    headers = {
	        "Authorization": f"Bearer {get_google_token()}",
	        "Content-Type": "application/json",
	    }
	    payload = {
	        "contents": [
	            {
	                "parts": [{"text": f"input: {text}"}],
	                "role": "user",
	            }
	        ],
	        "generationConfig": {
	            "maxOutputTokens": 8192,
	            "temperature": 0.85,
	        },
	        "safetySettings": [
	            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
	        ],
	    }
	    result = requests.post(url=api_url, headers=headers, json=payload, timeout=60)
	    # Fail with the HTTP status rather than a KeyError on the missing
	    # "candidates" key further down.
	    result.raise_for_status()
	    response = result.json()
	    raw = response['candidates'][0]['content']['parts'][0]['text']
	    # Strip the wrapper fragments the reply may carry ("output:", "'text':",
	    # braces, surrounding quotes), in the same order as before.
	    response_content = (
	        raw.replace("output:", "")
	        .strip()
	        .replace("'text':", "")
	        .replace("{", "")
	        .replace("}", "")
	        .strip()
	        .strip("'")
	        .strip('"')
	    )
	    return transliterate_first_word(response_content)


	def dubpro_english_transliteration(text, call_gpt):
	    """Transliterate English ``text`` with one of two remote models.

	    Args:
	        text: The text to transliterate.
	        call_gpt: When truthy, the OpenAI ``gpt-4`` chat model is used and its
	            reply is post-processed by ``clean``. Otherwise the endpoint
	            named by the ``GEMINI_FINETUNED_HINDI_ENG_API`` environment
	            variable is used and the first word of its reply is passed
	            through ``transliterate_first_word``.

	    Returns:
	        The transliterated text as a string.

	    Raises:
	        RuntimeError: If the OpenAI API keeps answering with a non-200 status.
	        requests.RequestException: On network failures, timeouts, or an
	            error status from the Gemini endpoint.
	    """
	    if call_gpt:
	        return clean(_request_gpt_transliteration(text))
	    return _request_gemini_transliteration(text)


	def _build_rephrase_prompt(language, problem):
	    """Return the instruction prompt for ``language`` and ``problem``, or raise ValueError."""
	    # "Gap" asks for a rephrase that takes longer to speak; anything else
	    # asks for a shorter one.
	    speak = "more" if problem == "Gap" else "less"
	    if language == "English":
	        return f"You are an English and Hindi language expert, please rephrase a sentence that has been translated from Hindi to English so that it takes little {speak} time to speak."
	    if language == "Hindi":
	        return f"You are a hindi language expert please rephrase the below line without summary so that it takes little {speak} time to speak in hinglish manner."
	    raise ValueError(f"Unsupported language: {language!r} (expected 'English' or 'Hindi')")


	def generate_rephrases_gemini(text, language, problem):
	    """Rephrase ``text`` through the endpoint named by GEMINI_REPHRASER_API.

	    Args:
	        text: The sentence to rephrase.
	        language: ``"English"`` or ``"Hindi"``; selects the instruction prompt.
	        problem: ``"Gap"`` requests a rephrase that takes more time to speak;
	            any other value requests one that takes less.

	    Returns:
	        A ``(rephrased_text, word_count_summary)`` tuple. Every line of the
	        model reply has its ``-`` characters removed and is stripped; the
	        summary reports the whitespace-separated word counts of the input
	        and of the rephrased text.

	    Raises:
	        ValueError: If ``language`` is not supported. This is checked before
	            any credentials are fetched or requests are sent.
	        requests.RequestException: On network failures, timeouts, or an
	            error status from the endpoint.
	    """
	    # Validate first so a bad language fails fast instead of surfacing as an
	    # unbound ``prompt`` after the token round-trip.
	    prompt = _build_rephrase_prompt(language, problem)

	    api_url = os.environ.get("GEMINI_REPHRASER_API")
	    headers = {
	        "Authorization": f"Bearer {get_google_token()}",
	        "Content-Type": "application/json",
	    }
	    payload = {
	        "contents": [
	            {
	                "parts": [
	                    {"text": prompt},
	                    {"text": f"input: {text}"},
	                    {"text": "output: "},
	                ],
	                "role": "user",
	            }
	        ],
	        "generationConfig": {
	            "maxOutputTokens": 8192,
	            "temperature": 0.85,
	            "candidateCount": 1,
	        },
	        "safetySettings": [
	            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
	            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
	        ],
	    }
	    result = requests.post(url=api_url, headers=headers, json=payload, timeout=60)
	    # Fail with the HTTP status rather than a KeyError on "candidates".
	    result.raise_for_status()
	    response = result.json()
	    output_text = response["candidates"][0]["content"]["parts"][0]["text"]

	    texts = "\n".join(
	        line.replace("-", "").strip() for line in output_text.split("\n")
	    )
	    wc = f"Original Word Count: {len(text.split())}\nRephrased Word Count: {len(texts.split())}"

	    return texts, wc


	# Gradio UI: one Blocks app holding two independent tools, a transliteration
	# form and a rephraser form, each with its own Submit button.
	with gr.Blocks() as demo:
	# --- Transliteration tool ---
	gr.Markdown("English Transliteration Tool")
	with gr.Row():
	with gr.Column():
	input_text = gr.Textbox(label="Input text", info="Please enter English text.")
	# The checkbox value is passed as the second argument (call_gpt) of
	# dubpro_english_transliteration; it is ticked by default.
	full_transliteration = gr.Checkbox(label="Full transliteration", value=True)
	output_text = gr.Textbox(label="Output text")
	transliterate = gr.Button("Submit")
	# Inputs are handed to the callback positionally: (text, call_gpt).
	transliterate.click(dubpro_english_transliteration, [input_text, full_transliteration], output_text)

	# --- Rephraser tool ---
	gr.Markdown("Rephraser Tool")
	with gr.Row():
	rephrase_text = gr.Textbox(label="Input text", info="Please enter text.")
	language = gr.Dropdown(["English", "Hindi"], value="Hindi")
	solving_for = gr.Dropdown(["Gap", "Overflow"], value="Overflow", label="Solving for:")
	with gr.Row():
	word_count = gr.Textbox(label="Word count")
	rephrased_text = gr.Textbox(label="Output text")
	rephrase = gr.Button("Submit")
	# generate_rephrases_gemini returns (texts, wc), which land in
	# rephrased_text and word_count respectively.
	rephrase.click(generate_rephrases_gemini, [rephrase_text, language, solving_for], [rephrased_text, word_count])


	# The login pair comes from the USERNAME and PASSWORD environment variables.
	# NOTE(review): if either is unset the tuple contains None - confirm how
	# launch() treats that before relying on it for access control.
	demo.launch(auth=(os.environ.get("USERNAME"), os.environ.get("PASSWORD")))