from pathlib import Path
import gradio as gr
import openai
import os
import tiktoken
# Set secret key
# NOTE(review): the env var is named "NextStar" but its value is assigned to
# openai.api_key in getASLGloss -- presumably it holds an OpenAI API key;
# confirm with deployment config.
HF_TOKEN = os.getenv("NextStar")
#Set prompt engineering paths (so globally available)
# Each file holds one section of the prompt-engineering context that is sent
# to GPT-4 as a system message (see getASLGloss).
inStructionPath = "intro_instructions_combine.txt"  # intro/instruction text
inRulesPath = "formatting_rules_expanded.txt"       # gloss formatting rules
inExamplesPath = "examples_longer1.txt"             # worked sentence examples
inDialoguesPath = "examples_dialogues.txt"          # example dialogues
#Set to read in prompting files
def openReadFiles(inpath):
    """Read and return the entire text contents of the file at *inpath*.

    Args:
        inpath: path (string or ``Path``) to a UTF-8 text file.

    Returns:
        The file's contents as a single string.
    """
    # Explicit encoding avoids platform-dependent default codecs when the
    # prompt-engineering files contain non-ASCII characters.
    return Path(inpath).read_text(encoding="utf-8")
# Set up prompting data (so globally available)
# Loaded once at import time; these strings become the system messages sent
# to GPT-4 in getASLGloss.
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)
def formatQuery(engText):
    """Prefix the user's English text with the translation instruction for GPT-4.

    Args:
        engText: raw English text to translate.

    Returns:
        The instruction prompt concatenated with *engText*.
    """
    # Renamed from `instruct` to avoid shadowing the module-level `instruct`
    # (the system-message text used by getASLGloss).
    prompt = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
    return prompt + engText
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count how many tokens *string* encodes to under the named tiktoken encoding."""
    codec = tiktoken.get_encoding(encoding_name)
    return len(codec.encode(string))
def checkTokens(tokens, limit=553):
    """Check whether a token count is small enough to translate to ASL gloss.

    Args:
        tokens: number of tokens in the prepared prompt text.
        limit: maximum allowed token count; defaults to 553, the original
            hard-coded cap, so existing callers behave identically.

    Returns:
        True when *tokens* is below *limit*, otherwise False.
    """
    if tokens >= limit:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        return False
    print(f"Has less than {limit} tokens - can continue translating")
    return True
def getGlossFromText(query):
    """Full pipeline: format the query, check its token budget, and translate.

    Args:
        query: raw English text entered by the user in the Gradio textbox.

    Returns:
        The ASL gloss produced by GPT-4, or an error string when the prompt
        exceeds the token limit.
    """
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    # checkTokens returns a bool; test it directly rather than `== True`.
    if checkTokens(tokens):
        return getASLGloss(text)
    return "Too many tokens: cannot translate"
def getASLGloss(testQs):
    """Send the formatted query to GPT-4 with the prompt-engineering context.

    Args:
        testQs: the already-formatted user prompt (see formatQuery).

    Returns:
        The assistant's reply text (the ASL gloss).
    """
    openai.api_key = HF_TOKEN
    # The instruction/rule/example files become system messages, followed by
    # the user's query as the final message.
    context = (instruct, rules, examples, exampleDialogues)
    messages = [{"role": "system", "content": part} for part in context]
    messages.append({"role": "user", "content": testQs})
    completion = openai.ChatCompletion.create(
        model='gpt-4',
        messages=messages,
        temperature=0,
    )
    return completion['choices'][0]['message']['content']
def main():
    """Build and launch the Gradio UI for English-to-ASL-gloss translation."""
    title = "English to ASL Gloss"
    #description = """Translate English text to ASL Gloss"""
    # One long string literal continued with trailing backslashes; the
    # continuation-line whitespace is part of the runtime string, so the
    # layout below is intentional.
    description = "This program uses GPT4 alongside prompt engineering to \
translate English text to ASL gloss.\n \
Type in the English sentence you would like to translate into ASL Gloss. \
\n \n This version of EngToASLGloss contains superscript notation which adds \
grammatical context to assist in ASL generation. \
\n Below are the guidelines we are using to express grammatical concepts \
in ASL gloss.\
Anything within the angle brackets < > indicates this additional grammatical notation.\
If the angle brackets are directly next to a word, the notation inside \
the angle brackets is associate with just that word, e.g. WILL < A >. \
If the angle brackets are next to a whitespace after a word,\
the notation inside the angle bracket is associated with all of the words\
before it, up until a comma, another angle bracket, or a double space.\
\n \n This sentence is an example of this rule:\
\n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
\n\r \
\n The superscript notation options that will appear in results are as follows:\
\n Ti marks time\
\n T marks topic\
\n A marks comment\
\n Y/N marks yes-no question\
\n WHQ marks wh-question\
\n RHQ marks rhetorical question\
\n < Cond > marks conditional sentences\
\n lower case marks directional verbs\
\n ++ marks emphesis ('very' or 'a lot of')\
\n \# marks lexical fingerspelling \
\n \- marks space between individual letters of fingerspelling\
\n \n Note: This is a prototype and is still in development. \
Do not use it in a production deployment. \
\n For additional details on how the program works, please see \
[the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
    # Single-textbox UI: each submission runs the whole getGlossFromText
    # pipeline and shows the resulting gloss as plain text.
    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title = title,
        description = description)
    #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR, MY DAD GIFT, ME BUY")]])
    # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
    # ["If I don't travel often, I am sad."]])
    interface.launch()
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    main()
# def getAnswer(query, texts = texts, embeddings = embeddings):
# docsearch = FAISS.from_texts(texts, embeddings)
# docs = docsearch.similarity_search(query)
# chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #interum_q = list(response.keys())
# interum_a = list(response.values())
# q = query
# a = interum_a[0]
# return a
# # query = "describe the fisher database"
# # docs = docsearch.similarity_search(query)
# # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
# # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# title = "Query the S Drive!"
# description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
# interface = gr.Interface(
# fn=getAnswer,
# inputs="textbox",
# outputs="text",
# title = title,
# description = description,
# examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
# ["How many audio files do we have in the CallHome database?"]])
# interface.launch()
# if __name__ == "__main__":
# main()
# def main():
# results = setMode()
# print (results)
# main()