from pathlib import Path

import gradio as gr
import openai
import os
import tiktoken

# Set secret key (the OpenAI API key, stored in the "NextStar" environment variable)
HF_TOKEN = os.getenv("NextStar")

# Set prompt engineering paths (so globally available)
inStructionPath = "intro_instructions_combine.txt"
inRulesPath = "formatting_rules_expanded.txt"
inExamplesPath = "examples_longer1.txt"
inDialoguesPath = "examples_dialogues.txt"


def openReadFiles(inpath):
    """Read a prompt-engineering file and return its contents as a string."""
    infile = Path(inpath)
    with open(infile) as f:
        data = f.read()
    return data


# Set up prompting data (so globally available)
instruct = openReadFiles(inStructionPath)
rules = openReadFiles(inRulesPath)
examples = openReadFiles(inExamplesPath)
exampleDialogues = openReadFiles(inDialoguesPath)


def formatQuery(engText):
    """Add prompt instructions to English text for GPT-4."""
    # Local name chosen so it does not shadow the global `instruct` prompt.
    queryInstruct = (
        "Now, translate the following sentences to perfect ASL gloss using the "
        "grammatical, syntactic, and notation rules you just learned. \n\n"
    )
    query = queryInstruct + engText
    return query


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens


def checkTokens(tokens):
    """Checks the token count to ensure we can translate to ASL gloss."""
    if tokens >= 553:
        print(f"Cannot translate to ASL gloss at this time: too many tokens {tokens}")
        goAhead = False
    else:
        goAhead = True
        print("Has fewer than 553 tokens - can continue translating")
    return goAhead


def getGlossFromText(query):
    """Run the full pipeline: format the query, check the token count, and get ASL gloss."""
    text = formatQuery(query)
    tokens = num_tokens_from_string(text, "cl100k_base")
    goAhead = checkTokens(tokens)
    if goAhead:
        results = getASLGloss(text)
    else:
        results = "Too many tokens: cannot translate"
    return results


def getASLGloss(testQs):
    """Get ASL gloss from OpenAI using our prompt engineering."""
    openai.api_key = HF_TOKEN
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": instruct},
            {"role": "system", "content": rules},
            {"role": "system", "content": examples},
            {"role": "system", "content": exampleDialogues},
            {"role": "user", "content": testQs},
        ],
        temperature=0,
    )
    results = completion["choices"][0]["message"]["content"]
    return results


def main():
    title = "English to ASL Gloss"
    # description = """Translate English text to ASL Gloss"""
    description = (
        "This program uses GPT-4 alongside prompt engineering to "
        "translate English text to ASL gloss.\n "
        "Type in the English sentence you would like to translate into ASL Gloss. "
        "\n \n This version of EngToASLGloss contains superscript notation which adds "
        "grammatical context to assist in ASL generation. "
        "\n Below are the guidelines we are using to express grammatical concepts "
        "in ASL gloss. "
        "Anything within the angle brackets < > indicates this additional grammatical notation. "
        "If the angle brackets are directly next to a word, the notation inside "
        "the angle brackets is associated with just that word, e.g. WILL < A >. "
        "If the angle brackets are next to a whitespace after a word, "
        "the notation inside the angle brackets is associated with all of the words "
        "before it, up until a comma, another angle bracket, or a double space. "
        "\n \n This sentence is an example of this rule: "
        "\n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \\< A \\>. "
        "\n \n The superscript notation options that will appear in results are as follows: "
        "\n Ti marks time "
        "\n T marks topic "
        "\n A marks comment "
        "\n Y/N marks yes-no question "
        "\n WHQ marks wh-question "
        "\n RHQ marks rhetorical question "
        "\n < Cond > marks conditional sentences "
        "\n lower case marks directional verbs "
        "\n ++ marks emphasis ('very' or 'a lot of') "
        "\n \\# marks lexical fingerspelling "
        "\n \\- marks space between individual letters of fingerspelling "
        "\n \n Note: This is a prototype and is still in development. "
        "Do not use it in a production deployment. "
        "\n For additional details on how the program works, please see "
        "[the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
    )

    interface = gr.Interface(
        fn=getGlossFromText,
        inputs="textbox",
        outputs="text",
        title=title,
        description=description,
    )
    # examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR, MY DAD GIFT, ME BUY")]]
    # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
    #           ["If I don't travel often, I am sad."]]
    interface.launch()


if __name__ == "__main__":
    main()
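

# A minimal usage sketch (not part of the app): calling the translation pipeline
# directly from a Python shell instead of the Gradio UI, using a sample sentence
# taken from the commented-out Gradio examples above. This assumes the prompt files
# listed at the top are present and that the "NextStar" environment variable holds
# a valid OpenAI API key.
#
# sample = "Every year I buy my dad a gift"
# print(getGlossFromText(sample))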