EngTexToASLGloss / eng_to_aslGloss_app.py
import os

# Temporary debug check: read the Space secret "VerySecret" from the environment and print it.
secret = os.getenv("VerySecret")
print(secret)
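
# A minimal, safer sketch (an assumption, not part of the original app): report only
# whether "VerySecret" is present rather than echoing its value.
if secret is None:
    print("Environment variable 'VerySecret' is not set.")
else:
    print(f"Environment variable 'VerySecret' is set ({len(secret)} characters).")
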
# from pathlib import Path
# import gradio as gr
# #import openai
# import os
# import tiktoken
# from openai import OpenAI
# # Set secret key
# #HF_TOKEN = os.getenv("NextStar")
# # Set client and secret key
# client = OpenAI(api_key=os.getenv("NextStar"))
# #Set prompt engineering paths (so globally available)
# inStructionPath = "intro_instructions_combine.txt"
# inRulesPath = "formatting_rules_expanded.txt"
# inExamplesPath = "examples_longer1.txt"
# inDialoguesPath = "examples_dialogues.txt"
# #Set to read in prompting files
# def openReadFiles(inpath):
#     infile = Path(inpath)
#     with open(infile) as f:
#         data = f.read()
#     return data
# # Set up prompting data (so globally available)
# instruct = openReadFiles(inStructionPath)
# rules = openReadFiles(inRulesPath)
# examples = openReadFiles(inExamplesPath)
# exampleDialogues = openReadFiles(inDialoguesPath)
# def formatQuery(engText):
# """Add prompt instructions to English text for GPT4"""
# instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
# query = instruct+engText
# return query
# def num_tokens_from_string(string: str, encoding_name: str) -> int:
# """Returns the number of tokens in a text string."""
# encoding = tiktoken.get_encoding(encoding_name)
# num_tokens = len(encoding.encode(string))
# return num_tokens
# def checkTokens(tokens):
# """Checks tokens to ensrue we can translate to ASL gloss"""
# goAhead = None
# if tokens >= 553:
# print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
# goAhead = False
# else:
# goAhead = True
# print(f"Number of tokens is acceptable: can continue translating")
# return goAhead
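# # A minimal usage sketch (illustrative, not part of the original app) showing how the
# # token guard composes with formatQuery; the exact count depends on tiktoken's
# # "cl100k_base" encoding, the same one used in getGlossFromText below:
# #   sample = formatQuery("Every year I buy my dad a gift")
# #   sample_tokens = num_tokens_from_string(sample, "cl100k_base")
# #   checkTokens(sample_tokens)  # prints whether the 553-token limit allows translation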
# def getGlossFromText(query):
# """Sets all for getting ASL gloss"""
# text = formatQuery(query)
# tokens = num_tokens_from_string(text, "cl100k_base")
# goAhead = checkTokens(tokens)
# if goAhead == True:
# results = getASLGloss(text)
# else:
# results = "Too many tokens: cannot translate"
# return results
# def getASLGloss(testQs):
# """Get ASL gloss from OpenAI using our prompt engineering"""
# #openai.api_key = HF_TOKENS
# completion = client.chat.completions.create(
# model = 'gpt-4-0125-preview',
# messages = [
# {"role": "system", "content": instruct},
# {"role": "system", "content": rules},
# {"role": "system", "content": examples},
# {"role": "system", "content": exampleDialogues},
# {"role": "user", "content": testQs},
# ],
# temperature = 0
# )
# #results = completion['choices'][0]['message']['content']
# results = completion.choices[0].message.content
# return results
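# # A minimal usage sketch (illustrative, not part of the original app): once the OpenAI
# # client and the four prompt files are in place, the whole pipeline runs through
# # getGlossFromText, which is also the function wired into Gradio below.
# #   gloss = getGlossFromText("Every year I buy my dad a gift")
# #   print(gloss)  # the commented example below pairs this prompt with
# #                 # "EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>"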
# def main():
#     title = "English to ASL Gloss"
#     #description = """Translate English text to ASL Gloss"""
#     description = "This program uses GPT4 alongside prompt engineering to \
# translate English text to ASL gloss.\n \
# <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
# \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
# \n\n \
# \n \n This version of EngToASLGloss contains superscript notation which adds \
# grammatical context to assist in ASL generation. \
# \n Below are the guidelines we are using to express grammatical concepts \
# in ASL gloss. \
# Anything within the angle brackets < > indicates this additional grammatical notation. \
# If the angle brackets are directly next to a word, the notation inside \
# the angle brackets is associated with just that word, e.g. WILL < A >. \
# If the angle brackets are separated from a word by a space, \
# the notation inside the angle brackets is associated with all of the words \
# before it, up until a comma, another angle bracket, or a double space.\
# \n \n This sentence is an example of this rule:\
# \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
# \n\r \
# \n The superscript notation options that will appear in results are as follows:\
# \n Ti marks time\
# \n T marks topic\
# \n A marks comment\
# \n Y/N marks yes-no question\
# \n WHQ marks wh-question\
# \n RHQ marks rhetorical question\
# \n < Cond > marks conditional sentences\
# \n lower case marks directional verbs\
# \n ++ marks emphasis ('very' or 'a lot of')\
# \n \# marks lexical fingerspelling \
# \n \- marks space between individual letters of fingerspelling\
# \n \n <b>Note: This is a prototype and is still in development. \
# Do not use it in a production deployment.</b> \
# \n For additional details on how the program works, please see \
# [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
#     interface = gr.Interface(
#         fn=getGlossFromText,
#         inputs="textbox",
#         outputs="text",
#         title=title,
#         description=description)
#     #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
#     # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
#     #           ["If I don't travel often, I am sad."]])
#     interface.launch()
# if __name__ == "__main__":
#     main()
# # def getAnswer(query, texts=texts, embeddings=embeddings):
# #     docsearch = FAISS.from_texts(texts, embeddings)
# #     docs = docsearch.similarity_search(query)
# #     chain = load_qa_chain(OpenAI(openai_api_key=HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# #     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #     #interum_q = list(response.keys())
# #     interum_a = list(response.values())
# #     q = query
# #     a = interum_a[0]
# #     return a
# # # query = "describe the fisher database"
# # # docs = docsearch.similarity_search(query)
# # # chain = load_qa_chain(OpenAI(openai_api_key="sk-...", temperature=0), chain_type="map_reduce", return_map_steps=False)
# # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# # title = "Query the S Drive!"
# # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
# # interface = gr.Interface(
# #     fn=getAnswer,
# #     inputs="textbox",
# #     outputs="text",
# #     title=title,
# #     description=description,
# #     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
# #               ["How many audio files do we have in the CallHome database?"]])
# # interface.launch()
# # if __name__ == "__main__":
# #     main()
# # def main():
# #     results = setMode()
# #     print(results)
# # main()