ai-lab-macree committed on
Commit 7acf2ae
1 Parent(s): c6c114f

Update eng_to_aslGloss_app.py

Files changed (1)
  1. eng_to_aslGloss_app.py +172 -168
eng_to_aslGloss_app.py CHANGED
@@ -1,180 +1,150 @@
- from pathlib import Path
- import gradio as gr
- #import openai
  import os
- import tiktoken
- from openai import OpenAI
-
-
- # Set secret key
- #HF_TOKEN = os.getenv("NextStar")
-
- # Set client and secret key
- client = OpenAI(api_key=os.getenv("NextStar"))
-
- #Set prompt engineering paths (so globally available)
- inStructionPath = "intro_instructions_combine.txt"
- inRulesPath = "formatting_rules_expanded.txt"
- inExamplesPath = "examples_longer1.txt"
- inDialoguesPath = "examples_dialogues.txt"
-
- #Set to read in prompting files
- def openReadFiles(inpath):
-     infile = Path (inpath)
-     with open(infile) as f:
-         data = f.read()
-     return data
-
-
- # Set up prompting data (so globally available)
- instruct = openReadFiles(inStructionPath)
- rules = openReadFiles(inRulesPath)
- examples = openReadFiles(inExamplesPath)
- exampleDialogues = openReadFiles(inDialoguesPath)
-
-
- def formatQuery(engText):
-     """Add prompt instructions to English text for GPT4"""
-     instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
-     query = instruct+engText
-     return query
-
-
- def num_tokens_from_string(string: str, encoding_name: str) -> int:
-     """Returns the number of tokens in a text string."""
-     encoding = tiktoken.get_encoding(encoding_name)
-     num_tokens = len(encoding.encode(string))
-     return num_tokens
-
-
- def checkTokens(tokens):
-     """Checks tokens to ensure we can translate to ASL gloss"""
-     goAhead = None
-     if tokens >= 553:
-         print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
-         goAhead = False
-     else:
-         goAhead = True
-         print(f"Number of tokens is acceptable: can continue translating")
-     return goAhead

- def getGlossFromText(query):
-     """Sets all for getting ASL gloss"""
-     text = formatQuery(query)
-     tokens = num_tokens_from_string(text, "cl100k_base")
-     goAhead = checkTokens(tokens)
-     if goAhead == True:
-         results = getASLGloss(text)
-     else:
-         results = "Too many tokens: cannot translate"
-     return results
-
-
-
- def getASLGloss(testQs):
-     """Get ASL gloss from OpenAI using our prompt engineering"""
-     #openai.api_key = HF_TOKENS
-     completion = client.chat.completions.create(
-         model = 'gpt-4-0125-preview',
-         messages = [
-             {"role": "system", "content": instruct},
-             {"role": "system", "content": rules},
-             {"role": "system", "content": examples},
-             {"role": "system", "content": exampleDialogues},
-             {"role": "user", "content": testQs},
-         ],

-         temperature = 0
-     )
-     #results = completion['choices'][0]['message']['content']
-     results = completion.choices[0].message.content
-     return results




- def main():
-
-     title = "English to ASL Gloss"
-     #description = """Translate English text to ASL Gloss"""
-     description = "This program uses GPT4 alongside prompt engineering to \
-     translate English text to ASL gloss.\n \
-     <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
-     \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
-     \n\n \
-     \n \n This version of EngToASLGloss contains superscript notation which adds \
-     grammatical context to assist in ASL generation. \
-     \n Below are the guidelines we are using to express grammatical concepts \
-     in ASL gloss.\
-     Anything within the angle brackets < > indicates this additional grammatical notation.\
-     If the angle brackets are directly next to a word, the notation inside \
-     the angle brackets is associated with just that word, e.g. WILL < A >. \
-     If the angle brackets are next to a whitespace after a word,\
-     the notation inside the angle bracket is associated with all of the words\
-     before it, up until a comma, another angle bracket, or a double space.\
-     \n \n This sentence is an example of this rule:\
-     \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
-     \n\r \
-     \n The superscript notation options that will appear in results are as follows:\
-     \n Ti marks time\
-     \n T marks topic\
-     \n A marks comment\
-     \n Y/N marks yes-no question\
-     \n WHQ marks wh-question\
-     \n RHQ marks rhetorical question\
-     \n < Cond > marks conditional sentences\
-     \n lower case marks directional verbs\
-     \n ++ marks emphasis ('very' or 'a lot of')\
-     \n \# marks lexical fingerspelling \
-     \n \- marks space between individual letters of fingerspelling\
-     \n \n <b>Note: This is a prototype and is still in development. \
-     Do not use it in a production deployment.</b> \
-     \n For additional details on how the program works, please see \
-     [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
-
-     interface = gr.Interface(
-         fn=getGlossFromText,
-         inputs="textbox",
-         outputs="text",
-         title = title,
-         description = description)
-     #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
-     # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
-     # ["If I don't travel often, I am sad."]])
-     interface.launch()
-
-
-
- if __name__ == "__main__":
-     main()
-

- # def getAnswer(query, texts = texts, embeddings = embeddings):
- #     docsearch = FAISS.from_texts(texts, embeddings)
- #     docs = docsearch.similarity_search(query)
- #     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
- #     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
- #     #interum_q = list(response.keys())
- #     interum_a = list(response.values())
- #     q = query
- #     a = interum_a[0]
- #     return a
-
- # # query = "describe the fisher database"
- # # docs = docsearch.similarity_search(query)
- # # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
- # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
- # title = "Query the S Drive!"
- # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""

  # interface = gr.Interface(
- #     fn=getAnswer,
  #     inputs="textbox",
  #     outputs="text",
  #     title = title,
- #     description = description,
- #     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
- #     ["How many audio files do we have in the CallHome database?"]])
  # interface.launch()


  import os
+ secret = os.getenv("VerySecret")
+ print(secret)
+
+ # from pathlib import Path
+ # import gradio as gr
+ # #import openai
+ # import os
+ # import tiktoken
+ # from openai import OpenAI
+
+
+ # # Set secret key
+ # #HF_TOKEN = os.getenv("NextStar")
+
+ # # Set client and secret key
+ # client = OpenAI(api_key=os.getenv("NextStar"))
+
+ # #Set prompt engineering paths (so globally available)
+ # inStructionPath = "intro_instructions_combine.txt"
+ # inRulesPath = "formatting_rules_expanded.txt"
+ # inExamplesPath = "examples_longer1.txt"
+ # inDialoguesPath = "examples_dialogues.txt"
+
+ # #Set to read in prompting files
+ # def openReadFiles(inpath):
+ #     infile = Path (inpath)
+ #     with open(infile) as f:
+ #         data = f.read()
+ #     return data
+
+
+ # # Set up prompting data (so globally available)
+ # instruct = openReadFiles(inStructionPath)
+ # rules = openReadFiles(inRulesPath)
+ # examples = openReadFiles(inExamplesPath)
+ # exampleDialogues = openReadFiles(inDialoguesPath)
+
+
+ # def formatQuery(engText):
+ #     """Add prompt instructions to English text for GPT4"""
+ #     instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
+ #     query = instruct+engText
+ #     return query
+
+
+ # def num_tokens_from_string(string: str, encoding_name: str) -> int:
+ #     """Returns the number of tokens in a text string."""
+ #     encoding = tiktoken.get_encoding(encoding_name)
+ #     num_tokens = len(encoding.encode(string))
+ #     return num_tokens
+
+
+ # def checkTokens(tokens):
+ #     """Checks tokens to ensure we can translate to ASL gloss"""
+ #     goAhead = None
+ #     if tokens >= 553:
+ #         print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
+ #         goAhead = False
+ #     else:
+ #         goAhead = True
+ #         print(f"Number of tokens is acceptable: can continue translating")
+ #     return goAhead

+ # def getGlossFromText(query):
+ #     """Sets all for getting ASL gloss"""
+ #     text = formatQuery(query)
+ #     tokens = num_tokens_from_string(text, "cl100k_base")
+ #     goAhead = checkTokens(tokens)
+ #     if goAhead == True:
+ #         results = getASLGloss(text)
+ #     else:
+ #         results = "Too many tokens: cannot translate"
+ #     return results
+
+
+
+ # def getASLGloss(testQs):
+ #     """Get ASL gloss from OpenAI using our prompt engineering"""
+ #     #openai.api_key = HF_TOKENS
+ #     completion = client.chat.completions.create(
+ #         model = 'gpt-4-0125-preview',
+ #         messages = [
+ #             {"role": "system", "content": instruct},
+ #             {"role": "system", "content": rules},
+ #             {"role": "system", "content": examples},
+ #             {"role": "system", "content": exampleDialogues},
+ #             {"role": "user", "content": testQs},
+ #         ],

+ #         temperature = 0
+ #     )
+ #     #results = completion['choices'][0]['message']['content']
+ #     results = completion.choices[0].message.content
+ #     return results




+ # def main():

+ #     title = "English to ASL Gloss"
+ #     #description = """Translate English text to ASL Gloss"""
+ #     description = "This program uses GPT4 alongside prompt engineering to \
+ #     translate English text to ASL gloss.\n \
+ #     <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
+ #     \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
+ #     \n\n \
+ #     \n \n This version of EngToASLGloss contains superscript notation which adds \
+ #     grammatical context to assist in ASL generation. \
+ #     \n Below are the guidelines we are using to express grammatical concepts \
+ #     in ASL gloss.\
+ #     Anything within the angle brackets < > indicates this additional grammatical notation.\
+ #     If the angle brackets are directly next to a word, the notation inside \
+ #     the angle brackets is associated with just that word, e.g. WILL < A >. \
+ #     If the angle brackets are next to a whitespace after a word,\
+ #     the notation inside the angle bracket is associated with all of the words\
+ #     before it, up until a comma, another angle bracket, or a double space.\
+ #     \n \n This sentence is an example of this rule:\
+ #     \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
+ #     \n\r \
+ #     \n The superscript notation options that will appear in results are as follows:\
+ #     \n Ti marks time\
+ #     \n T marks topic\
+ #     \n A marks comment\
+ #     \n Y/N marks yes-no question\
+ #     \n WHQ marks wh-question\
+ #     \n RHQ marks rhetorical question\
+ #     \n < Cond > marks conditional sentences\
+ #     \n lower case marks directional verbs\
+ #     \n ++ marks emphasis ('very' or 'a lot of')\
+ #     \n \# marks lexical fingerspelling \
+ #     \n \- marks space between individual letters of fingerspelling\
+ #     \n \n <b>Note: This is a prototype and is still in development. \
+ #     Do not use it in a production deployment.</b> \
+ #     \n For additional details on how the program works, please see \
+ #     [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"

  # interface = gr.Interface(
+ #     fn=getGlossFromText,
  #     inputs="textbox",
  #     outputs="text",
  #     title = title,
+ #     description = description)
+ #     #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
+ #     # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
+ #     # ["If I don't travel often, I am sad."]])
  # interface.launch()


@@ -182,9 +152,43 @@ if __name__ == "__main__":
  # if __name__ == "__main__":
  #     main()
 
- # def main():
- #     results = setMode()
- #     print (results)
- # main()
 
 
  # if __name__ == "__main__":
  #     main()
 
+
+ # # def getAnswer(query, texts = texts, embeddings = embeddings):
+ # #     docsearch = FAISS.from_texts(texts, embeddings)
+ # #     docs = docsearch.similarity_search(query)
+ # #     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
+ # #     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+ # #     #interum_q = list(response.keys())
+ # #     interum_a = list(response.values())
+ # #     q = query
+ # #     a = interum_a[0]
+ # #     return a
+
+ # # # query = "describe the fisher database"
+ # # # docs = docsearch.similarity_search(query)
+ # # # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
+ # # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+ # # title = "Query the S Drive!"
+ # # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
+
+ # # interface = gr.Interface(
+ # #     fn=getAnswer,
+ # #     inputs="textbox",
+ # #     outputs="text",
+ # #     title = title,
+ # #     description = description,
+ # #     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
+ # #     ["How many audio files do we have in the CallHome database?"]])
+ # # interface.launch()
+
+
+
+ # # if __name__ == "__main__":
+ # #     main()
+
+ # # def main():
+ # #     results = setMode()
+ # #     print (results)
+ # # main()

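For reference, the core flow of the code removed here is: read the four prompt files, prepend the translation instruction to the user's English text, gate the request on a tiktoken count, and send the stacked system prompts to GPT-4 Turbo at temperature 0. The sketch below condenses that flow into a single function. It is illustrative only; the file names, model, 553-token cutoff, instruction text, and the `NextStar` secret are taken from the removed code, while the helper names and the usage line are hypothetical.

import os
import tiktoken
from openai import OpenAI

client = OpenAI(api_key=os.getenv("NextStar"))  # same secret name the removed code read

def count_tokens(text: str) -> int:
    # cl100k_base is the encoding the removed num_tokens_from_string() used
    return len(tiktoken.get_encoding("cl100k_base").encode(text))

def translate_to_gloss(english: str) -> str:
    query = ("Now, translate the following sentences to perfect ASL gloss using the "
             "grammatical, syntactic, and notation rules you just learned. \n\n" + english)
    if count_tokens(query) >= 553:  # same cutoff as the removed checkTokens()
        return "Too many tokens: cannot translate"
    prompt_files = ["intro_instructions_combine.txt", "formatting_rules_expanded.txt",
                    "examples_longer1.txt", "examples_dialogues.txt"]
    system_messages = [{"role": "system", "content": open(p).read()} for p in prompt_files]
    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=system_messages + [{"role": "user", "content": query}],
        temperature=0,  # deterministic, rule-following output, as in the removed app
    )
    return completion.choices[0].message.content

# Hypothetical usage, with the output shape taken from the removed example comment:
# translate_to_gloss("Every year I buy my dad a gift")
# -> "EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>"

Keeping the instructions, rules, and examples as separate system messages and pinning temperature to 0 mirrors how the removed getASLGloss() steered the model toward consistent gloss notation.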