File size: 7,605 Bytes
8eb9635
7acf2ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eb9635
7acf2ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eb9635
7acf2ae
 
 
 
 
8eb9635
 
 
 
7acf2ae
8eb9635
7acf2ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eb9635
 
7acf2ae
8eb9635
 
 
7acf2ae
 
 
 
8eb9635
 
 
 
 
 
 
7acf2ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eb9635
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import os
# Read the secret from the process environment; os.getenv returns None when
# the variable is not defined.
secret = os.getenv("VerySecret")
# Do not echo the secret value itself: stdout is commonly captured in logs.
# Report only whether the variable is configured.
print("VerySecret is set" if secret else "VerySecret is missing or empty")

# from pathlib import Path
# import gradio as gr
# #import openai
# import os
# import tiktoken 
# from openai import OpenAI


# # Set secret key
# #HF_TOKEN = os.getenv("NextStar")

# # Set client and secret key
# client = OpenAI(api_key=os.getenv("NextStar"))

# #Set prompt engineering paths (so globally available)
# inStructionPath = "intro_instructions_combine.txt"
# inRulesPath = "formatting_rules_expanded.txt"
# inExamplesPath = "examples_longer1.txt"
# inDialoguesPath = "examples_dialogues.txt"  

# #Set to read in prompting files
# def openReadFiles(inpath):
#     infile = Path (inpath)
#     with open(infile) as f:
#         data = f.read()
#     return data


# # Set up prompting data (so globally available)
# instruct = openReadFiles(inStructionPath)
# rules = openReadFiles(inRulesPath)
# examples = openReadFiles(inExamplesPath)
# exampleDialogues = openReadFiles(inDialoguesPath)


# def formatQuery(engText):
#     """Add prompt instructions to English text for GPT4"""
#     instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
#     query = instruct+engText
#     return query


# def num_tokens_from_string(string: str, encoding_name: str) -> int:
#     """Returns the number of tokens in a text string."""
#     encoding = tiktoken.get_encoding(encoding_name)
#     num_tokens = len(encoding.encode(string))
#     return num_tokens


# def checkTokens(tokens):
#     """Checks tokens to ensure we can translate to ASL gloss"""
#     goAhead = None
#     if tokens >= 553:
#         print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
#         goAhead = False
#     else:
#         goAhead = True
#         print(f"Number of tokens is acceptable: can continue translating")
#     return goAhead
    
# def getGlossFromText(query):
#     """Sets all for getting ASL gloss"""
#     text = formatQuery(query)
#     tokens = num_tokens_from_string(text, "cl100k_base")
#     goAhead = checkTokens(tokens)
#     if goAhead == True:
#         results = getASLGloss(text)
#     else:
#         results = "Too many tokens: cannot translate"
#     return results



# def getASLGloss(testQs):
#     """Get ASL gloss from OpenAI using our prompt engineering"""
#     #openai.api_key = HF_TOKENS
#     completion = client.chat.completions.create(
#       model = 'gpt-4-0125-preview',
#       messages = [
#           {"role": "system", "content": instruct},
#           {"role": "system", "content": rules},
#           {"role": "system", "content": examples},
#           {"role": "system", "content": exampleDialogues},
#           {"role": "user", "content": testQs},
#       ],
    
#       temperature = 0  
#     )
#     #results = completion['choices'][0]['message']['content']
#     results = completion.choices[0].message.content
#     return results
     

    

# def main():
    
#     title = "English to ASL Gloss"
#     #description = """Translate English text to ASL Gloss"""
#     description = "This program uses GPT4 alongside prompt engineering to \
#         translate English text to ASL gloss.\n \
#         <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
#         \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
#         \n\n \
#         \n \n This version of EngToASLGloss contains superscript notation which adds \
#         grammatical context to assist in ASL generation. \
#         \n Below are the guidelines we are using to express grammatical concepts \
#         in ASL gloss.\
#         Anything within the angle brackets < > indicates this additional grammatical notation.\
#         If the angle brackets are directly next to a word, the notation inside \
#         the angle brackets is associated with just that word, e.g. WILL < A >.  \
#         If the angle brackets are next to a whitespace after a word,\
#         the notation inside the angle bracket is associated with all of the words\
#         before it, up until a comma, another angle bracket, or a double space.\
#         \n \n This sentence is an example of this rule:\
#         \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
#         \n\r \
#         \n The superscript notation options that will appear in results are as follows:\
#         \n Ti marks time\
#         \n T marks topic\
#         \n A marks comment\
#         \n Y/N marks yes-no question\
#         \n WHQ marks wh-question\
#         \n RHQ marks rhetorical question\
#         \n < Cond > marks conditional sentences\
#         \n lower case marks directional verbs\
#         \n ++ marks emphasis ('very' or 'a lot of')\
#         \n \# marks lexical fingerspelling \
#         \n \- marks space between individual letters of fingerspelling\
#         \n \n <b>Note: This is a prototype and is still in development. \
#         Do not use it in a production deployment.</b> \
#         \n For additional details on how the program works, please see \
#         [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"

#     interface = gr.Interface(
#         fn=getGlossFromText, 
#         inputs="textbox", 
#         outputs="text",
#         title = title,
#         description = description)
#         #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result:  EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
#         # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"], 
#         #         ["If I don't travel often, I am sad."]])
#     interface.launch()
    

    
# if __name__ == "__main__":
#     main()

    
# #     def getAnswer(query, texts = texts, embeddings = embeddings):
# #         docsearch = FAISS.from_texts(texts, embeddings)
# #         docs = docsearch.similarity_search(query)
# #         chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# #         response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #             #interum_q = list(response.keys())
# #         interum_a = list(response.values())
# #         q = query
# #         a = interum_a[0]
# #         return a

# #     # query = "describe the fisher database"
# #     # docs = docsearch.similarity_search(query)
# #     # NOTE(review): a hard-coded API key was removed from this line; revoke the exposed key and load it from the environment instead.
# #     # chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
# #     # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# #     title = "Query the S Drive!"
# #     description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""

# #     interface = gr.Interface(
# #         fn=getAnswer, 
# #         inputs="textbox", 
# #         outputs="text",
# #         title = title,
# #         description = description,
# #         examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"], 
# #                 ["How many audio files do we have in the CallHome database?"]])
# #     interface.launch()
    

    
# # if __name__ == "__main__":
# #     main()

# # def main():
# #     results = setMode()
# #     print (results)
# # main()