ai-lab-macree committed on
Commit 7acf2ae
1 Parent(s): c6c114f

Update eng_to_aslGloss_app.py

Files changed (1)
  1. eng_to_aslGloss_app.py +172 -168
eng_to_aslGloss_app.py CHANGED
@@ -1,180 +1,150 @@
- from pathlib import Path
- import gradio as gr
- #import openai
  import os
- import tiktoken
- from openai import OpenAI
-
-
- # Set secret key
- #HF_TOKEN = os.getenv("NextStar")
-
- # Set client and secret key
- client = OpenAI(api_key=os.getenv("NextStar"))
-
- #Set prompt engineering paths (so globally available)
- inStructionPath = "intro_instructions_combine.txt"
- inRulesPath = "formatting_rules_expanded.txt"
- inExamplesPath = "examples_longer1.txt"
- inDialoguesPath = "examples_dialogues.txt"
-
- #Set to read in prompting files
- def openReadFiles(inpath):
-     infile = Path (inpath)
-     with open(infile) as f:
-         data = f.read()
-     return data
-
-
- # Set up prompting data (so globally available)
- instruct = openReadFiles(inStructionPath)
- rules = openReadFiles(inRulesPath)
- examples = openReadFiles(inExamplesPath)
- exampleDialogues = openReadFiles(inDialoguesPath)
-
-
- def formatQuery(engText):
-     """Add prompt instructions to English text for GPT4"""
-     instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
-     query = instruct+engText
-     return query
-
-
- def num_tokens_from_string(string: str, encoding_name: str) -> int:
-     """Returns the number of tokens in a text string."""
-     encoding = tiktoken.get_encoding(encoding_name)
-     num_tokens = len(encoding.encode(string))
-     return num_tokens
-
-
- def checkTokens(tokens):
-     """Checks tokens to ensure we can translate to ASL gloss"""
-     goAhead = None
-     if tokens >= 553:
-         print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
-         goAhead = False
-     else:
-         goAhead = True
-         print(f"Number of tokens is acceptable: can continue translating")
-     return goAhead

- def getGlossFromText(query):
-     """Sets all for getting ASL gloss"""
-     text = formatQuery(query)
-     tokens = num_tokens_from_string(text, "cl100k_base")
-     goAhead = checkTokens(tokens)
-     if goAhead == True:
-         results = getASLGloss(text)
-     else:
-         results = "Too many tokens: cannot translate"
-     return results
-
-
-
- def getASLGloss(testQs):
-     """Get ASL gloss from OpenAI using our prompt engineering"""
-     #openai.api_key = HF_TOKENS
-     completion = client.chat.completions.create(
-         model = 'gpt-4-0125-preview',
-         messages = [
-             {"role": "system", "content": instruct},
-             {"role": "system", "content": rules},
-             {"role": "system", "content": examples},
-             {"role": "system", "content": exampleDialogues},
-             {"role": "user", "content": testQs},
-         ],

-         temperature = 0
-     )
-     #results = completion['choices'][0]['message']['content']
-     results = completion.choices[0].message.content
-     return results




- def main():
-
-     title = "English to ASL Gloss"
-     #description = """Translate English text to ASL Gloss"""
-     description = "This program uses GPT4 alongside prompt engineering to \
-     translate English text to ASL gloss.\n \
-     <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
-     \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
-     \n\n \
-     \n \n This version of EngToASLGloss contains superscript notation which adds \
-     grammatical context to assist in ASL generation. \
-     \n Below are the guidelines we are using to express grammatical concepts \
-     in ASL gloss.\
-     Anything within the angle brackets < > indicates this additional grammatical notation.\
-     If the angle brackets are directly next to a word, the notation inside \
-     the angle brackets is associated with just that word, e.g. WILL < A >. \
-     If the angle brackets are next to a whitespace after a word,\
-     the notation inside the angle bracket is associated with all of the words\
-     before it, up until a comma, another angle bracket, or a double space.\
-     \n \n This sentence is an example of this rule:\
-     \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
-     \n\r \
-     \n The superscript notation options that will appear in results are as follows:\
-     \n Ti marks time\
-     \n T marks topic\
-     \n A marks comment\
-     \n Y/N marks yes-no question\
-     \n WHQ marks wh-question\
-     \n RHQ marks rhetorical question\
-     \n < Cond > marks conditional sentences\
-     \n lower case marks directional verbs\
-     \n ++ marks emphasis ('very' or 'a lot of')\
-     \n \# marks lexical fingerspelling \
-     \n \- marks space between individual letters of fingerspelling\
-     \n \n <b>Note: This is a prototype and is still in development. \
-     Do not use it in a production deployment.</b> \
-     \n For additional details on how the program works, please see \
-     [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"
-
-     interface = gr.Interface(
-         fn=getGlossFromText,
-         inputs="textbox",
-         outputs="text",
-         title = title,
-         description = description)
-     #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
-     # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
-     # ["If I don't travel often, I am sad."]])
-     interface.launch()
-
-
-
- if __name__ == "__main__":
-     main()
-

- # def getAnswer(query, texts = texts, embeddings = embeddings):
- #     docsearch = FAISS.from_texts(texts, embeddings)
- #     docs = docsearch.similarity_search(query)
- #     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
- #     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
- #     #interum_q = list(response.keys())
- #     interum_a = list(response.values())
- #     q = query
- #     a = interum_a[0]
- #     return a
-
- # # query = "describe the fisher database"
- # # docs = docsearch.similarity_search(query)
- # # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
- # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
- # title = "Query the S Drive!"
- # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""

  # interface = gr.Interface(
- #     fn=getAnswer,
  #     inputs="textbox",
  #     outputs="text",
  #     title = title,
- #     description = description,
- #     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
- #     ["How many audio files do we have in the CallHome database?"]])
  # interface.launch()


  import os
+ secret = os.getenv("VerySecret")
+ print(secret)
+
+ # from pathlib import Path
+ # import gradio as gr
+ # #import openai
+ # import os
+ # import tiktoken
+ # from openai import OpenAI
+
+
+ # # Set secret key
+ # #HF_TOKEN = os.getenv("NextStar")
+
+ # # Set client and secret key
+ # client = OpenAI(api_key=os.getenv("NextStar"))
+
+ # #Set prompt engineering paths (so globally available)
+ # inStructionPath = "intro_instructions_combine.txt"
+ # inRulesPath = "formatting_rules_expanded.txt"
+ # inExamplesPath = "examples_longer1.txt"
+ # inDialoguesPath = "examples_dialogues.txt"
+
+ # #Set to read in prompting files
+ # def openReadFiles(inpath):
+ #     infile = Path (inpath)
+ #     with open(infile) as f:
+ #         data = f.read()
+ #     return data
+
+
+ # # Set up prompting data (so globally available)
+ # instruct = openReadFiles(inStructionPath)
+ # rules = openReadFiles(inRulesPath)
+ # examples = openReadFiles(inExamplesPath)
+ # exampleDialogues = openReadFiles(inDialoguesPath)
+
+
+ # def formatQuery(engText):
+ #     """Add prompt instructions to English text for GPT4"""
+ #     instruct = "Now, translate the following sentences to perfect ASL gloss using the grammatical, syntactic, and notation rules you just learned. \n\n"
+ #     query = instruct+engText
+ #     return query
+
+
+ # def num_tokens_from_string(string: str, encoding_name: str) -> int:
+ #     """Returns the number of tokens in a text string."""
+ #     encoding = tiktoken.get_encoding(encoding_name)
+ #     num_tokens = len(encoding.encode(string))
+ #     return num_tokens
+
+
+ # def checkTokens(tokens):
+ #     """Checks tokens to ensure we can translate to ASL gloss"""
+ #     goAhead = None
+ #     if tokens >= 553:
+ #         print(f"Cannot translate to ASL gloss at this time: too many tokens ({tokens})")
+ #         goAhead = False
+ #     else:
+ #         goAhead = True
+ #         print(f"Number of tokens is acceptable: can continue translating")
+ #     return goAhead

+ # def getGlossFromText(query):
+ #     """Sets all for getting ASL gloss"""
+ #     text = formatQuery(query)
+ #     tokens = num_tokens_from_string(text, "cl100k_base")
+ #     goAhead = checkTokens(tokens)
+ #     if goAhead == True:
+ #         results = getASLGloss(text)
+ #     else:
+ #         results = "Too many tokens: cannot translate"
+ #     return results
+
+
+
+ # def getASLGloss(testQs):
+ #     """Get ASL gloss from OpenAI using our prompt engineering"""
+ #     #openai.api_key = HF_TOKENS
+ #     completion = client.chat.completions.create(
+ #         model = 'gpt-4-0125-preview',
+ #         messages = [
+ #             {"role": "system", "content": instruct},
+ #             {"role": "system", "content": rules},
+ #             {"role": "system", "content": examples},
+ #             {"role": "system", "content": exampleDialogues},
+ #             {"role": "user", "content": testQs},
+ #         ],

+ #         temperature = 0
+ #     )
+ #     #results = completion['choices'][0]['message']['content']
+ #     results = completion.choices[0].message.content
+ #     return results




+ # def main():

+ #     title = "English to ASL Gloss"
+ #     #description = """Translate English text to ASL Gloss"""
+ #     description = "This program uses GPT4 alongside prompt engineering to \
+ #     translate English text to ASL gloss.\n \
+ #     <b>Type in the English sentence you would like to translate into ASL Gloss.</b> \
+ #     \n \n This program was last updated on February 27, 2024, and uses GPT4-Turbo (0125 preview version) \
+ #     \n\n \
+ #     \n \n This version of EngToASLGloss contains superscript notation which adds \
+ #     grammatical context to assist in ASL generation. \
+ #     \n Below are the guidelines we are using to express grammatical concepts \
+ #     in ASL gloss.\
+ #     Anything within the angle brackets < > indicates this additional grammatical notation.\
+ #     If the angle brackets are directly next to a word, the notation inside \
+ #     the angle brackets is associated with just that word, e.g. WILL < A >. \
+ #     If the angle brackets are next to a whitespace after a word,\
+ #     the notation inside the angle bracket is associated with all of the words\
+ #     before it, up until a comma, another angle bracket, or a double space.\
+ #     \n \n This sentence is an example of this rule:\
+ #     \n NEXT-YEAR < Ti >, MY FIANCE < T >, TWO-OF-US MARRY \< A \>.\
+ #     \n\r \
+ #     \n The superscript notation options that will appear in results are as follows:\
+ #     \n Ti marks time\
+ #     \n T marks topic\
+ #     \n A marks comment\
+ #     \n Y/N marks yes-no question\
+ #     \n WHQ marks wh-question\
+ #     \n RHQ marks rhetorical question\
+ #     \n < Cond > marks conditional sentences\
+ #     \n lower case marks directional verbs\
+ #     \n ++ marks emphasis ('very' or 'a lot of')\
+ #     \n \# marks lexical fingerspelling \
+ #     \n \- marks space between individual letters of fingerspelling\
+ #     \n \n <b>Note: This is a prototype and is still in development. \
+ #     Do not use it in a production deployment.</b> \
+ #     \n For additional details on how the program works, please see \
+ #     [the README](https://huggingface.co/spaces/rrakov/EngTexToASLGloss/blob/main/README.md)"

  # interface = gr.Interface(
+ #     fn=getGlossFromText,
  #     inputs="textbox",
  #     outputs="text",
  #     title = title,
+ #     description = description)
+ #     #examples = [[("Prompt: Every year I buy my dad a gift \n", "Result: EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>")]])
+ #     # examples=[["Every year I buy my dad a gift"], ["I always look forward to the family vacation"],
+ #     # ["If I don't travel often, I am sad."]])
  # interface.launch()


@@ -182,9 +152,43 @@ if __name__ == "__main__":
  # if __name__ == "__main__":
  #     main()
 
- # def main():
- #     results = setMode()
- #     print (results)
- # main()
 
 
  # if __name__ == "__main__":
  #     main()
 
+
+ # # def getAnswer(query, texts = texts, embeddings = embeddings):
+ # #     docsearch = FAISS.from_texts(texts, embeddings)
+ # #     docs = docsearch.similarity_search(query)
+ # #     chain = load_qa_chain(OpenAI(openai_api_key = HF_TOKEN, temperature=0), chain_type="map_reduce", return_map_steps=False)
+ # #     response = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+ # #     #interum_q = list(response.keys())
+ # #     interum_a = list(response.values())
+ # #     q = query
+ # #     a = interum_a[0]
+ # #     return a
+
+ # # # query = "describe the fisher database"
+ # # # docs = docsearch.similarity_search(query)
+ # # # chain = load_qa_chain(OpenAI(openai_api_key = "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", temperature=0), chain_type="map_reduce", return_map_steps=False)
+ # # # chain({"input_documents": docs, "question": query}, return_only_outputs=True)
+ # # title = "Query the S Drive!"
+ # # description = """This QA system will answer questions based on information in [data descriptions](https://indeocorp-my.sharepoint.com/:x:/g/personal/rrakov_sorenson_com/EWhs_Gpp9nNEukR7iJLd4mQBPREngKdRGYpT545jX8mY4Q?e=9EeEWF)"""
+
+ # # interface = gr.Interface(
+ # #     fn=getAnswer,
+ # #     inputs="textbox",
+ # #     outputs="text",
+ # #     title = title,
+ # #     description = description,
+ # #     examples=[["Where is the Fisher database?"], ["Where is the Defined Crowd audio?"], ["Do we have any Spanish audio data?"],
+ # #     ["How many audio files do we have in the CallHome database?"]])
+ # # interface.launch()
+
+
+
+ # # if __name__ == "__main__":
+ # #     main()
+
+ # # def main():
+ # #     results = setMode()
+ # #     print (results)
+ # # main()

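For reference, the core flow of the code removed here is: read the four prompt files, prepend the translation instruction to the user's English text, gate the request on a tiktoken count, and send the stacked system prompts to GPT-4 Turbo at temperature 0. The sketch below condenses that flow into a single function. It is illustrative only; the file names, model, 553-token cutoff, instruction text, and the `NextStar` secret are taken from the removed code, while the helper names and the usage line are hypothetical.

import os
import tiktoken
from openai import OpenAI

client = OpenAI(api_key=os.getenv("NextStar"))  # same secret name the removed code read

def count_tokens(text: str) -> int:
    # cl100k_base is the encoding the removed num_tokens_from_string() used
    return len(tiktoken.get_encoding("cl100k_base").encode(text))

def translate_to_gloss(english: str) -> str:
    query = ("Now, translate the following sentences to perfect ASL gloss using the "
             "grammatical, syntactic, and notation rules you just learned. \n\n" + english)
    if count_tokens(query) >= 553:  # same cutoff as the removed checkTokens()
        return "Too many tokens: cannot translate"
    prompt_files = ["intro_instructions_combine.txt", "formatting_rules_expanded.txt",
                    "examples_longer1.txt", "examples_dialogues.txt"]
    system_messages = [{"role": "system", "content": open(p).read()} for p in prompt_files]
    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=system_messages + [{"role": "user", "content": query}],
        temperature=0,  # deterministic, rule-following output, as in the removed app
    )
    return completion.choices[0].message.content

# Hypothetical usage, with the output shape taken from the removed example comment:
# translate_to_gloss("Every year I buy my dad a gift")
# -> "EVERY-YEAR<Ti>, MY DAD GIFT<T>, ME BUY<A>"

Keeping the instructions, rules, and examples as separate system messages and pinning temperature to 0 mirrors how the removed getASLGloss() steered the model toward consistent gloss notation.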