Spaces:
Running
Running
Update helper_functions_api.py
Browse files- helper_functions_api.py +25 -30
helper_functions_api.py
CHANGED
@@ -96,10 +96,8 @@ Guidelines for extraction:
|
|
96 |
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
|
97 |
SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
|
98 |
#Additional instructions:
|
99 |
-
##Use the following search
|
100 |
-
OR #to cover multiple topics
|
101 |
-
* #wildcard to match any word or phrase
|
102 |
-
AND #to include specific topics."""
|
103 |
|
104 |
import tiktoken # Used to limit tokens
|
105 |
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # Instead of Llama3 using available option/ replace if found anything better
|
@@ -172,32 +170,29 @@ def remove_stopwords(text):
|
|
172 |
return ' '.join(filtered_text)
|
173 |
|
174 |
def rephrase_content(data_format, content, query):
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
max_tokens=500,
|
199 |
-
)
|
200 |
-
|
201 |
|
202 |
def fetch_content(url):
|
203 |
try:
|
|
|
96 |
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
|
97 |
SysPromptSearch = """You are a search query generator, create a concise Google search query, focusing only on the main topic and omitting additional redundant details, include year if necessory, 2024, Do not add any additional comments. OUTPUT ONLY THE SEARCH QUERY
|
98 |
#Additional instructions:
|
99 |
+
##Use the following search operator if necessory
|
100 |
+
OR #to cover multiple topics"""
|
|
|
|
|
101 |
|
102 |
import tiktoken # Used to limit tokens
|
103 |
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # Instead of Llama3 using available option/ replace if found anything better
|
|
|
170 |
return ' '.join(filtered_text)
|
171 |
|
172 |
def rephrase_content(data_format, content, query):
    """Ask the model to reduce scraped ``content`` to what matters for ``query``.

    The scraped text is first passed through ``limit_tokens`` with
    ``token_limit=2000`` and embedded, together with ``query``, in a prompt
    whose wording depends on ``data_format``:

    * ``"Structured data"``   - concise paragraphs and/or tables
      (``max_tokens=900``)
    * ``"Quantitative data"`` - numerical data structured into .md tables
      (``max_tokens=500``)
    * any other value         - generic "relevant information" prompt
      (``max_tokens=500``)

    The prompt is sent through ``together_response`` with ``SysPromptData``
    as the system prompt, and that call's result is returned.

    Best-effort fallback: if anything in the above raises, the error is
    printed and ``limit_tokens(content, token_limit=500)`` is returned
    instead.
    """
    try:
        # Every variant embeds the same truncated context, so build it once.
        scraped = limit_tokens(content, token_limit=2000)

        if data_format == "Structured data":
            prompt = (
                f"return only the relevant information regarding the query: {{{query}}}. "
                "Output should be concise chunks of "
                "paragraphs or tables or both, extracted from the following scraped context "
                f"{{{scraped}}}"
            )
            answer_limit = 900
        elif data_format == "Quantitative data":
            prompt = (
                f"return only the numerical or quantitative data regarding the query: {{{query}}} "
                f"structured into .md tables, using the scraped context:{{{scraped}}}"
            )
            answer_limit = 500
        else:
            prompt = (
                f"return only the relevant information regarding the query: {{{query}}} "
                f"using the scraped context:{{{scraped}}}"
            )
            answer_limit = 500

        return together_response(
            prompt,
            SysPrompt=SysPromptData,
            max_tokens=answer_limit,
        )
    except Exception as e:
        # Deliberate catch-all: on any failure fall back to the raw
        # (truncated) scraped text rather than propagating the error.
        print(f"An error occurred: {str(e)}")
        return limit_tokens(content, token_limit=500)
|
|
|
|
|
|
|
196 |
|
197 |
def fetch_content(url):
|
198 |
try:
|