Spaces:
Sleeping
Sleeping
import json
import re

import requests
def printInColor(text, color=None):
    """Print ``text`` wrapped in an ANSI color escape sequence.

    Args:
        text: Value to print. It is formatted with an f-string, so
            non-string values (numbers, lists, ...) are accepted too.
        color: Name of an entry in the color table below. ``None`` selects
            ``"blue"``.

    Raises:
        KeyError: If ``color`` is not ``None`` and is not a key of the table.
    """
    colorSet = {
        "green": '\033[1;32m',
        "blue": '\033[34m',
        "red": '\033[1;31m',
        "yellow": '\033[1;33m',
        "magenta": '\033[1;35m',
        "cyan": '\033[1;36m',
        "gray": '\033[1;30m',
        "bright_green": '\033[1;92m',
        "bright_blue": '\033[1;94m',
        "bright_red": '\033[1;91m',
        "bright_yellow": '\033[1;93m',
        "bright_magenta": '\033[1;95m',
        "bright_cyan": '\033[1;96m',
        "bright_white": '\033[1;97m',
        "reset": '\033[0m',
    }
    # Identity check: None is a singleton, so `is` is the correct comparison.
    code = colorSet['blue'] if color is None else colorSet[color]
    # Formatting instead of `+` avoids a TypeError when text is not a str.
    print(f"{code}{text}{colorSet['reset']}")
def remove_sentence_with_keyword(text, keywords):
    """Remove every sentence of ``text`` that contains one of ``keywords``.

    A "sentence" here is a run of non-terminator characters followed by one
    or more terminators (``.``, ``!``, ``?``, ``。``) and then a whitespace
    character or the end of the text. A trailing fragment without a
    terminator is never removed.

    Args:
        text: The text to filter.
        keywords: Iterable of literal keyword strings. Each keyword is
            regex-escaped. Empty keywords are ignored, because an empty
            keyword would match (and delete) every sentence.

    Returns:
        The text with all matching sentences deleted.
    """
    for keyword in keywords:
        if not keyword:
            continue
        # NOTE(review): the character classes list `?!` twice; the second pair
        # may originally have been full-width marks lost in transit - confirm.
        pattern = r"[^.!?。?!]*?" + re.escape(keyword) + r"[^.!?。?!]*?[.!?。?!]+(?:\s|$)"
        text = re.sub(pattern, '', text)
    return text
def remove_line_with_keyword(text, keyword):
    """Remove newline-terminated lines of ``text`` that end with ``keyword``.

    A line is removed when it contains ``keyword`` (case-insensitively)
    followed only by zero or more literal periods and then a newline. The
    keyword is treated as literal text, so regex metacharacters in it are
    safe.

    Args:
        text: The text to filter.
        keyword: Literal keyword. An empty keyword leaves ``text`` unchanged
            instead of deleting every line.

    Returns:
        The text with the matching lines (including their newline) removed.
    """
    if not keyword:
        return text
    # NOTE(review): `\.*` matches literal dots only, so a line with other text
    # after the keyword is kept. If "any line containing the keyword" was the
    # intent, this should be `.*` - confirm with callers before changing.
    pattern = r'.*{}\.*\n'.format(re.escape(keyword))
    return re.sub(pattern, '', text, flags=re.IGNORECASE)
def extract_as(text):
    """Return the contents of every ``<as>...</as>`` span found in ``text``.

    Matching is non-greedy and does not cross newlines. The result is a list
    of the inner strings in order of appearance (empty when nothing matches).
    """
    as_tag = re.compile("<as>(.*?)</as>")
    return as_tag.findall(text)
def extract_last_sentence(text):
    """Return the body of the last numbered item in ``text``.

    An item starts with digits, a period and one whitespace character; its
    body runs (lazily) up to the next "digits + period" or the end of the
    text. When at least one item is found, the last body is printed in
    bright blue and returned. Otherwise ``text`` is returned unchanged.
    """
    numbered_items = re.findall(r"(\d+\.\s)([\s\S]*?)(?=\d+\.|$)", text)
    if not numbered_items:
        # No numbered list present: hand the input back as-is.
        return text
    _marker, text = numbered_items[-1]
    printInColor(f"Last paragraph:\n{text}", 'bright_blue')
    return text
def extract_json(text):
    """Extract and parse the first JSON object embedded in ``text``.

    The text must contain a ``{`` that is eventually followed by a ``}``;
    otherwise an empty dict is returned. Parsing starts at the first ``{``
    and stops at the end of that object, so trailing text that happens to
    contain more braces no longer breaks extraction.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The decoded object, or ``{}`` when no brace-delimited span exists.

    Raises:
        json.JSONDecodeError: If a brace-delimited span exists but no valid
            JSON object starts at its first ``{``.
    """
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if match is None:
        return {}
    try:
        # raw_decode parses one JSON value at the given index and ignores
        # whatever follows it, unlike json.loads on the greedy span.
        json_data, _end = json.JSONDecoder().raw_decode(text, match.start())
    except json.JSONDecodeError:
        pass
    else:
        return json_data
    # Fall back to the greedy span so malformed input raises as it did before.
    return json.loads(match.group(0))
def extract_response(text):
    """Return the content of the last ``#...#`` span in ``text``.

    Spans are matched non-greedily on a single line. ``None`` is returned
    when the text contains no such span.
    """
    found = re.findall(r'#(.*?)#', text)
    return found[-1] if found else None
def extract_url(text):
    """Return the first ``http://`` or ``https://`` URL in ``text``.

    The URL extends up to the next whitespace character. ``None`` is
    returned when no URL is present.
    """
    found = re.search(r'https?://\S+', text)
    return None if found is None else found.group(0)
def has_url(text):
    """Report whether ``text`` contains an ``http://`` or ``https://`` URL."""
    return re.search(r'https?://\S+', text) is not None
def truncate_text(text, maxLen=2200):
    """Join ``text`` into one string and cap it at ``maxLen`` characters.

    ``text`` may be a string or any iterable of strings; the pieces are
    concatenated without a separator before the cut is applied.
    """
    joined = "".join(text)
    return joined if len(joined) <= maxLen else joined[:maxLen]
def mock_tokencount(prompt):
    """Estimate the token count of ``prompt`` as 0.45 tokens per character.

    The product is truncated toward zero to yield an integer.
    """
    char_count = len(prompt)
    return int(char_count * 0.45)
def truncate_prompt(basePrompt, processPrompt, latestInput, maxTokenCount=3200):
    """Assemble a prompt and trim it to fit within ``maxTokenCount`` tokens.

    The prompt is ``basePrompt + processPrompt + latestInput``. If its
    estimated token count (per ``mock_tokencount``) exceeds the limit, the
    base prompt is kept intact and the *oldest* characters of
    ``processPrompt + latestInput`` are dropped, keeping the longest tail
    that still fits. The resulting estimated length is printed in bright red.

    Args:
        basePrompt: Fixed leading part of the prompt; never truncated.
        processPrompt: Accumulated intermediate text; truncated first.
        latestInput: Most recent input, appended last.
        maxTokenCount: Upper bound on the estimated token count.

    Returns:
        The assembled (possibly truncated) prompt. If ``basePrompt`` alone
        exceeds the limit, ``basePrompt`` is returned unchanged.
    """
    prompt = basePrompt + processPrompt + latestInput
    if mock_tokencount(prompt) <= maxTokenCount:
        return prompt

    availablePrompt = processPrompt + latestInput
    total = len(availablePrompt)
    # Binary-search the largest tail length that fits. This works in
    # characters and asks the estimator directly, rather than mixing token
    # counts with character indices. Assumes the estimate does not decrease
    # as the text grows.
    keep_lo, keep_hi = 0, total
    while keep_lo < keep_hi:
        keep = (keep_lo + keep_hi + 1) // 2
        candidate = basePrompt + availablePrompt[total - keep:]
        if mock_tokencount(candidate) <= maxTokenCount:
            keep_lo = keep
        else:
            keep_hi = keep - 1

    # `total - keep_lo` (not `-keep_lo`) so that keeping 0 chars yields "".
    prompt = basePrompt + availablePrompt[total - keep_lo:]
    currentPromptCount = mock_tokencount(prompt)
    printInColor(f"truncated prompt length: {currentPromptCount}", "bright_red")
    return prompt