import json
import re

import requests  # not referenced by the helpers below; kept in case other code relies on it

# ANSI escape sequences keyed by the color names accepted by printInColor.
_COLOR_CODES = {
    "green": '\033[1;32m',
    "blue": '\033[34m',
    "red": '\033[1;31m',
    "yellow": '\033[1;33m',
    "magenta": '\033[1;35m',
    "cyan": '\033[1;36m',
    "gray": '\033[1;30m',
    "bright_green": '\033[1;92m',
    "bright_blue": '\033[1;94m',
    "bright_red": '\033[1;91m',
    "bright_yellow": '\033[1;93m',
    "bright_magenta": '\033[1;95m',
    "bright_cyan": '\033[1;96m',
    "bright_white": '\033[1;97m',
    "reset": '\033[0m',
}

# Shared by extract_url and has_url so both agree on what counts as a URL.
_URL_PATTERN = re.compile(r'https?://\S+')

# Tokens-per-character ratio used by the mock token estimate.
_MOCK_TOKENS_PER_CHAR = 0.45


def printInColor(text, color=None):
    """Print *text* wrapped in an ANSI color code, followed by a reset code.

    Args:
        text: String to print.
        color: Key of the color table (e.g. "red", "bright_blue").
            ``None`` selects "blue".

    Raises:
        KeyError: If *color* is not a key of the color table.
    """
    code = _COLOR_CODES['blue'] if color is None else _COLOR_CODES[color]
    print(code + text + _COLOR_CODES['reset'])


def remove_sentence_with_keyword(text, keywords):
    """Remove every sentence of *text* that contains one of *keywords*.

    A sentence is a run of non-terminator characters that ends in one or
    more terminators followed by a whitespace character or the end of the
    text. Keywords are matched literally. A sentence without a closing
    terminator is left untouched.

    Args:
        text: Text to filter.
        keywords: Iterable of literal keyword strings.

    Returns:
        The text with the matching sentences deleted.
    """
    for keyword in keywords:
        # NOTE(review): the class lists "?" and "!" twice; possibly
        # full-width variants were intended -- confirm.
        pattern = r"[^.!?。?!]*?" + re.escape(keyword) + r"[^.!?。?!]*?[.!?。?!]+(\s|$)"
        text = re.sub(pattern, '', text)
    return text


def remove_line_with_keyword(text, keyword):
    """Remove every line of *text* that contains *keyword* (case-insensitive).

    The keyword is matched literally, anywhere in the line, and the final
    line is handled even when it has no trailing newline.

    Args:
        text: Multi-line text to filter.
        keyword: Literal string identifying the lines to drop.

    Returns:
        The text without the matching lines.
    """
    # "." does not match a newline here, so each match stays within one line;
    # the line's own terminator (or the end of the text) is consumed with it.
    pattern = r'.*' + re.escape(keyword) + r'.*(?:\n|$)'
    return re.sub(pattern, '', text, flags=re.IGNORECASE)


def extract_as(text):
    """Return ``re.findall`` of the pattern ``(.*?)`` over *text*.

    NOTE(review): the pattern has no delimiters around the lazy group, so it
    only ever yields empty strings. It looks like surrounding markers were
    lost at some point -- confirm the intended delimiters before relying on
    this function.
    """
    pattern = r"(.*?)"
    return re.findall(pattern, text)


def extract_last_sentence(text):
    """Return the body of the last numbered item ("1. ...", "2. ...") in *text*.

    When at least one numbered item is found, the body of the last one is
    printed in bright blue and returned. Otherwise *text* is returned
    unchanged.
    """
    pattern = r"(\d+\.\s)([\s\S]*?)(?=\d+\.|$)"
    paragraphs = re.findall(pattern, text)
    if not paragraphs:
        return text  # no numbered items: hand back the whole text
    # Each match is a (number-prefix, body) tuple; keep the last body.
    text = paragraphs[-1][1]
    printInColor(f"Last paragraph:\n{text}", 'bright_blue')
    return text


def extract_json(text):
    """Parse the JSON object embedded in *text*.

    The candidate spans from the first "{" to the last "}" in the text.

    Returns:
        The decoded JSON data, or ``{}`` when the text contains no
        brace-delimited span.

    Raises:
        json.JSONDecodeError: If the brace-delimited span is not valid JSON.
    """
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if match is None:
        return {}
    return json.loads(match.group(0))


def extract_response(text):
    """Return the content of the last ``#...#`` pair in *text*, or ``None``."""
    matches = re.findall(r'#(.*?)#', text)
    return matches[-1] if matches else None


def extract_url(text):
    """Return the first http(s) URL found in *text*, or ``None``."""
    match = _URL_PATTERN.search(text)
    return match.group(0) if match else None


def has_url(text):
    """Return ``True`` when *text* contains an http(s) URL."""
    return _URL_PATTERN.search(text) is not None


def truncate_text(text, maxLen=2200):
    """Join *text* into one string and cut it to at most *maxLen* characters.

    Args:
        text: A string or an iterable of strings.
        maxLen: Maximum number of characters to keep.
    """
    return "".join(text)[:maxLen]


def mock_tokencount(prompt):
    """Estimate the token count of *prompt* as 0.45 tokens per character."""
    return int(len(prompt) * _MOCK_TOKENS_PER_CHAR)


def truncate_prompt(basePrompt, processPrompt, latestInput, maxTokenCount=3200):
    """Assemble a prompt and trim it to fit the estimated token budget.

    The prompt is ``basePrompt + processPrompt + latestInput``. When its
    estimated token count exceeds *maxTokenCount*, the base prompt is kept
    whole and the oldest characters of ``processPrompt + latestInput`` are
    dropped, so the most recent text survives. The new estimated length is
    printed in bright red whenever truncation happens.

    Args:
        basePrompt: Leading part of the prompt; never truncated.
        processPrompt: Middle part; truncated from its start first.
        latestInput: Trailing part; kept as long as the budget allows.
        maxTokenCount: Budget in estimated tokens (see mock_tokencount).

    Returns:
        The assembled, possibly truncated, prompt. If the base prompt alone
        exceeds the budget, the base prompt is returned on its own.
    """
    prompt = basePrompt + processPrompt + latestInput
    if mock_tokencount(prompt) <= maxTokenCount:
        return prompt

    # Convert the token budget to a character budget so the slice below uses
    # the same unit as the strings it cuts.
    max_total_chars = int(maxTokenCount / _MOCK_TOKENS_PER_CHAR)
    tail_budget = max_total_chars - len(basePrompt)
    trimmable = processPrompt + latestInput
    # Guard the zero case: trimmable[-0:] would return the whole string.
    kept_tail = trimmable[-tail_budget:] if tail_budget > 0 else ""
    prompt = basePrompt + kept_tail
    printInColor(f"truncated prompt length: {mock_tokencount(prompt)}", "bright_red")
    return prompt