# -*- coding:utf-8 -*-
import csv
import datetime
import getpass
import hashlib
import html
import json
import os
import pickle
import re
import threading
from enum import Enum
from typing import List, Union
from typing import TYPE_CHECKING

import colorama
import gradio as gr
import pandas as pd
import requests
import tiktoken
from loguru import logger
from markdown import markdown
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name
from pypinyin import lazy_pinyin

from src.config import retrieve_proxy, hide_history_when_not_logged_in, config_file
from src.presets import ALREADY_CONVERTED_MARK, HISTORY_DIR, TEMPLATES_DIR, i18n, LOCAL_MODELS, ONLINE_MODELS
from src.shared import state

if TYPE_CHECKING:
    from typing import TypedDict

    class DataframeData(TypedDict):
        headers: List[str]
        data: List[List[Union[str, int, bool]]]


def predict(current_model, *args):
    if current_model:
        iterator = current_model.predict(*args)
        for i in iterator:
            yield i


def billing_info(current_model):
    if current_model:
        return current_model.billing_info()


def set_key(current_model, *args):
    return current_model.set_key(*args)


def load_chat_history(current_model, *args):
    return current_model.load_chat_history(*args)


def delete_chat_history(current_model, *args):
    return current_model.delete_chat_history(*args)


def interrupt(current_model, *args):
    return current_model.interrupt(*args)


def reset(current_model, *args):
    if current_model:
        return current_model.reset(*args)


def retry(current_model, *args):
    iterator = current_model.retry(*args)
    for i in iterator:
        yield i


def delete_first_conversation(current_model, *args):
    return current_model.delete_first_conversation(*args)


def delete_last_conversation(current_model, *args):
    return current_model.delete_last_conversation(*args)


def set_system_prompt(current_model, *args):
    return current_model.set_system_prompt(*args)


def rename_chat_history(current_model, *args):
    return current_model.rename_chat_history(*args)


def auto_name_chat_history(current_model, *args):
    if current_model:
        return current_model.auto_name_chat_history(*args)


def export_markdown(current_model, *args):
    return current_model.export_markdown(*args)


def upload_chat_history(current_model, *args):
    return current_model.load_chat_history(*args)


def set_token_upper_limit(current_model, *args):
    return current_model.set_token_upper_limit(*args)


def set_temperature(current_model, *args):
    current_model.set_temperature(*args)


def set_top_p(current_model, *args):
    current_model.set_top_p(*args)


def set_n_choices(current_model, *args):
    current_model.set_n_choices(*args)


def set_stop_sequence(current_model, *args):
    current_model.set_stop_sequence(*args)


def set_max_tokens(current_model, *args):
    current_model.set_max_tokens(*args)


def set_presence_penalty(current_model, *args):
    current_model.set_presence_penalty(*args)


def set_frequency_penalty(current_model, *args):
    current_model.set_frequency_penalty(*args)


def set_logit_bias(current_model, *args):
    current_model.set_logit_bias(*args)


def set_user_identifier(current_model, *args):
    current_model.set_user_identifier(*args)


def set_single_turn(current_model, *args):
    current_model.set_single_turn(*args)


def handle_file_upload(current_model, *args):
    return current_model.handle_file_upload(*args)


def handle_summarize_index(current_model, *args):
    return current_model.summarize_index(*args)


def like(current_model, *args):
    return current_model.like(*args)


def dislike(current_model, *args):
    return current_model.dislike(*args)
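
# Illustrative usage of the dispatch wrappers above (with a hypothetical `model`
# instance; not part of this module): the UI layer passes the currently selected
# model as the first argument and each wrapper simply forwards the call to it.
#
#     for chunk in predict(model, "Hello"):
#         ...  # streamed partial output
#     reset(model)  # start a fresh conversation
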
def count_token(input_str):
    encoding = tiktoken.get_encoding("cl100k_base")
    if isinstance(input_str, dict):
        input_str = f"role: {input_str['role']}, content: {input_str['content']}"
    length = len(encoding.encode(input_str))
    return length


def markdown_to_html_with_syntax_highlight(md_str):  # deprecated
    def replacer(match):
        lang = match.group(1) or "text"
        code = match.group(2)
        try:
            lexer = get_lexer_by_name(lang, stripall=True)
        except ValueError:
            lexer = get_lexer_by_name("text", stripall=True)
        formatter = HtmlFormatter()
        highlighted_code = highlight(code, lexer, formatter)
        # Assumed wrapper markup: a plain <pre><code> block tagged with the language class.
        return f'<pre><code class="{lang}">{highlighted_code}</code></pre>'

    code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
    md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
    html_str = markdown(md_str)
    return html_str
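
# Example (illustrative): a fenced block such as "```python\nprint(1)\n```" is replaced
# by `replacer` with Pygments-highlighted HTML before the remaining markdown is
# rendered; the exact output markup depends on Pygments' HtmlFormatter defaults.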
def normalize_markdown(md_text: str) -> str:  # deprecated
    lines = md_text.split("\n")
    normalized_lines = []
    inside_list = False

    for i, line in enumerate(lines):
        if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
            if not inside_list and i > 0 and lines[i - 1].strip() != "":
                normalized_lines.append("")
            inside_list = True
            normalized_lines.append(line)
        elif inside_list and line.strip() == "":
            if i < len(lines) - 1 and not re.match(
                r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()
            ):
                normalized_lines.append(line)
            continue
        else:
            inside_list = False
            normalized_lines.append(line)

    return "\n".join(normalized_lines)
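
# Example (illustrative): normalize_markdown inserts a blank line before a list that
# directly follows a paragraph so that the markdown parser recognises the list:
#     normalize_markdown("intro\n- a\n- b")  ->  "intro\n\n- a\n- b"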
def convert_mdtext(md_text):  # deprecated
    code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
    inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
    code_blocks = code_block_pattern.findall(md_text)
    non_code_parts = code_block_pattern.split(md_text)[::2]

    result = []
    # Assumed wrapper markup: keep an escaped copy of the raw message alongside the rendered one.
    raw = f'<div class="raw-message hideM">{html.escape(md_text)}</div>'
    for non_code, code in zip(non_code_parts, code_blocks + [""]):
        if non_code.strip():
            non_code = normalize_markdown(non_code)
            result.append(markdown(non_code, extensions=["tables"]))
        if code.strip():
            # _, code = detect_language(code)  # syntax highlighting temporarily removed: it breaks on large code blocks
            # code = code.replace("\n\n", "\n")  # blank-line stripping temporarily removed: it breaks on large code blocks
            code = f"\n```{code}\n\n```"
            code = markdown_to_html_with_syntax_highlight(code)
            result.append(code)
    result = "".join(result)
    # Assumed wrapper markup for the rendered message body.
    output = f'<div class="md-message">{result}</div>'
    output += raw
    output += ALREADY_CONVERTED_MARK
    return output
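
# Illustrative behaviour: for md_text like 'See `x`:\n```py\nprint(1)\n```' the plain
# part is rendered with markdown(..., extensions=["tables"]), the fenced block goes
# through markdown_to_html_with_syntax_highlight, and ALREADY_CONVERTED_MARK is
# appended so downstream code can tell the message has already been converted.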
def clip_rawtext(chat_message, need_escape=True):
    # first, clip hr line
    # Assumed pattern: the appended display section starts with an <hr> carrying an
    # "append-display" class; everything from that point on is clipped.
    hr_pattern = r'\n\n<hr class="append-display no-in-raw" />(.*?)'
    hr_match = re.search(hr_pattern, chat_message, re.DOTALL)
    message_clipped = chat_message[: hr_match.start()] if hr_match else chat_message
    # second, avoid agent-prefix being escaped
    # Assumed pattern: agent prefixes are wrapped in HTML comment markers so they can
    # be split out and left unescaped.
    agent_prefix_pattern = r'(<!-- S O PREFIX --><p class="agent-prefix">.*?<\/p><!-- E O PREFIX -->)'
    # agent_matches = re.findall(agent_prefix_pattern, message_clipped)
    agent_parts = re.split(agent_prefix_pattern, message_clipped, flags=re.DOTALL)
    final_message = ""
    for i, part in enumerate(agent_parts):
        if i % 2 == 0:
            if part != "" and part != "\n":
                final_message += (
                    # Assumed wrapper markup for the escaped raw text.
                    f'<pre class="fake-pre">{escape_markdown(part)}</pre>'
                    if need_escape
                    else f'<pre class="fake-pre">{part}</pre>'
                )
        else:
            part = part.replace(' data-fancybox="gallery"', '')
            final_message += part
    return final_message


def convert_bot_before_marked(chat_message):
    """
    Note: do not add indentation to the output, otherwise marked will parse it as a code block.
    """
    if '