Spaces:

JiangYH
/

ChatWorld

Sleeping

App Files Files Community

JiangYH committed on Mar 4

Commit

6f179e7

•

1 Parent(s): 87818fb

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

.gitignore +6 -0
README.md +7 -0
app.py +100 -39
main.py +95 -0
requirements.txt +4 -0
run_fastapi.sh +5 -0
run_gradio.sh +3 -3
src/ChatWorld.py +157 -0
src/DataBase/BaseDB.py +61 -0
src/DataBase/ChromaDB.py +49 -0
src/DataBase/__init__.py +3 -0
src/Models/__init__.py +3 -0
src/Models/models.py +63 -0
src/Response.py +12 -0
src/__init__.py +3 -0
src/logging.py +16 -0
src/user.py +23 -0
src/utils.py +27 -0

.gitignore CHANGED Viewed

@@ -158,3 +158,9 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/

 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+.vscode
+data/
+uploads/
+clash*
+*.yml

README.md CHANGED Viewed

@@ -4,3 +4,10 @@ app_file: app.py
 sdk: gradio
 sdk_version: 3.50.2
 ---

 sdk: gradio
 sdk_version: 3.50.2
 ---
+python 3.9.18
+# TODO
+- 对话流传输
+- 持久化
+- 多轮对话 历史

app.py CHANGED Viewed

@@ -1,57 +1,87 @@
 import logging
-import os
 import gradio as gr
-from ChatWorld import ChatWorld
-logging.basicConfig(level=logging.INFO, filename="demo.log", filemode="w",
-                    format="%(asctime)s - %(name)s - %(levelname)-9s - %(filename)-8s : %(lineno)s line - %(message)s",
-                    datefmt="%Y-%m-%d %H:%M:%S")
 chatWorld = ChatWorld()
 role_name_list_global = None
 def getContent(input_file):
     # 读取文件内容
-    with open(input_file.name, 'r', encoding='utf-8') as f:
         logging.info(f"read file {input_file.name}")
         input_text = f.read()
         logging.info(f"file content: {input_text}")
-    # 保存文件内容
-    input_text_list = input_text.split("\n")
-    chatWorld.initDB(input_text_list)
-    role_name_set = set()
-    # 读取角色名
-    for line in input_text_list:
-        role_name_set.add(line.split(":")[0])
-    role_name_list = [i for i in role_name_set if i != ""]
-    logging.info(f"role_name_list: {role_name_list}")
     global role_name_list_global
     role_name_list_global = role_name_list
-    return gr.Radio(choices=role_name_list, interactive=True, value=role_name_list[0]), gr.Radio(choices=role_name_list, interactive=True, value=role_name_list[-1])
-def submit_message(message, history, model_role_name, role_name, model_role_nickname, role_nickname):
-    print(f"history: {history}")
-    chatWorld.setRoleName(model_role_name, model_role_nickname)
-    response = chatWorld.chat(message,
-                              role_name, role_nickname, use_local_model=True)
     return response
-def submit_message_api(message, history, model_role_name, role_name, model_role_nickname, role_nickname):
-    print(f"history: {history}")
-    chatWorld.setRoleName(model_role_name, model_role_nickname)
-    response = chatWorld.chat(message,
-                              role_name, role_nickname, use_local_model=False)
     return response
@@ -63,8 +93,13 @@ def get_role_list():
         return []
-with gr.Blocks() as demo:
     upload_c = gr.File(label="上传文档文件")
     with gr.Row():
@@ -75,15 +110,41 @@ with gr.Blocks() as demo:
         role_name = gr.Radio(get_role_list(), label="角色名")
         role_nickname = gr.Textbox(label="角色昵称")
-    upload_c.upload(fn=getContent, inputs=upload_c,
-                    outputs=[model_role_name, role_name])
     with gr.Row():
         chatBox_local = gr.ChatInterface(
-            submit_message, chatbot=gr.Chatbot(height=400, label="本地模型", render=False), additional_inputs=[model_role_name, role_name, model_role_nickname, role_nickname])
         chatBox_api = gr.ChatInterface(
-            submit_message_api, chatbot=gr.Chatbot(height=400, label="API模型", render=False), additional_inputs=[model_role_name, role_name, model_role_nickname, role_nickname])
-demo.launch(server_name="0.0.0.0")

 import logging
 import gradio as gr
+from src import ChatWorld
 chatWorld = ChatWorld()
 role_name_list_global = None
+role_name_dict_global = None
 def getContent(input_file):
     # 读取文件内容
+    with open(input_file.name, "r", encoding="utf-8") as f:
         logging.info(f"read file {input_file.name}")
         input_text = f.read()
         logging.info(f"file content: {input_text}")
+    chatWorld.setStory(stories=input_text, metas=None)
+    # 保存文件内容
+    role_name_list, role_name_dict = chatWorld.getRoleNameFromFile(input_text)
     global role_name_list_global
     role_name_list_global = role_name_list
+    global role_name_dict_global
+    role_name_dict_global = role_name_dict
+    return (
+        gr.Radio(choices=role_name_list, interactive=True, value=role_name_list[0]),
+        gr.Radio(choices=role_name_list, interactive=True, value=role_name_list[-1]),
+    )
+def submit_message(
+    message,
+    history,
+    model_role_name,
+    role_name,
+    model_role_nickname,
+    role_nickname,
+    withCharacter,
+):
+    if withCharacter:
+        response = chatWorld.chatWithCharacter(
+            text=message,
+            role_name=role_name,
+            role_nickname=role_nickname,
+            model_role_name=model_role_name,
+            model_role_nickname=model_role_nickname,
+            use_local_model=True,
+        )
+    else:
+        response = chatWorld.chatWithoutCharacter(
+            text=message,
+            use_local_model=True,
+        )
     return response
+def submit_message_api(
+    message,
+    history,
+    model_role_name,
+    role_name,
+    model_role_nickname,
+    role_nickname,
+    withCharacter,
+):
+    if withCharacter:
+        response = chatWorld.chatWithCharacter(
+            text=message,
+            role_name=role_name,
+            role_nickname=role_nickname,
+            model_role_name=model_role_name,
+            model_role_nickname=model_role_nickname,
+            use_local_model=False,
+        )
+    else:
+        response = chatWorld.chatWithoutCharacter(
+            text=message,
+            use_local_model=False,
+        )
     return response
         return []
+def change_role_list(name):
+    global role_name_dict_global
+    return role_name_dict_global[name]
+with gr.Blocks() as demo:
     upload_c = gr.File(label="上传文档文件")
     with gr.Row():
         role_name = gr.Radio(get_role_list(), label="角色名")
         role_nickname = gr.Textbox(label="角色昵称")
+    model_role_name.change(
+        fn=change_role_list, inputs=[model_role_name], outputs=[model_role_nickname]
+    )
+    role_name.change(fn=change_role_list, inputs=[role_name], outputs=[role_nickname])
+    upload_c.upload(
+        fn=getContent, inputs=upload_c, outputs=[model_role_name, role_name]
+    )
+    withCharacter = gr.Radio([True, False], value=True, label="是否进行角色扮演")
     with gr.Row():
         chatBox_local = gr.ChatInterface(
+            submit_message,
+            chatbot=gr.Chatbot(height=400, label="本地模型", render=False),
+            additional_inputs=[
+                model_role_name,
+                role_name,
+                model_role_nickname,
+                role_nickname,
+                withCharacter,
+            ],
+        )
         chatBox_api = gr.ChatInterface(
+            submit_message_api,
+            chatbot=gr.Chatbot(height=400, label="API模型", render=False),
+            additional_inputs=[
+                model_role_name,
+                role_name,
+                model_role_nickname,
+                role_nickname,
+                withCharacter,
+            ],
+        )
+demo.launch(share=True, server_name="0.0.0.0")

main.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import hashlib
+import os
+import time
+from fastapi import Body, FastAPI, File, Form, HTTPException, Response, UploadFile
+from fastapi.responses import JSONResponse
+import uvicorn
+from src import ChatWorld
+from src.Response import ChatResponse, FileResponse
+from src.logging import logging_info
+from src.user import UUID, Role, User
+from src.utils import convertToUTF8
+# FastAPI application exposing the upload and chat endpoints below.
+app = FastAPI()
+# Single ChatWorld instance shared by every request.
+chatWorld = ChatWorld()
+# Directory containing this file; uploads are stored beneath it.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Upload directory name, joined onto BASE_DIR by upload_file.
+SAVE_DIR = "uploads"
+# In-memory per-client state keyed by the client-supplied uuid.
+user_info: dict[UUID, User] = dict()
+@app.post("/uploadFile", response_model=FileResponse)
+def upload_file(uuid: str = Form(), file: UploadFile = File(...)):
+    if not os.path.exists(os.path.join(BASE_DIR, SAVE_DIR)):
+        os.makedirs(os.path.join(BASE_DIR, SAVE_DIR))
+    file_name = f"{time.time_ns()}_{uuid}_{file.filename}"
+    file_path = os.path.join(BASE_DIR, SAVE_DIR, file_name)
+    file_content = file.file.read()
+    with open(file_path, "wb") as f:
+        f.write(file_content)
+    file_content_utf8 = convertToUTF8(file_content)
+    chatWorld.setStory(
+        stories=file_content_utf8,
+        metas={
+            "uuid": uuid,
+        },
+    )
+    user_info[uuid] = User()
+    print(user_info)
+    role_name_list,role_name_dict = chatWorld.getRoleNameFromFile(file_content_utf8)
+    return FileResponse(
+        filename=file_name,
+        role_name_list=role_name_list,
+        role_name_dict=role_name_dict,
+        md5=hashlib.md5(file_content).hexdigest(),
+    )
+@app.post("/chatWithCharacter", response_model=ChatResponse)
+def chatWithCharacter(
+    uuid: str = Body(...),
+    text: str = Body(...),
+    use_local_model: bool = Body(False),
+    top_k: int = Body(5),
+    role_info: Role = Body(...),
+):
+    user = user_info.get(uuid)
+    if not user:
+        raise HTTPException(status_code=400, detail="User not found")
+    user_info[uuid] = user.update(role_info.model_dump())
+    logging_info(f"user_info: {user_info}")
+    response = chatWorld.chatWithCharacter(
+        text=text,
+        use_local_model=use_local_model,
+        top_k=top_k,
+        **role_info.model_dump(),
+        metas={"uuid": uuid},
+    )
+    return ChatResponse(response=response)
+# @app.post("/chatWithoutCharacter")
+# def chatWithoutCharacter(
+#     uuid: str = Body(...),
+#     text: str = Body(...),
+#     use_local_model: bool = Body(...),
+# ):
+#     pass
+# When run as a script, serve the app on all interfaces at port 8000.
+if __name__ == "__main__":
+    uvicorn.run("main:app", host="0.0.0.0", port=8000)

requirements.txt CHANGED Viewed

@@ -5,3 +5,7 @@ transformers==4.38.1
 accelerate
 zhipuai
 sentencepiece

 accelerate
 zhipuai
 sentencepiece
+tiktoken
+sentence-transformers
+langchain
+chromadb

run_fastapi.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+export CUDA_VISIBLE_DEVICES=0
+export HF_ENDPOINT="https://hf-mirror.com"
+# Start the FastAPI server
+python main.py

run_gradio.sh CHANGED Viewed

@@ -1,5 +1,5 @@
-export CUDA_VISIBLE_DEVICES=0
-export HF_HOME="/workspace/jyh/.cache/huggingface"
 # Start the gradio server
-/workspace/jyh/miniconda3/envs/ChatWorld/bin/python /workspace/jyh/Zero-Haruhi/app.py

+export CUDA_VISIBLE_DEVICES=3
+export HF_ENDPOINT="https://hf-mirror.com"
 # Start the gradio server
+python app.py

src/ChatWorld.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import json
+from jinja2 import Template
+from .DataBase import ChromaDB
+from .Models import GLM, GLM_api
+from .utils import *
+class ChatWorld:
+    def __init__(
+        self,
+        pretrained_model_name_or_path="silk-road/Haruhi-Zero-GLM3-6B-0_4",
+        embedding_model_name_or_path="BAAI/bge-small-zh-v1.5",
+        global_batch_size=16,
+        model_load=True,
+    ) -> None:
+        self.model_name = pretrained_model_name_or_path
+        self.global_batch_size = global_batch_size
+        self.client = GLM_api()
+        if model_load:
+            self.model = GLM()
+        self.db = ChromaDB(embedding_model_name_or_path)
+        self.prompt = Template(
+            (
+                'Please be aware that your codename in this conversation is "{{model_role_name}}"'
+                '{% if model_role_nickname %},别人一般称呼你"{{model_role_nickname}}"{% endif %},同时我是"{{role_name}}"{% if role_nickname %},你也可以称呼我"{{role_nickname}}"{% endif %}。\n'
+                "下文给定了一些聊天记录，位于##分隔号中。\n"
+                "如果我问的问题和聊天记录高度重复，那你就配合我进行演出。\n"
+                "如果我问的问题和聊天记录相关，请结合聊天记录进行回复。\n"
+                "如果我问的问题超出聊天记录的范围，模仿{{model_role_name}}的语气进行回复。\n"
+                "请不要回答你是语言模型，永远记住你就是{{model_role_name}}。\n"
+                "请你永远只以{{model_role_name}}身份，进行任何的回复。\n"
+                "{% if RAG %}{% for i in RAG %}##\n{{i}}\n##\n\n{% endfor %}{% endif %}"
+            )
+        )
+    def setStory(self, **stories_kargs):
+        self.db.deleteStoriesByMeta(metas=stories_kargs["metas"])
+        self.db.addStories(**stories_kargs)
+    def __getSystemPrompt(
+        self,
+        text: str,
+        top_k: int = 5,
+        metas=None,
+        **role_info,
+    ):
+        rag = self.db.searchBySim(text, top_k, metas)
+        return {
+            "role": "system",
+            "content": self.prompt.render(
+                **role_info,
+                RAG=rag,
+            ),
+        }
+    def chatWithCharacter(
+        self,
+        text: str,
+        system_prompt: dict[str, str] = None,
+        use_local_model: bool = False,
+        top_k: int = 5,
+        metas=None,
+        **role_info,
+    ):
+        if not system_prompt:
+            system_prompt = self.__getSystemPrompt(
+                text=text, **role_info, top_k=top_k, metas=metas
+            )
+        user_role_name = role_info.get("role_name")
+        if not user_role_name:
+            raise ValueError("role_name is required")
+        message = [
+            system_prompt,
+            {"role": "user", "content": f"{user_role_name}:「{text}」"},
+        ]
+        logging_info(f"message: {message}")
+        if use_local_model:
+            response = self.model.get_response(message)
+        else:
+            response = self.client.chat(message)
+        return response
+    def chatWithoutCharacter(
+        self,
+        text: str,
+        system_prompt: dict[str, str] = None,
+        use_local_model: bool = False,
+    ):
+        logging_info(f"text: {text}")
+        message = [
+            {"role": "user", "content": f"{text}"},
+        ]
+        if use_local_model:
+            response = self.model.get_response(text)
+        else:
+            response = self.client.chat(message)
+        return response
+    def getRoleNameFromFile(self, input_file: str):
+        # # 读取文件内容
+        # logging_info(f"file content: {input_file}")
+        # # 保存文件内容
+        # input_text_list = input_file.split("\n")
+        # role_name_set = set()
+        # # 读取角色名
+        # for line in input_text_list:
+        #     role_name_set.add(line.split(":")[0])
+        # role_name_list = [i for i in role_name_set if i != ""]
+        # logging_info(f"role_name_list: {role_name_list}")
+        prompt = (
+            f"{input_file}\n"
+            + '请你提取包含“人”(name,nickname)类型的所有信息，如果nickname不存在则设置为空字符串，并输出JSON格式。并且不要提取出重复的同一个人。例如格式如下：\n```json\n [{"name": "小明","nickname": "小明"},{"name": "小红","nickname": ""}]```'
+        )
+        respense = self.chatWithoutCharacter(prompt, use_local_model=False)
+        json_start_index = respense.find("```json")
+        json_end_index = respense.find("```", json_start_index + 1)
+        json_str = respense[json_start_index + 7 : json_end_index]
+        print(json_str)
+        try:
+            json_str = json.loads(json_str)
+            role_name_list = [i["name"] for i in json_str]
+            role_name_dict = {i["name"]: i["nickname"] for i in json_str}
+        except Exception as e:
+            print(e)
+            role_name_list = []
+            role_name_dict = {}
+        return role_name_list, role_name_dict

src/DataBase/BaseDB.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from abc import ABCMeta, abstractmethod
+from typing import Union
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from transformers import AutoTokenizer
+from langchain.text_splitter import TokenTextSplitter
+from langchain_core.documents import Document
+class BaseDB(metaclass=ABCMeta):
+    def __init__(self, embedding_name: str = None, persist_dir=None) -> None:
+        super().__init__()
+        self.client = None
+        if persist_dir:
+            self.persist_dir = persist_dir
+        else:
+            self.persist_dir = "data"
+        if not embedding_name:
+            embedding_name = "BAAI/bge-small-zh-v1.5"
+        self.embedding = HuggingFaceEmbeddings(model_name=embedding_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(embedding_name)
+        self.init_db()
+    @abstractmethod
+    def init_db(self):
+        pass
+    def text_splitter(
+        self, text: Union[str, Document], chunk_size=300, chunk_overlap=10
+    ):
+        if isinstance(text, Document):
+            return TokenTextSplitter.from_huggingface_tokenizer(
+                self.tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap
+            ).split_documents(text)
+        elif isinstance(text, str):
+            return TokenTextSplitter.from_huggingface_tokenizer(
+                self.tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap
+            ).split_text(text)
+        else:
+            raise ValueError("text must be a str or Document")
+    @abstractmethod
+    def addStories(self, stories, metas=None):
+        pass
+    @abstractmethod
+    def deleteStoriesByMeta(self, metas):
+        pass
+    @abstractmethod
+    def searchBySim(self, query, n_results, metas, only_return_document=True):
+        pass
+    @abstractmethod
+    def searchByMeta(self, metas=None):
+        pass

src/DataBase/ChromaDB.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import logging
+from langchain_community.vectorstores.chroma import Chroma
+from src.logging import logging_info
+from .BaseDB import BaseDB
+# TODO: persist the database and load it when a user connects.
+class ChromaDB(BaseDB):
+    def __init__(self, embedding_name: str = None, persist_dir=None) -> None:
+        super().__init__(embedding_name, persist_dir)
+        # logging_info(self.embedding)
+    def init_db(self):
+        self.client = Chroma(
+            persist_directory=self.persist_dir, embedding_function=self.embedding
+        )
+    def addStories(self, stories: str, metas: dict = None):
+        logging_info(self.text_splitter(stories)[-1])
+        split_stories = self.text_splitter(stories)
+        self.client.add_texts(
+            texts=split_stories, metadatas=[metas] * len(split_stories)
+        )
+    def searchBySim(
+        self, query, n_results=5, metas: dict = None, only_return_document=True
+    ):
+        result = self.client.similarity_search(query, k=n_results, filter=metas)
+        # print(result)
+        if only_return_document:
+            return [i.page_content for i in result]
+        return result
+    def deleteStoriesByMeta(self, metas):
+        ids = self.searchByMeta(metas=metas)["ids"]
+        if ids:
+            self.client.delete(ids)
+    def searchByMeta(self, metas=None, include: list[str] = None) -> dict[str, any]:
+        return self.client.get(where=metas, include=include)

src/DataBase/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .ChromaDB import ChromaDB
2	+
3	+ __all__ = ['ChromaDB']

src/Models/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .models import GLM,GLM_api
2	+
3	+ __all__ = ["GLM", "GLM_api"]

src/Models/models.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+from string import Template
+from typing import Dict, List, Union
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from zhipuai import ZhipuAI
+class GLM:
+    def __init__(self, model_name="silk-road/Haruhi-Zero-GLM3-6B-0_4"):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name, trust_remote_code=True
+        )
+        client = AutoModelForCausalLM.from_pretrained(
+            model_name, trust_remote_code=True, device_map="auto"
+        )
+        self.client = client.eval()
+    def message2query(self, messages) -> str:
+        # [{'role': 'user', 'content': '老师: 同学请自我介绍一下'}]
+        # <|system|>
+        # You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.
+        # <|user|>
+        # Hello
+        # <|assistant|>
+        # Hello, I'm ChatGLM3. What can I assist you today?
+        template = Template("<|$role|>\n$content\n")
+        return "".join([template.substitute(message) for message in messages])
+    def get_response(
+        self,
+        message: Union[str, list[dict[str, str]]],
+        history: List[Dict[str, str]] = None,
+    ):
+        if isinstance(message, str):
+            response, history = self.client.chat(self.tokenizer, message)
+        elif isinstance(message, list):
+            response, history = self.client.chat(
+                self.tokenizer, message[-1]["content"],history=message[:-1]
+            )
+        # print(self.message2query(message))
+        print(response)
+        return response
+class GLM_api:
+    def __init__(self, model_name="glm-4"):
+        API_KEY = os.environ.get("ZHIPU_API_KEY")
+        self.client = ZhipuAI(api_key=API_KEY)
+        self.model = model_name
+    def chat(self, message):
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model, messages=message
+            )
+        except Exception as e:
+            print(e)
+            return "模型连接失败"
+        return response.choices[0].message.content

src/Response.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from pydantic import BaseModel
+# Response body of the chat endpoint.
+class ChatResponse(BaseModel):
+    # Text generated by the model.
+    response: str
+# Response body of the upload endpoint.
+class FileResponse(BaseModel):
+    # Name under which the upload was stored on the server.
+    filename: str
+    # Role names extracted from the uploaded story.
+    role_name_list: list[str] = []
+    # Role name -> nickname mapping extracted from the uploaded story.
+    role_name_dict: dict[str, str] = {}
+    # Hex MD5 digest of the uploaded bytes.
+    md5: str = None

src/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .ChatWorld import ChatWorld
2	+
3	+ __all__ = ['ChatWorld']

src/logging.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import logging
+# Configure the root logger at import time: DEBUG and above go to demo.log.
+# filemode="w" truncates the file on every start.
+logging.basicConfig(
+    level=logging.DEBUG,
+    filename="demo.log",
+    filemode="w",
+    format="%(asctime)s - %(name)s - %(levelname)-9s - %(filename)-8s : %(lineno)s line - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+# Path: src/logging.py
+def logging_info(text: str):
+    logging.info(text)

src/user.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from pydantic import BaseModel
+UUID = str
+class Role(BaseModel):
+    role_name: str
+    role_nickname: str = None
+    model_role_name: str
+    model_role_nickname: str = None
+class User:
+    history: list[str] = []
+    role_name: str = None
+    role_nickname: str = None
+    model_role_name: str = None
+    model_role_nickname: str = None
+    def update(self, new_properties: dict) -> "User":
+        for k, v in new_properties.items():
+            setattr(self, k, v)
+        return self

src/utils.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from jinja2 import Template
+from transformers import AutoModel, AutoTokenizer
+from .logging import logging_info
+def initEmbedding(model_name="BAAI/bge-small-zh-v1.5", **model_wargs):
+    return AutoModel.from_pretrained(model_name, **model_wargs)
+def initTokenizer(model_name="BAAI/bge-small-zh-v1.5", **model_wargs):
+    return AutoTokenizer.from_pretrained(model_name, **model_wargs)
+def detectEncoding(b: bytes):
+    import chardet
+    logging_info(f"chardet.detect(b): {chardet.detect(b)}")
+    return chardet.detect(b)["encoding"]
+def convertToUTF8(b: bytes):
+    if detectEncoding(b):
+        return b.decode(detectEncoding(b))
+    return b.decode("utf-8")