ChatWorld / src /utils.py
JiangYH's picture
Upload folder using huggingface_hub
6f179e7 verified
from jinja2 import Template
from transformers import AutoModel, AutoTokenizer
from .logging import logging_info
def initEmbedding(model_name="BAAI/bge-small-zh-v1.5", **model_wargs):
    """Load a pretrained model through ``AutoModel.from_pretrained``.

    Args:
        model_name: Identifier handed to ``from_pretrained``; defaults to
            ``"BAAI/bge-small-zh-v1.5"``.
        **model_wargs: Extra keyword arguments forwarded unchanged to
            ``AutoModel.from_pretrained``.

    Returns:
        Whatever ``AutoModel.from_pretrained`` returns for the given name.
    """
    embedding_model = AutoModel.from_pretrained(model_name, **model_wargs)
    return embedding_model
def initTokenizer(model_name="BAAI/bge-small-zh-v1.5", **model_wargs):
    """Load a pretrained tokenizer through ``AutoTokenizer.from_pretrained``.

    Args:
        model_name: Identifier handed to ``from_pretrained``; defaults to
            ``"BAAI/bge-small-zh-v1.5"``.
        **model_wargs: Extra keyword arguments forwarded unchanged to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        Whatever ``AutoTokenizer.from_pretrained`` returns for the given name.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, **model_wargs)
    return tokenizer
def detectEncoding(b: bytes):
    """Guess the character encoding of *b* using ``chardet``.

    The full detection result is logged through ``logging_info`` before the
    encoding is returned.

    Args:
        b: Raw bytes whose encoding should be guessed.

    Returns:
        The value stored under the ``"encoding"`` key of the
        ``chardet.detect`` result. Per the chardet documentation this may be
        ``None`` when no encoding could be determined.
    """
    # Imported at call time, as in the original, so the module can be
    # imported without chardet unless this function is actually used.
    import chardet

    # Run detection once and reuse the result: scanning the buffer is the
    # expensive part, and doing it separately for the log line and for the
    # return value doubled that cost.
    detection = chardet.detect(b)
    logging_info(f"chardet.detect(b): {detection}")
    return detection["encoding"]
def convertToUTF8(b: bytes) -> str:
    """Decode *b* into text using its detected encoding.

    Despite the name, the result is a decoded ``str``, not UTF-8 encoded
    bytes.

    Args:
        b: Raw bytes to decode.

    Returns:
        The decoded string. The encoding reported by ``detectEncoding`` is
        used when it is truthy; otherwise the bytes are decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
        LookupError: If the detected encoding name is unknown to Python.
    """
    # Detect exactly once: each detectEncoding call rescans the whole buffer
    # and emits a log line, so calling it for both the check and the decode
    # doubled the work and duplicated the log output.
    encoding = detectEncoding(b)
    return b.decode(encoding or "utf-8")