|
from transformers import AutoTokenizer, AutoModel |
|
from langchain.document_loaders import TextLoader |
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
from langchain.llms import HuggingFaceHub |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.vectorstores import FAISS |
|
import datetime |
|
|
|
|
|
# Retrieval embeddings: CPU-hosted MPNet sentence encoder from
# sentence-transformers; output vectors are left un-normalized.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}           # force CPU inference
encode_kwargs = {'normalize_embeddings': False}  # keep raw vector magnitudes

# Wrapper object that LangChain's vector store uses to embed texts/queries.
eb = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
|
|
|
# Load the source document and split it into small overlapping chunks so
# retrieval can return focused passages.
loader = TextLoader("./test.txt")
docs = loader.load()

# 100-char chunks with a 50-char overlap between neighbors.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=50)

# Flatten every loaded document into its list of chunk strings.
# (Comprehension replaces the original manual nested append loop.)
tl = [
    chunk
    for doc in docs
    for chunk in text_splitter.split_text(doc.page_content)
]

# Build an in-memory FAISS index over the chunks, embedding them with `eb`.
vector_store = FAISS.from_texts(tl, eb)
|
# Retrieve the 4 chunks most similar to the question.
query = '为什么client不直接从data server拉取数据'
docs = vector_store.similarity_search(query, k=4)

# Number the retrieved chunks (1-based) and join them into one context
# string. enumerate + str.join replaces the original index-based loop with
# quadratic `+=` concatenation; the emitted text is identical.
context = ''.join(
    f'{rank}、{doc.page_content}\n' for rank, doc in enumerate(docs, start=1)
)

# Grounded prompt: retrieved context first, then the question.
prompt = f'已知:\n{context}\n问题:\n{query}'
print(prompt)
|
|
|
# Bring up the 4-bit quantized ChatGLM-6B checkpoint on CPU (hence
# `.float()`) and answer the assembled prompt. Timestamped prints bracket
# the slow model-loading step.
print(f'[{datetime.datetime.now()}]start to init model')
checkpoint = "THUDM/chatglm-6b-int4"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).float().eval()
print(f'[{datetime.datetime.now()}]model ready, start to chat')

# Single-turn chat: empty history, context-grounded prompt.
response, history = model.chat(tokenizer, prompt, history=[])

print(f'[{datetime.datetime.now()}]response:{response}')
|
|