""" 1. 完成了用Qwen通义千问作为知识库查询。 1. 总共有三个区块:知识库回答,应用来源,相关问题。 1. 在Huggingface的API上部署了一个在线BGE的模型,用于回答问题。OpenAI的Emebedding或者Langchain的Embedding都不可以用(会报错: self.d)。 注意事项: 1. langchain_KB.py中的代码是用来构建本地知识库的,里面的embeddings需要与rag_response_002.py中的embeddings一致。否则会出错! """ ##TODO: # -*- coding: utf-8 -*- import requests import streamlit as st import openai import os import numpy as np import pandas as pd import csv import tempfile from tempfile import NamedTemporaryFile import pathlib from pathlib import Path import re from re import sub import matplotlib.pyplot as plt from itertools import product from tqdm import tqdm_notebook, tqdm, trange import time from time import sleep from matplotlib.pyplot import style from rich import print import warnings import streamlit_authenticator as stauth # from langchain.vectorstores import FAISS from langchain_community.vectorstores import FAISS from langchain.embeddings.huggingface import HuggingFaceEmbeddings from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnablePassthrough from langchain_core.runnables import RunnableParallel from langchain.llms.base import LLM from langchain.llms.utils import enforce_stop_tokens from typing import Dict, List, Optional, Tuple, Union import requests import json import streamlit as st # import rag_reponse_001 import qwen_response import rag_reponse_002 # import chatgpt # from st_copy_to_clipboard import st_copy_to_clipboard import clipboard import dashscope from dotenv import load_dotenv # warnings.filterwarnings('ignore') from datetime import datetime import pytz from pytz import timezone # def get_current_time(): # beijing_tz = timezone('Asia/Shanghai') # beijing_time = datetime.now(beijing_tz) # current_time = beijing_time.strftime('%H:%M:%S') # return current_time load_dotenv() ### 设置openai的API key os.environ["OPENAI_API_KEY"] = os.environ['user_token'] openai.api_key = os.environ['user_token'] bing_search_api_key = os.environ['bing_api_key'] 
# API key for the DashScope SDK, taken from the environment; a missing
# variable raises KeyError at start-up.
dashscope.api_key = os.environ["dashscope_api_key"]

### Streamlit page setup.
# The page config is issued ahead of the other st.* calls in this section.
st.set_page_config(layout="wide")
st.title("本地化国产大模型智能知识库查询演示")
# Alternative titles / subtitle, currently disabled:
# st.title("大语言模型智能知识库查询中心")
# st.title("大语言模型本地知识库问答系统")
# st.subheader("Large Language Model-based Knowledge Base QA System")

# AI-generated-content disclaimer. Only the short caption is active; the
# longer wording and the other widget styles are kept as disabled variants.
# st.warning("_声明:内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
st.caption("_声明:内容由人工智能生成,仅供参考。您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
# st.caption("_声明:内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
# st.info("_声明:内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
# st.divider()

### upload file
# Disabled: report which local knowledge base is currently on disk.
# username = 'test'
# path = f'./{username}/faiss_index/index.faiss'
# if os.path.exists(path):
#     print(f'{path} local KB exists')
#     database_info = pd.read_csv(f'./{username}/database_name.csv')
#     current_database_name = database_info.iloc[-1][0]
#     current_database_date = database_info.iloc[-1][1]
#     database_claim = f"当前知识库为:{current_database_name},创建于{current_database_date}。可以开始提问!"
# st.markdown(database_claim) # uploaded_file = st.file_uploader( # "选择上传一个新知识库", type=(["pdf"])) # # 默认状态下没有上传文件,None,会报错。需要判断。 # if uploaded_file is not None: # # uploaded_file_path = upload_file(uploaded_file) # upload_file(uploaded_file) # # ## 创建向量数据库 # from langchain.embeddings.openai import OpenAIEmbeddings # embeddings = OpenAIEmbeddings(disallowed_special=()) ## 这里是联网情况下,部署在Huggingface上后使用。 # print('embeddings:', embeddings) # embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # # embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name) ## 这里是联网情况下连接huggingface后使用。 # embeddings = HuggingFaceEmbeddings(model_name='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/RAG/bge-large-zh') ## 切换成BGE的embedding。 # embeddings = HuggingFaceEmbeddings(model_name='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/RAG/bge-large-zh/') ## 切换成BGE的embedding。 # embeddings = HuggingFaceEmbeddings(model_name='/Users/yunshi/Downloads/chatGLM/My_LocalKB_Project/GanymedeNil_text2vec-large-chinese/') ## 这里会有个“No sentence-transformers model found with name“的warning,但不是error,不影响使用。 ### authentication with a local yaml file. import yaml from yaml.loader import SafeLoader with open('./config.yaml') as file: config = yaml.load(file, Loader=SafeLoader) authenticator = stauth.Authenticate( config['credentials'], config['cookie']['name'], config['cookie']['key'], config['cookie']['expiry_days'], config['preauthorized'] ) user, authentication_status, username = authenticator.login('用户登录', 'main') if authentication_status: with st.sidebar: st.markdown( """