import faiss
import numpy as np
import pandas as pd
import streamlit as st
import torch
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm


@st.cache_resource
def load_llm():
    """Download the quantized Llama-2 chat model once and cache it across Streamlit reruns."""
    # Alternative Russian-language model:
    # return Llama(model_path=hf_hub_download(repo_id="DFofanov78/ruGPT-3.5-13B-GGUF", filename="ruGPT-3.5-13B-Q4_0.gguf"), n_ctx=2048)
    return Llama(
        model_path=hf_hub_download(
            repo_id="TheBloke/Llama-2-7b-Chat-GGUF",
            filename="llama-2-7b-chat.Q4_K_M.gguf",
        ),
        n_ctx=2048,
    )


@st.cache_resource
def load_embedder():
    """Load the multilingual sentence-embedding model once per session."""
    return SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')


def generate_embeddings(model, text):
    """Encode text with the sentence-transformers model and return a NumPy vector."""
    with torch.no_grad():
        embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings.cpu().numpy()


def load_documents_from_jsonl(embeddings_model, jsonl_path, create_embeddings=False):
    """Load a JSONL file of Q&A pairs and, optionally, embed every question."""
    tqdm.pandas(desc="Loading Data")
    df = pd.read_json(jsonl_path, lines=True)
    # Normalize column names: any field containing 'Question'/'Answer' is renamed to exactly that.
    df.columns = ['Question' if 'Question' in col else 'Answer' if 'Answer' in col else col
                  for col in df.columns]
    if create_embeddings:
        df['Embeddings'] = df['Question'].progress_apply(
            lambda q: generate_embeddings(embeddings_model, str(q).lower()))
    return df


def save_to_faiss(df):
    """Build a flat L2 index over the precomputed embeddings and persist it to disk."""
    dimension = len(df['Embeddings'].iloc[0])
    db = faiss.IndexFlatL2(dimension)
    embeddings = np.array(df['Embeddings'].tolist()).astype('float32')
    db.add(embeddings)
    faiss.write_index(db, "faiss_index")


def search_in_faiss(query_vector, df, k=5):
    """Return the k nearest answers with their L2 distances."""
    db = faiss.read_index("faiss_index")
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    distances, indices = db.search(query_vector, k)
    results = []
    for idx, dist in zip(indices[0], distances[0]):
        answer_text = df.iloc[idx]['Answer']
        # IndexFlatL2 returns squared distances; take the square root to get the true L2 distance.
        results.append({"Answer": answer_text, "Distance": float(np.sqrt(dist))})
    return results


def main():
    # Application title
    st.title("Demo for LLAMA-2 RAG with CPU only")

    llm = load_llm()
    model = load_embedder()

    # Embeddings must be created here; otherwise save_to_faiss() has nothing to index.
    df_qa = load_documents_from_jsonl(model, 'ExportForAI2.jsonl', create_embeddings=True)
    save_to_faiss(df_qa)

    # Text input for the user's question
    input_text = st.text_input("Input", "")

    # Streamlit reruns the whole script on every widget interaction, so the retrieval
    # results must live in session_state to survive the rerun triggered by "LLM Answer".
    if "results" not in st.session_state:
        st.session_state.results = [{"Answer": "", "Distance": 0} for _ in range(3)]

    # "Answer" button: retrieve the three nearest answers from FAISS
    if st.button("Answer"):
        query_vector = model.encode(input_text.lower())
        st.session_state.results = search_in_faiss(query_vector, df_qa, k=3)

    # Table with the retrieved answers
    st.write("Most relevant answers")
    st.table(st.session_state.results)

    # Placeholder for the LLM answer
    llm_output_text = st.empty()

    # "LLM Answer" button: feed the top retrieved answer to the LLM as context
    if st.button("LLM Answer"):
        question = input_text.lower()
        context = st.session_state.results[0]["Answer"]
        # Llama-2 chat prompt format. The system prompt (in Russian, matching the dataset)
        # says: "You are a question-answering assistant. Use the following piece of retrieved
        # context to answer the question. If you don't know the answer, just say that you
        # don't know. Use at most three sentences and be concise."
        prompt = f'''[INST] <<SYS>>
Вы помощник в вопросах и ответах. Используйте следующий фрагмент полученного контекста, чтобы ответить на вопрос. Если вы не знаете ответа, просто скажите, что не знаете. Используйте максимум три предложения и будьте краткими.
<</SYS>>
Вопрос: {question}
Контекст: {context}
Ответ: [/INST]'''
        output = llm(prompt, max_tokens=512, stream=True)
        text_output = ""
        # Stream tokens into the placeholder as they arrive
        for out in output:
            text_output += out["choices"][0]["text"]
            llm_output_text.text("LLAMA generated answer:\n" + text_output)


# Run the app
if __name__ == "__main__":
    main()
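# Usage sketch (assuming this file is saved as app.py and that ExportForAI2.jsonl
# sits next to it, one JSON object per line whose field names contain "Question"
# and "Answer", e.g. {"Question": "...", "Answer": "..."}):
#
#   pip install streamlit faiss-cpu sentence-transformers llama-cpp-python huggingface_hub pandas tqdm
#   streamlit run app.py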