# LLAMA2_QA_RAG / app.py
import copy
import faiss
import numpy as np
import pandas as pd
import streamlit as st
import torch
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm
# Download (or reuse the locally cached copy of) the quantized GGUF model and run it on CPU
llm = Llama(
    model_path=hf_hub_download(repo_id="TheBloke/Llama-2-7b-Chat-GGUF", filename="llama-2-7b-chat.Q4_K_M.gguf"),
    n_ctx=2048,
)
# Alternative Russian-language model:
# llm = Llama(model_path=hf_hub_download(repo_id="DFofanov78/ruGPT-3.5-13B-GGUF", filename="ruGPT-3.5-13B-Q4_0.gguf"), n_ctx=2048)
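# hf_hub_download caches the file locally, so the weights are only fetched once;
# Q4_K_M is a 4-bit quantization, which is what keeps inference feasible on CPU.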
def list_to_numpy(obj):
    # JSON stores vectors as plain lists; convert them back to numpy arrays
    if isinstance(obj, list):
        return np.array(obj)
    return obj
def load_documents_from_jsonl(embeddings_model, jsonl_path, createEmbeddings=False):
    tqdm.pandas(desc="Loading Data")
    df = pd.read_json(jsonl_path, lines=True).progress_apply(lambda x: x)  # no-op apply; shows a progress bar
    # Normalize column names so downstream code can rely on 'Question'/'Answer'
    df.columns = ['Question' if 'Question' in col else 'Answer' if 'Answer' in col else col for col in df.columns]
    if createEmbeddings:  # compute embeddings here; otherwise they must already be in the file
        df['Embeddings'] = df['Question'].progress_apply(lambda q: embeddings_model.encode(q.lower()))
    elif 'Embeddings' in df.columns:
        df['Embeddings'] = df['Embeddings'].apply(list_to_numpy)
    return df
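# Expected JSONL layout (one object per line; field names inferred from the
# column normalization above), e.g.:
#   {"Question": "...", "Answer": "...", "Embeddings": [0.12, -0.03, ...]}
# 'Embeddings' may be omitted when createEmbeddings=True.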
def generate_embeddings(tokenizer, model, text):
    # tokenizer is unused: SentenceTransformer.encode handles tokenization internally
    with torch.no_grad():
        embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings.cpu().numpy()
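# Hypothetical usage (this helper is not called anywhere else in the app):
#   vec = generate_embeddings(None, model, "some question")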
def save_to_faiss(df):
    # Build an exact (brute-force) L2 index sized to the embedding dimensionality
    dimension = len(df['Embeddings'].iloc[0])
    db = faiss.IndexFlatL2(dimension)
    embeddings = np.array(df['Embeddings'].tolist()).astype('float32')
    db.add(embeddings)
    faiss.write_index(db, "faiss_index")
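# Note: IndexFlatL2.search() returns *squared* L2 distances;
# search_in_faiss() below takes the square root to report plain L2 distances.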
def search_in_faiss(query_vector, df, k=5):
    db = faiss.read_index("faiss_index")
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    distances, indices = db.search(query_vector, k)
    results = []
    for idx, dist in zip(indices[0], distances[0]):
        answer_text = df.iloc[idx]['Answer']
        dist = np.sqrt(dist)  # convert squared L2 distance to plain L2
        results.append({"Answer": answer_text, "Distance": dist})
    return results
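# Illustrative end-to-end retrieval call (names match main() below):
#   hits = search_in_faiss(model.encode(question.lower()), df_qa, k=3)
#   -> [{"Answer": "...", "Distance": 0.87}, ...]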
def main():
    # Application title
    st.title("Demo for LLAMA-2 RAG with CPU only")
    model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
    df_qa = load_documents_from_jsonl(model, 'ExportForAI2.jsonl', False)
    save_to_faiss(df_qa)
    # Text input for the user's question
    input_text = st.text_input("Input", "")
    dataList = [
        {"Answer": "", "Distance": 0},
        {"Answer": "", "Distance": 0},
        {"Answer": "", "Distance": 0}
    ]
    # "Answer" button: embed the question and retrieve the nearest stored answers
    if st.button("Answer"):
        query_vector = model.encode(input_text.lower())
        dataList = search_in_faiss(query_vector, df_qa, k=3)
    # Table with the retrieved answers
    st.write("Most relevant answers")
    st.table(dataList)
    # Placeholder that will hold the LLM's streamed answer
    llm_output_text = st.empty()
    # "LLM Answer" button: generate an answer grounded in the best retrieved context
    if st.button("LLM Answer"):
        question = input_text.lower()
        context = dataList[0]["Answer"]
        text_input = f'''
[INST]<<SYS>> You are an assistant for question answering. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>>
Question: {question}
Context: {context}
Answer: [/INST]
'''
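        # [INST]...[/INST] and <<SYS>>...<</SYS>> are the Llama-2-chat prompt
        # markers the model was instruction-tuned on.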
        # Stream tokens from llama.cpp, accumulating and re-rendering as they arrive
        output = llm(text_input, max_tokens=512, stream=True)
        text_output = ""
        for out in output:
            stream = copy.deepcopy(out)  # copy the chunk before reading it; streamed dicts may be reused
            text_output += stream["choices"][0]["text"]
            llm_output_text.text("LLAMA generated answer:\n" + text_output)
# Run the app
if __name__ == "__main__":
    main()