Spaces:
Sleeping
Sleeping
File size: 3,913 Bytes
3c4a7fb 69d4a53 bb2f2e7 3c4a7fb 4289de9 6ecdc04 94899d7 5e36062 e31f1f0 3c4a7fb 6ecdc04 3c4a7fb bdeb96a 3c4a7fb bb2f2e7 3c4a7fb bb2f2e7 bdeb96a 2714936 062df6a 3c4a7fb bb2f2e7 bea368c bb2f2e7 3c4a7fb bea368c bb2f2e7 3c4a7fb bb2f2e7 f29a842 94899d7 be55b3c f29a842 94899d7 f29a842 94899d7 bb2f2e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import json
import faiss
import streamlit as st
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer
import torch
import copy
import llama_cpp
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Module-level chat model used by the "LLM Answer" action: the GGUF weights
# are fetched with hf_hub_download and loaded with n_ctx=2048.
llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/Llama-2-7b-Chat-GGUF",
        filename="llama-2-7b-chat.Q4_K_M.gguf",
    ),
    n_ctx=2048,
)
# Alternative model, currently disabled:
#llm = Llama(model_path= hf_hub_download(repo_id="DFofanov78/ruGPT-3.5-13B-GGUF", filename="ruGPT-3.5-13B-Q4_0.gguf"), n_ctx=2048)
def list_to_numpy(obj):
    """Return ``np.array(obj)`` when *obj* is a list; otherwise return *obj* unchanged."""
    return np.array(obj) if isinstance(obj, list) else obj
def load_documents_from_jsonl(embeddings_model, jsonl_path, createEmbeddings=False):
    """Read a JSON-Lines file into a DataFrame with normalized column names.

    Any column whose name contains ``'Question'`` is renamed to ``'Question'``;
    otherwise, a column whose name contains ``'Answer'`` is renamed to
    ``'Answer'``; every other column keeps its name.

    ``embeddings_model`` and ``createEmbeddings`` are accepted but not used
    by this function.
    """

    def _canonical_name(col):
        # 'Question' takes precedence over 'Answer' when both substrings occur.
        if 'Question' in col:
            return 'Question'
        if 'Answer' in col:
            return 'Answer'
        return col

    tqdm.pandas(desc="Loading Data")
    frame = pd.read_json(jsonl_path, lines=True)
    # Identity apply: leaves the data as-is, it only drives the tqdm progress bar.
    frame = frame.progress_apply(lambda column: column)
    frame.columns = [_canonical_name(col) for col in frame.columns]
    return frame
def generate_embeddings(tokenizer, model, text):
    """Encode *text* with ``model.encode`` and return the result as a NumPy array.

    ``tokenizer`` is accepted but not used by this function. Encoding runs
    under ``torch.no_grad()``, and the resulting tensor is moved to the CPU
    before it is converted.
    """
    with torch.no_grad():
        encoded = model.encode(text, convert_to_tensor=True)
    return encoded.cpu().numpy()
def save_to_faiss(df, index_path="faiss_index"):
    """Build a flat L2 FAISS index from ``df['Embeddings']`` and write it to disk.

    Args:
        df: DataFrame with an ``Embeddings`` column; each cell holds one
            vector, and all vectors must have the same length.
        index_path: Destination file for the serialized index. The default
            keeps the original hard-coded location.

    Raises:
        ValueError: If *df* is empty or the embeddings do not form a 2-D
            (rows x dimension) matrix.
    """
    if df.empty:
        # Without this guard the failure is an opaque IndexError on iloc[0].
        raise ValueError("Cannot build a FAISS index from an empty DataFrame")

    embeddings = np.asarray(df['Embeddings'].tolist(), dtype='float32')
    if embeddings.ndim != 2:
        raise ValueError(
            "Embeddings must be equal-length vectors, got array of shape "
            f"{embeddings.shape}"
        )

    db = faiss.IndexFlatL2(embeddings.shape[1])
    db.add(embeddings)
    faiss.write_index(db, index_path)
def search_in_faiss(query_vector, df, k=5, index_path="faiss_index"):
    """Return the answers of the *k* nearest stored embeddings to *query_vector*.

    Args:
        query_vector: A single query embedding; it is cast to float32 and
            reshaped to one row.
        df: DataFrame whose row positions match the order in which the
            vectors were added to the index; must have an ``Answer`` column.
        k: Maximum number of neighbours to return.
        index_path: File to read the index from. The default keeps the
            original hard-coded location.

    Returns:
        A list of ``{"Answer": ..., "Distance": ...}`` dicts in the order
        FAISS returns them. It may hold fewer than *k* entries when the
        index contains fewer than *k* vectors.
    """
    db = faiss.read_index(index_path)
    query = np.asarray(query_vector, dtype='float32').reshape(1, -1)
    distances, indices = db.search(query, k)

    results = []
    for idx, squared_dist in zip(indices[0], distances[0]):
        if idx < 0:
            # FAISS fills missing neighbours with label -1; df.iloc[-1] would
            # silently return the last row instead of "no result".
            continue
        results.append({
            "Answer": df.iloc[idx]['Answer'],
            # The L2 index reports squared distances; take the root.
            "Distance": np.sqrt(squared_dist),
        })
    return results
def main():
    """Render the Streamlit RAG demo page.

    The page loads the Q/A data, builds the FAISS index, shows the three
    nearest answers for the typed question, and on request asks the LLM to
    answer using the best-ranked answer as context.

    Streamlit re-executes the script on every widget interaction and a
    button only reports ``True`` on the run triggered by its own click, so
    the retrieved rows are kept in ``st.session_state``; otherwise the
    "LLM Answer" run would always see the empty placeholder rows.
    """
    # Application title
    st.title("Demo for LLAMA-2 RAG with CPU only")

    model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
    df_qa = load_documents_from_jsonl(model, 'ExportForAI2.jsonl', False)
    save_to_faiss(df_qa)

    # Text field for entering the question
    input_text = st.text_input("Input", "")
    question = input_text.lower()

    def _retrieve():
        """Search for the current question and remember the rows across reruns."""
        hits = search_in_faiss(model.encode(question), df_qa, k=3)
        st.session_state["rag_query"] = question
        st.session_state["rag_results"] = hits
        return hits

    # "Answer" button
    if st.button("Answer"):
        _retrieve()

    # Placeholder rows shown until a search has been run.
    placeholder_rows = [{"Answer": "", "Distance": 0} for _ in range(3)]
    results = st.session_state.get("rag_results", placeholder_rows)

    # Table with the retrieved data
    st.write("Most relevants answers")
    table_slot = st.empty()
    table_slot.table(results)

    # Text area for the LLM output
    llm_output_text = st.empty()

    # "LLM Answer" button
    if st.button("LLM Answer"):
        if st.session_state.get("rag_query") != question:
            # No search yet for this exact question: retrieve now so the
            # context matches what the user is asking.
            results = _retrieve()
            table_slot.table(results)
        context = results[0]["Answer"] if results else ""

        text_input = (
            "\n"
            "[INST]<<SYS>> Вы помощник в вопросах-ответах. Используйте следующий фрагменты полученного контекста, чтобы ответить на вопрос. Если вы не знаете ответа, просто скажите, что не знаете. Используйте максимум три предложения и будьте краткими.<</SYS>>\n"
            f"Вопрос: {question}\n"
            f"Контекст: {context}\n"
            "Ответ: [/INST]\n"
        )
        output = llm(text_input, max_tokens=512, stream=True)

        text_output = ""
        for chunk in output:
            text_output += chunk["choices"][0]["text"]
            # One placeholder holds a single element, so the header and the
            # text are rendered together; updating per chunk shows progress.
            llm_output_text.text(f"LLAMA generated answer:\n{text_output}")
# Start the main part of the application
if __name__ == "__main__":
    main()