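"""Streamlit RAG chatbot: embeds the user question with a multilingual SentenceTransformer,
retrieves the closest chunks from a persistent ChromaDB collection, and answers with a
locally run TinyLlama model."""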
import os
import subprocess

import streamlit as st
import torch
import chromadb
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

st.set_page_config(page_title="RAG Chatbot", page_icon="πŸ€–", layout="wide")

st.title("πŸ€– RAG Chatbot – INSIEL")

def run_ingest_if_needed():
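    """Build the Chroma vectorstore by running rag_ingest.py once, if it does not exist yet."""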
    if not os.path.exists("vectorstore"):
        st.info("Inizializzazione: generazione vectorstore in corso...")
        try:
            subprocess.run(["python", "rag_ingest.py"], check=True)
            st.success("Vectorstore generata correttamente βœ…")
        except subprocess.CalledProcessError:
            st.error("Errore durante la generazione della vectorstore.")

run_ingest_if_needed()


@st.cache_resource
def load_models():
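    """Load and cache the embedding model and the local TinyLlama generation pipeline (CPU only)."""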
    # Embedding model
    embedder = SentenceTransformer("sentence-transformers/distiluse-base-multilingual-cased-v1")

    # LLM model (TinyLlama on CPU)
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(torch.device("cpu"))
    rag_chat = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300, device=-1)

    return embedder, rag_chat

embedder, rag_chat = load_models()

# --- CHROMA DB SETUP ---
client = chromadb.PersistentClient(path="./vectorstore")
collection = client.get_or_create_collection(name="insiel_chunks")

# --- RESPONSE FUNCTION ---

def generate_rag_response_local(query, retrieved_chunks):
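    """Build a context-grounded prompt from the retrieved chunks and generate an answer locally."""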
    context = "\n\n".join(retrieved_chunks)
    context = context[:3000]  # truncate if too long, to avoid overflowing the prompt

    prompt = (
        "Rispondi alla domanda usando solo le informazioni nel contesto. "
        "Se la risposta non Γ¨ presente, di' chiaramente che non Γ¨ specificato nel documento.\n\n"
        f"Contesto:\n{context}\n\n"
        f"Domanda: \n{query}\n"
        "Risposta:"
    )

    result = rag_chat(prompt)[0]["generated_text"]

    return result.split("Risposta:")[-1].strip()


# --- INTERFACE ---

if "history" not in st.session_state:
    st.session_state.history = []

query = st.text_input("πŸ’¬ Inserisci la tua domanda qui:")

if query:
    # 1. Embed the query
    query_embedding = embedder.encode([query]).tolist()  # plain lists keep Chroma happy across versions

    # 2. Retrieve the top-matching chunks from Chroma
    results = collection.query(query_embeddings=query_embedding, n_results=3)
    retrieved_chunks = results["documents"][0]

    # 3. Generate an answer with the local model
    response = generate_rag_response_local(query, retrieved_chunks)

    # 4. Update the chat history
    st.session_state.history.append(("πŸ§‘β€πŸ’» Tu", query))
    response_preview = "\n".join(response.strip().split("\n")[:2])  # keep only the first two lines of the generated answer
    st.session_state.history.append(("πŸ€– RAG Bot", response_preview))

# --- CHAT OUTPUT ---

if st.session_state.history:
    for speaker, msg in st.session_state.history:
        st.markdown(f"**{speaker}**: {msg}")

# --- SHOW THE CHUNKS USED ---
if query:
    with st.expander("πŸ” Mostra i documenti/chunk usati"):
        for i, chunk in enumerate(retrieved_chunks):
            st.markdown(f"**Chunk {i+1}**\n\n{chunk}\n\n---")