Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,28 +7,23 @@ from sentence_transformers import SentenceTransformer
|
|
| 7 |
import requests
|
| 8 |
from langchain_community.vectorstores import Chroma
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
URL_APP_SCRIPT = os.getenv('URL_APP_SCRIPT')
|
| 13 |
-
|
| 14 |
-
|
| 15 |
SYSTEM_PROMPT = ["Sei BonsiAI e mi aiuterai nelle mie richieste (Parla in ITALIANO)", "Esatto, sono BonsiAI. Di cosa hai bisogno?"]
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
'systemStyle': '
|
| 19 |
-
'instruction':
|
| 20 |
-
'
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
'systemStyle': 'Sii molto formale, sintetico e parla in ITALIANO',
|
| 27 |
-
'instruction': '',
|
| 28 |
-
'RAG': True}
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
st.set_page_config(page_title="BonsiAI", page_icon="🤖")
|
| 32 |
|
| 33 |
def init_state() :
|
| 34 |
if "messages" not in st.session_state:
|
|
@@ -52,25 +47,48 @@ def init_state() :
|
|
| 52 |
if 'loaded_data' not in st.session_state:
|
| 53 |
st.session_state.loaded_data = False
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
if not st.session_state.loaded_data:
|
| 56 |
-
with st.
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
st.session_state.loaded_data = True
|
|
|
|
| 60 |
|
| 61 |
def sidebar():
|
| 62 |
def retrieval_settings() :
|
| 63 |
st.markdown("# Impostazioni Prompt")
|
| 64 |
-
st.session_state.selected_option_key = st.selectbox('Azione', list(st.session_state.options.keys())
|
| 65 |
st.session_state.selected_option = st.session_state.options.get(st.session_state.selected_option_key, {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
st.session_state.systemRole = st.session_state.selected_option.get('systemRole', '')
|
| 67 |
st.session_state.systemRole = st.text_area("Descrizione", st.session_state.systemRole, help='Ruolo del chatbot e descrizione dell\'azione che deve svolgere')
|
| 68 |
st.session_state.systemStyle = st.session_state.selected_option.get('systemStyle', '')
|
| 69 |
st.session_state.systemStyle = st.text_area("Stile", st.session_state.systemStyle, help='Descrizione dello stile utilizzato per generare il testo')
|
| 70 |
-
st.session_state.instruction = st.session_state.selected_option.get('instruction', '')
|
| 71 |
-
#st.session_state.instruction = st.text_area("Istruzioni", st.session_state.instruction, help='Testo di riferimento sul quale il modello si basa per generare il testo')
|
| 72 |
-
|
| 73 |
st.session_state.rag_enabled = st.session_state.selected_option.get('tipo', '')=='RAG'
|
|
|
|
| 74 |
if st.session_state.selected_option_key == 'Decreti':
|
| 75 |
st.session_state.top_k = st.slider(label="Documenti da ricercare", min_value=1, max_value=20, value=4, disabled=not st.session_state.rag_enabled)
|
| 76 |
st.session_state.decreti_escludere = st.multiselect(
|
|
@@ -81,9 +99,11 @@ def sidebar():
|
|
| 81 |
|
| 82 |
def model_settings() :
|
| 83 |
st.markdown("# Impostazioni Modello")
|
| 84 |
-
st.session_state.chat_bot = st.sidebar.radio('
|
|
|
|
| 85 |
st.session_state.temp = st.slider(label="Creatività", min_value=0.0, max_value=1.0, step=0.1, value=0.9)
|
| 86 |
st.session_state.max_tokens = st.slider(label="Lunghezza Output", min_value = 64, max_value=2048, step= 32, value=1024)
|
|
|
|
| 87 |
|
| 88 |
with st.sidebar:
|
| 89 |
retrieval_settings()
|
|
@@ -102,6 +122,12 @@ def chat_box() :
|
|
| 102 |
with st.chat_message(message["role"]):
|
| 103 |
st.markdown(message["content"])
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
def formattaPrompt(prompt, systemRole, systemStyle, instruction):
|
| 106 |
input_text = f'''
|
| 107 |
{{
|
|
@@ -140,29 +166,24 @@ def gen_augmented_prompt(prompt, top_k) :
|
|
| 140 |
links.append((reference, testo))
|
| 141 |
generated_prompt = f"""
|
| 142 |
A PARTIRE DAL SEGUENTE CONTESTO: {docs},
|
| 143 |
-
|
| 144 |
----
|
| 145 |
RISPONDI ALLA SEGUENTE RICHIESTA: {prompt}
|
| 146 |
"""
|
| 147 |
return context, links
|
| 148 |
|
| 149 |
-
def generate_chat_stream(prompt) :
|
| 150 |
links = []
|
| 151 |
-
prompt_originale = prompt
|
| 152 |
if st.session_state.rag_enabled :
|
| 153 |
with st.spinner("Ricerca nei Decreti...."):
|
| 154 |
time.sleep(1)
|
| 155 |
st.session_state.instruction, links = gen_augmented_prompt(prompt=prompt_originale, top_k=st.session_state.top_k)
|
| 156 |
-
prompt = formattaPrompt(prompt, st.session_state.systemRole, st.session_state.systemStyle, st.session_state.instruction)
|
| 157 |
-
print(prompt)
|
| 158 |
with st.spinner("Generazione in corso...") :
|
| 159 |
time.sleep(1)
|
| 160 |
chat_stream = chat(prompt, st.session_state.history,chat_client=CHAT_BOTS[st.session_state.chat_bot] ,
|
| 161 |
temperature=st.session_state.temp, max_new_tokens=st.session_state.max_tokens)
|
| 162 |
-
return chat_stream, links
|
| 163 |
|
| 164 |
def stream_handler(chat_stream, placeholder) :
|
| 165 |
-
start_time = time.time()
|
| 166 |
full_response = ''
|
| 167 |
for chunk in chat_stream :
|
| 168 |
if chunk.token.text!='</s>' :
|
|
@@ -177,24 +198,49 @@ def show_source(links) :
|
|
| 177 |
reference, testo = link
|
| 178 |
st.info('##### ' + reference.replace('_', ' ') + '\n\n'+ testo)
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
chat_stream, links = generate_chat_stream(prompt)
|
| 190 |
-
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import requests
|
| 8 |
from langchain_community.vectorstores import Chroma
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 10 |
+
import json
|
| 11 |
|
| 12 |
load_dotenv()

# Base URL of the Apps Script endpoint, read from the environment (or a .env file).
URL_APP_SCRIPT = os.getenv('URL_APP_SCRIPT')
if not URL_APP_SCRIPT:
    # os.getenv returns None when the variable is missing; fail with a clear
    # message instead of the opaque TypeError the string concatenation would raise.
    raise RuntimeError("Environment variable 'URL_APP_SCRIPT' is not set")

# The same spreadsheet id is used by both queries; only the parameter name differs.
ID_FOGLIO = '1cLw9q70BsPmxMBj9PIzgXtq6sm3X-GVBVnOB5wE8jr8'
URL_PROMPT = URL_APP_SCRIPT + '?IdFoglio=' + ID_FOGLIO
URL_DOCUMENTI = URL_APP_SCRIPT + '?IdSecondoFoglio=' + ID_FOGLIO

SYSTEM_PROMPT = ["Sei BonsiAI e mi aiuterai nelle mie richieste (Parla in ITALIANO)", "Esatto, sono BonsiAI. Di cosa hai bisogno?"]

# Display name shown in the sidebar -> model identifier passed to chat().
CHAT_BOTS = {"Mixtral 8x7B v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1"}

# Built-in prompt preset; init_state merges it with the presets downloaded
# from URL_PROMPT (downloaded entries win on key collisions).
option_personalizzata = {
    'Personalizzata': {
        'systemRole': 'Tu sei BONSI AI, il mio assistente personale della scuola superiore del Bonsignori. Aiutami in base alle mie esigenze',
        'systemStyle': 'Firmati sempre come BONSI AI. (scrivi in italiano)',
        'instruction': '',
        'tipo': '',
        'RAG': False,
    }
}

# ----------------------------------------------------------- User interface --------------------------------------------------------------------
st.set_page_config(page_title="Bonsi AI", page_icon="🏫")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def init_state() :
|
| 29 |
if "messages" not in st.session_state:
|
|
|
|
| 47 |
if 'loaded_data' not in st.session_state:
|
| 48 |
st.session_state.loaded_data = False
|
| 49 |
|
| 50 |
+
if "split" not in st.session_state:
|
| 51 |
+
st.session_state.split = 30
|
| 52 |
+
|
| 53 |
+
if "enable_history" not in st.session_state:
|
| 54 |
+
st.session_state.enable_history = False
|
| 55 |
+
|
| 56 |
+
if "numero_generazioni" not in st.session_state:
|
| 57 |
+
st.session_state.numero_generazioni = 1
|
| 58 |
+
|
| 59 |
if not st.session_state.loaded_data:
|
| 60 |
+
with st.status("Caricamento in corso...", expanded=True) as status:
|
| 61 |
+
st.write("Inizializzazione Ambiente")
|
| 62 |
+
time.sleep(1)
|
| 63 |
+
st.write("Inizializzazione Prompt")
|
| 64 |
+
options = requests.get(URL_PROMPT).json()
|
| 65 |
+
st.write("Inizializzazione Documenti")
|
| 66 |
+
documenti = requests.get(URL_DOCUMENTI).json()
|
| 67 |
+
st.session_state.options = {**option_personalizzata, **options}
|
| 68 |
+
st.session_state.documenti = documenti
|
| 69 |
st.session_state.loaded_data = True
|
| 70 |
+
status.update(label="Caricamento Completato", state="complete", expanded=False)
|
| 71 |
|
| 72 |
def sidebar():
|
| 73 |
def retrieval_settings() :
|
| 74 |
st.markdown("# Impostazioni Prompt")
|
| 75 |
+
st.session_state.selected_option_key = st.selectbox('Azione', list(st.session_state.options.keys()))
|
| 76 |
st.session_state.selected_option = st.session_state.options.get(st.session_state.selected_option_key, {})
|
| 77 |
+
|
| 78 |
+
if st.session_state.options.get(st.session_state.selected_option_key, {})["tipo"]=='DOCUMENTO':
|
| 79 |
+
st.session_state.selected_documento_key = st.selectbox('Documento', list(st.session_state.documenti.keys()))
|
| 80 |
+
st.session_state.selected_documento = st.session_state.documenti.get(st.session_state.selected_documento_key, {})
|
| 81 |
+
st.session_state.instruction = st.session_state.selected_documento.get('instruction', '')['Testo']
|
| 82 |
+
st.session_state.split = st.slider(label="Pagine Suddivisione", min_value=1, max_value=30, value=30, help='Se il documento ha 100 pagine e suddivido per 20 pagine elaborerà la risposta 5 volte. Più alto è il numero e meno volte elaborerà ma la risposta sarà più imprecisa')
|
| 83 |
+
else:
|
| 84 |
+
st.session_state.instruction = st.session_state.selected_option.get('instruction', '')
|
| 85 |
+
|
| 86 |
st.session_state.systemRole = st.session_state.selected_option.get('systemRole', '')
|
| 87 |
st.session_state.systemRole = st.text_area("Descrizione", st.session_state.systemRole, help='Ruolo del chatbot e descrizione dell\'azione che deve svolgere')
|
| 88 |
st.session_state.systemStyle = st.session_state.selected_option.get('systemStyle', '')
|
| 89 |
st.session_state.systemStyle = st.text_area("Stile", st.session_state.systemStyle, help='Descrizione dello stile utilizzato per generare il testo')
|
|
|
|
|
|
|
|
|
|
| 90 |
st.session_state.rag_enabled = st.session_state.selected_option.get('tipo', '')=='RAG'
|
| 91 |
+
|
| 92 |
if st.session_state.selected_option_key == 'Decreti':
|
| 93 |
st.session_state.top_k = st.slider(label="Documenti da ricercare", min_value=1, max_value=20, value=4, disabled=not st.session_state.rag_enabled)
|
| 94 |
st.session_state.decreti_escludere = st.multiselect(
|
|
|
|
| 99 |
|
| 100 |
def model_settings() :
|
| 101 |
st.markdown("# Impostazioni Modello")
|
| 102 |
+
st.session_state.chat_bot = st.sidebar.radio('Modello:', [key for key, value in CHAT_BOTS.items() ])
|
| 103 |
+
st.session_state.numero_generazioni = st.slider(label="Generazioni", min_value = 1, max_value=10, value=1)
|
| 104 |
st.session_state.temp = st.slider(label="Creatività", min_value=0.0, max_value=1.0, step=0.1, value=0.9)
|
| 105 |
st.session_state.max_tokens = st.slider(label="Lunghezza Output", min_value = 64, max_value=2048, step= 32, value=1024)
|
| 106 |
+
st.session_state.enable_history = st.toggle("Storico Messaggi", value=False)
|
| 107 |
|
| 108 |
with st.sidebar:
|
| 109 |
retrieval_settings()
|
|
|
|
| 122 |
with st.chat_message(message["role"]):
|
| 123 |
st.markdown(message["content"])
|
| 124 |
|
| 125 |
+
init_state()
|
| 126 |
+
sidebar()
|
| 127 |
+
header()
|
| 128 |
+
chat_box()
|
| 129 |
+
|
| 130 |
+
# ----------------------------------------------------------- Funzioni Varie --------------------------------------------------------------------
|
| 131 |
def formattaPrompt(prompt, systemRole, systemStyle, instruction):
|
| 132 |
input_text = f'''
|
| 133 |
{{
|
|
|
|
| 166 |
links.append((reference, testo))
|
| 167 |
generated_prompt = f"""
|
| 168 |
A PARTIRE DAL SEGUENTE CONTESTO: {docs},
|
|
|
|
| 169 |
----
|
| 170 |
RISPONDI ALLA SEGUENTE RICHIESTA: {prompt}
|
| 171 |
"""
|
| 172 |
return context, links
|
| 173 |
|
| 174 |
+
def generate_chat_stream(prompt, prompt_originale, inst, systemRole=None):
    """Start a streamed generation for one already-formatted prompt.

    Args:
        prompt: Prompt text sent to the model (output of ``formattaPrompt``).
        prompt_originale: The raw user request; used as the retrieval query.
        inst: Instruction text the prompt was built from.
        systemRole: Role text used if the prompt has to be rebuilt after
            retrieval. Defaults to ``st.session_state.systemRole``.

    Returns:
        ``(chat_stream, links, inst)``: the stream returned by ``chat``, the
        ``(reference, text)`` pairs returned by ``gen_augmented_prompt`` (empty
        when RAG is disabled), and the instruction actually used.
    """
    links = []
    if st.session_state.rag_enabled:
        with st.spinner("Ricerca nei Decreti...."):
            time.sleep(1)
            # The sidebar only sets top_k for the 'Decreti' option; fall back
            # to the slider's default so other RAG options do not crash.
            top_k = st.session_state.get('top_k', 4)
            inst, links = gen_augmented_prompt(prompt=prompt_originale, top_k=top_k)
            st.session_state.instruction = inst
        # The incoming prompt was formatted before retrieval, so it does not
        # contain the retrieved context. Rebuild it, otherwise the context is
        # fetched and displayed but never sent to the model.
        if systemRole is None:
            systemRole = st.session_state.systemRole
        prompt = formattaPrompt(prompt_originale, systemRole, st.session_state.systemStyle, inst)
    with st.spinner("Generazione in corso..."):
        time.sleep(1)
        chat_stream = chat(
            prompt,
            st.session_state.history,
            chat_client=CHAT_BOTS[st.session_state.chat_bot],
            temperature=st.session_state.temp,
            max_new_tokens=st.session_state.max_tokens,
        )
    return chat_stream, links, inst
|
| 185 |
|
| 186 |
def stream_handler(chat_stream, placeholder) :
|
|
|
|
| 187 |
full_response = ''
|
| 188 |
for chunk in chat_stream :
|
| 189 |
if chunk.token.text!='</s>' :
|
|
|
|
| 198 |
reference, testo = link
|
| 199 |
st.info('##### ' + reference.replace('_', ' ') + '\n\n'+ testo)
|
| 200 |
|
| 201 |
+
def split_text(text, chunk_size):
    """Split ``text`` into consecutive slices of at most ``chunk_size`` characters.

    Args:
        text: The string to split. ``None`` and ``''`` are treated as a single
            space so that the result always contains exactly one chunk and
            callers looping over it still run once.
        chunk_size: Maximum characters per chunk. Values below 100 are
            replaced by 60000.

    Returns:
        A non-empty list of substrings that concatenate back to ``text``.
    """
    if not text:
        # Covers both '' and None (e.g. a JSON null instruction).
        text = ' '
    if chunk_size < 100:
        chunk_size = 60000
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
|
|
|
|
|
|
|
| 210 |
|
| 211 |
+
# -------------------------------------------------------------- Gestione Chat -----------------------------------------------------------------------
|
| 212 |
+
if prompt := st.chat_input("Chatta con BonsiAI..."):
    # One request fans out into (role) x (instruction chunk) x (generation).
    prompt_originale = prompt
    # `split` comes from the "Pagine Suddivisione" slider; the factor 2000 is
    # presumably characters per page -- TODO confirm.
    instruction_suddivise = split_text(st.session_state.instruction, st.session_state.split * 2000)
    # Several roles can be packed into one description, separated by "&&".
    ruoli_divisi = st.session_state.systemRole.split("&&")
    # Same lookup the sidebar uses; .get avoids a KeyError on options that
    # have no 'tipo' field.
    opzione = st.session_state.options.get(st.session_state.selected_option_key, {})
    is_documento = opzione.get('tipo', '') == 'DOCUMENTO'

    risposte = []
    domanda_mostrata = False
    for ruolo_singolo in ruoli_divisi:
        # `parte` follows the chunk index so the "Parte N" label always names
        # the chunk actually shown, whatever the number of generations or roles.
        for parte, instruction_singola in enumerate(instruction_suddivise, start=1):
            for _ in range(st.session_state.numero_generazioni):
                prompt = formattaPrompt(prompt_originale, ruolo_singolo, st.session_state.systemStyle, instruction_singola)
                if not domanda_mostrata:
                    # Show and record the user's question only once per request.
                    st.chat_message("user").markdown(prompt_originale)
                    st.session_state.messages.append({"role": "user", "content": prompt_originale})
                    domanda_mostrata = True
                chat_stream, links, inst = generate_chat_stream(prompt, prompt_originale, instruction_singola)
                with st.chat_message("assistant"):
                    placeholder = st.empty()
                    full_response = stream_handler(chat_stream, placeholder)
                    if st.session_state.rag_enabled:
                        show_source(links)
                    if is_documento:
                        with st.expander("Mostra Documento"):
                            st.info('##### ' + st.session_state.selected_documento_key + ' (Parte ' + str(parte) + ')' + '\n\n\n' + inst)
                st.session_state.messages.append({"role": "assistant", "content": full_response})
                risposte.append(full_response)

                if st.session_state.enable_history:
                    st.session_state.history.append([prompt, full_response])
    st.success('Generazione Completata')

    # Every answer is prefixed with a newline, matching the previous
    # incremental concatenation.
    risposta_completa = ''.join('\n' + risposta for risposta in risposte)
    payload = {"domanda": prompt_originale, "risposta": risposta_completa}
    json_payload = json.dumps(payload)
    try:
        # Logging the exchange is best-effort: bound the wait and never let a
        # network failure discard an answer that was already generated.
        response = requests.post(URL_APP_SCRIPT, data=json_payload, timeout=30)
    except requests.RequestException as errore:
        st.warning(f'Salvataggio della conversazione non riuscito: {errore}')
|