|
from llama_index.core import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, PromptHelper |
|
from llama_index.core import StorageContext, load_index_from_storage, get_response_synthesizer |
|
from llama_index.core.retrievers import VectorIndexRetriever |
|
from llama_index.core.query_engine import RetrieverQueryEngine |
|
from llama_index.core.postprocessor import SimilarityPostprocessor |
|
|
|
|
|
from langchain_community.embeddings import OpenAIEmbeddings |
|
import gradio as gr |
|
import os |
|
import openai |
|
from gradio.themes.utils import colors, fonts, sizes |
|
|
|
|
|
|
|
# System instructions as role/content dictionaries: answer in Polish only and
# admit missing information instead of inventing an answer.
# NOTE(review): nothing in the visible part of this file passes `messages` to
# the model - confirm whether it is still meant to be wired into the query.
messages = [
    {"role": "system", "content": "follow the 4 instructions below for your outputs:"},
    {"role": "system", "content": "1. make sure all expressions are compatible with Polish"},
    {"role": "system", "content": "2. use Polish only for outputs"},
    {"role": "system", "content": "3. if you cannot answer, reply that you do not have enough information"},
    # The original wording said "if you do know the answer", which inverted
    # the intended rule.
    {"role": "system", "content": "4. do not make up any answer if you do not know the answer"},
]
|
|
|
def construct_index(directory_path):
    """Build a vector index from the files in *directory_path* and persist it.

    The documents are read with ``SimpleDirectoryReader``, indexed with
    ``GPTVectorStoreIndex.from_documents`` and the index's storage context is
    persisted under ``index.json`` (the same location ``chatbotCustom`` passes
    as ``persist_dir`` when loading).

    Args:
        directory_path: Directory whose files are loaded as documents.

    Returns:
        The newly built index.
    """
    # Prompt sizing limits handed to PromptHelper (positional order kept as in
    # the original call).
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 0.05
    chunk_size_limit = 1000

    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )

    documents = SimpleDirectoryReader(directory_path).load_data()

    # NOTE(review): `urls` and `prompt_helper` are forwarded as extra keyword
    # arguments; nothing in this function downloads the pages itself. Verify
    # that the installed llama_index version actually consumes them.
    source_urls = [
        'https://trio.house/',
        'https://trio.house/kontakt/',
        'https://trio.house/o-nas/',
        'https://trio.house/w-sprzedazy/',
        'https://trio.house/dzialki/',
        'https://trio.house/zainwestuj-z-nami/',
        'https://trio.house/potrzebujesz-konsultacji-rynku-nieruchomosci/',
        'https://trio.house/potrzebujesz-remontu/',
        'https://trio.house/potrzebujesz-projektu-wnetrza/',
        'https://trio.house/potrzebujesz-mebli-na-wymiar/',
        'https://trio.house/potrzebujesz-kredytu-na-zakup-nieruchomosci/',
        'https://trio.house/makroekonomia/',
        'https://trio.house/rynek-nieruchomosci/',
        'https://trio.house/2023/05/24/deweloperzy-buduja-coraz-mniej/',
        'https://trio.house/2023/04/27/prognozy-na-2023-2025-co-nas-czeka/',
        'https://trio.house/2023/04/18/wycinka-drzew-na-wlasnej-dzialce-w-2023/',
        'https://trio.house/2023/04/03/lipiec-rozpoczynamy-juz-w-kwietniu/',
        'https://trio.house/2023/04/03/zmiany-w-podatku-od-czynnosci-cywilnoprawnych/',
        'https://trio.house/2023/03/23/czy-aby-napewno-najdrozsze-mieszkania-sa-w-stolicy/',
        'https://trio.house/2023/06/15/rekomendacja-s-korzystniejsza-dla-bezpiecznego-kredytu-2/',
        'https://trio.house/2023/07/20/warszawski-rynek-nieruchomosci-mieszkaniowych-na-6-biegu/',
        'https://livesmarter.pl/najlepsze-lokaty-maj-2023/',
        'https://www.money.pl/gospodarka/inflacja-maj-2023-r-finalny-odczyt-gus-6909186710817344a.html',
        'https://ksiegowosc.infor.pl/wiadomosci/5754337,oprocentowanie-lokat-bankowych-i-kont-oszczednosciowych-2023-koniec-maja-poczatek-czerwca-tabela.html#:~:text=7%2C05%25%20%2D%20takie%20jest,proc.',
    ]

    index = GPTVectorStoreIndex.from_documents(
        documents,
        urls=source_urls,
        prompt_helper=prompt_helper,
    )

    # Persist so that request handlers can reload the index from disk.
    index.storage_context.persist('index.json')

    return index
|
|
|
def chatbotCustom(input):
    """Answer a user question using the index persisted under ``index.json``.

    The index is loaded from storage, the 10 most similar nodes are retrieved,
    nodes scoring below a 0.7 similarity cutoff are dropped, and the remaining
    nodes are synthesized into a response.

    Args:
        input: The question text. The name shadows the builtin but is kept so
            existing callers are unaffected.

    Returns:
        The response text, or an empty string when *input* is None or blank.
    """
    # Nothing to ask: skip loading the index and querying for blank input.
    if input is None or not str(input).strip():
        return ""

    storage_context = StorageContext.from_defaults(persist_dir="index.json")
    index = load_index_from_storage(storage_context)

    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=10,
    )

    # `response_mode` is a response-synthesizer option; the original passed it
    # to the retriever instead of here.
    response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
    )

    response = query_engine.query(input)
    return response.response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clear():
    """Return a ``(None, None)`` pair, one value per output component.

    Wired to the clear button, whose outputs are the question and answer
    textboxes.
    """
    return None, None
|
|
|
# Custom Gradio theme: Roboto font, neutral hues, green primary button and a
# white page background in both light and dark variants.
theme = gr.themes.Default(
    font=[gr.themes.GoogleFont("Roboto"), "sans-serif"],
    primary_hue="neutral",
    secondary_hue="neutral",
    neutral_hue="neutral",
).set(
    button_primary_background_fill="#3FCCA5",
    button_primary_background_fill_dark="#3FCCA5",
    button_primary_text_color="#003F62",
    # Hex colours need the leading '#'; the original "FFFFFF" was not a valid
    # CSS colour value.
    body_background_fill="#FFFFFF",
    body_background_fill_dark="#FFFFFF",
)
|
|
|
# UI layout: a question box, an answer box and a row with the submit and
# clear buttons. Labels are user-facing Polish strings.
with gr.Blocks(theme=theme) as trioGPT:
    inputs = gr.Textbox(lines=4, elem_id="inputs", label="Zadaj mi pytanie")
    outputs = gr.Textbox(label="Odpowiedź", elem_id="outputs")

    with gr.Row():
        submit_btn = gr.Button("Wyślij", variant="primary")
        clear_btn = gr.Button("Wyczyść")

    # Submit sends the question to the chatbot; clear resets both textboxes.
    submit_btn.click(chatbotCustom, inputs=inputs, outputs=outputs)
    clear_btn.click(fn=clear, inputs=None, outputs=[inputs, outputs])
|
|
|
# Build and persist the index from the "data" directory before serving;
# chatbotCustom reloads it from the persisted copy on each request.
index = construct_index("data")

# Start the Gradio app.
trioGPT.launch()