Spaces:

adowu
/

prawnikai

Sleeping

App Files Files Community

adowu committed on Nov 16, 2024

Commit

13a4ba2

verified ·

1 Parent(s): dde9d2b

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -57

app.py CHANGED Viewed

@@ -1,80 +1,81 @@
 import streamlit as st
-import logging
-from database import KodeksProcessor
-from config import DATABASE_DIR
 import os
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')  # DEBUG-level logging; records show timestamp, level and message
-logger = logging.getLogger(__name__)  # module-level logger used by main()
-def initialize_session_state():  # Ensure the session keys used by the chat UI exist before they are read.
-    if 'chatbot' not in st.session_state:
-        st.session_state.chatbot = None  # placeholder only; nothing in the visible code assigns a chatbot object
-    if 'messages' not in st.session_state:
-        st.session_state.messages = []  # chat history; main() appends {"role", "content"} dicts to it
 def main():  # Render the "Asystent Prawny" page: database setup, chat history, search-based answers, debug section.
-    st.title("Asystent Prawny")
-    initialize_session_state()
-    # Initialize the database (if needed); the session flag below makes this run once per session
-    if 'db_initialized' not in st.session_state:
-        with st.spinner("Inicjalizacja bazy danych..."):
-            processor = KodeksProcessor()
-            if not os.path.exists(DATABASE_DIR):
-                logger.info(f"Przetwarzanie plików w katalogu: data/kodeksy")
-                processor.process_all_files("data/kodeksy")
-            else:
-                logger.info(f"Baza danych już istnieje w {DATABASE_DIR}")
-            st.session_state.db_initialized = True
-    # Button for clearing the chat history
-    if st.sidebar.button("Wyczyść historię"):
         st.session_state.messages = []
-        st.rerun()
-    # Display the chat history
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
-    # User input
     if prompt := st.chat_input("Zadaj pytanie dotyczące prawa..."):
-        # Add the user's question to the history
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             st.markdown(prompt)
-        # Look up matching fragments in the database
-        processor = KodeksProcessor()
-        relevant_chunks = processor.search(prompt)
-        # Build the answer
-        with st.chat_message("assistant"):
-            message_placeholder = st.empty()
-            full_response = "Oto co znalazłem w bazie danych:\n\n"
-            for doc, metadata in zip(relevant_chunks['documents'][0], relevant_chunks['metadatas'][0]):  # [0]: presumably one result list per query - confirm against KodeksProcessor.search
-                full_response += f"**Artykuł {metadata['article']}**\n{doc}\n\n"
-            message_placeholder.markdown(full_response)
-        # Add the assistant's answer to the history
-        st.session_state.messages.append({"role": "assistant", "content": full_response})
-    # Debugging section
-    if st.sidebar.checkbox("Pokaż informacje debugowania"):
-        st.subheader("Informacje debugowania")
-        processor = KodeksProcessor()
-        doc_count = processor.collection.count()
-        st.write(f"Całkowita liczba dokumentów w bazie danych: {doc_count}")
-        if st.button("Przetwórz pliki ponownie"):
-            processor.process_all_files("data/kodeksy")
-            st.success("Przetwarzanie zakończone")
-        if st.button("Pokaż wszystkie dokumenty"):
-            processor.list_all_documents()
 if __name__ == "__main__":  # run the page only when this file is the main module
     main()

 import streamlit as st
+import json
 import os
+from sentence_transformers import SentenceTransformer, util
+import torch
+# Load the processed legal code data
@st.cache_resource
def load_data(file_path):
    """Read the JSON file at *file_path* (UTF-8) and return the parsed content.

    The result is cached by ``st.cache_resource``; that decorator hands the
    same cached object back on later calls, so callers should treat the
    returned data as read-only.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
+# Initialize the sentence transformer model
@st.cache_resource
def load_model():
    """Build the multilingual sentence-embedding model (cached by Streamlit)."""
    model_name = 'distiluse-base-multilingual-cased-v1'
    return SentenceTransformer(model_name)
def search_relevant_chunks(query, chunks, model, top_k=3, chunk_embeddings=None):
    """Return the chunks most similar to *query*, best match first.

    Args:
        query: Question text to embed.
        chunks: Sequence of dicts; each one must provide a ``'text'`` entry.
        model: Object exposing a SentenceTransformer-style
            ``encode(..., convert_to_tensor=True)`` method.
        top_k: Maximum number of chunks to return.
        chunk_embeddings: Optional precomputed 2-D tensor holding one
            embedding row per chunk, in the same order as *chunks*. When
            omitted, every chunk text is encoded on each call, which is the
            expensive part of a search.

    Returns:
        A list with at most *top_k* chunks ordered by descending cosine
        similarity. The list is empty when *chunks* is empty or *top_k* is
        smaller than 1.
    """
    # torch.topk raises when k exceeds the number of scores, so clamp it.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    if chunk_embeddings is None:
        chunk_embeddings = model.encode(
            [chunk['text'] for chunk in chunks], convert_to_tensor=True
        )

    # Cosine similarity of the query against every chunk row, computed with
    # torch directly so the function only depends on torch and the model.
    cos_scores = torch.nn.functional.cosine_similarity(
        query_embedding.reshape(1, -1), chunk_embeddings, dim=1
    )
    top_results = torch.topk(cos_scores, k=k)
    # Convert tensor indices to plain ints before indexing the Python sequence.
    return [chunks[i] for i in top_results.indices.tolist()]
 def main():
+    st.title("Chatbot Prawny")
+    # Load data and model
+    data_file = "processed_kodeksy.json"
+    if not os.path.exists(data_file):
+        st.error(f"Plik {data_file} nie istnieje. Najpierw przetwórz dane kodeksów.")
+        return
+    chunks = load_data(data_file)
+    model = load_model()
+    # Initialize chat history
+    if "messages" not in st.session_state:
         st.session_state.messages = []
+    # Display chat history
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
+    # User input
     if prompt := st.chat_input("Zadaj pytanie dotyczące prawa..."):
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             st.markdown(prompt)
+        # Search for relevant chunks
+        relevant_chunks = search_relevant_chunks(prompt, chunks, model)
+        # Generate response
+        response = "Oto co znalazłem w kodeksie:\n\n"
+        for chunk in relevant_chunks:
+            response += f"**{chunk['metadata']['nazwa']} - Artykuł {chunk['metadata']['article']}**\n"
+            response += f"{chunk['text']}\n\n"
+        # Display assistant response
+        with st.chat_message("assistant"):
+            st.markdown(response)
+        st.session_state.messages.append({"role": "assistant", "content": response})
+    # Sidebar for additional options
+    with st.sidebar:
+        st.subheader("Opcje")
+        if st.button("Wyczyść historię czatu"):
+            st.session_state.messages = []
+            st.experimental_rerun()
+        st.subheader("Informacje o bazie danych")
+        st.write(f"Liczba chunków: {len(chunks)}")
+        st.write(f"Przykładowy chunk:")
+        st.json(chunks[0] if chunks else {})
 if __name__ == "__main__":  # run the page only when this file is the main module
     main()