Spaces:

Rulga
/

Doc-chat

Sleeping

App Files Files Community

Rulga committed on Feb 20

Commit

ed07e8e

verified ·

1 Parent(s): 785ef60

Upload 10 files

Browse files

Files changed (10) hide show

.gitignore +6 -0
README.md +15 -10
api/analysis.py +90 -0
api/main.py +19 -0
app - Copy.py +242 -0
app.py +200 -0
gitattributes +44 -0
gitignore +4 -0
requirements.txt +21 -0
run.sh +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+/.streamlit
+*.env
+.env
+venv
+.streamlit/secrets.toml

README.md CHANGED Viewed

@@ -1,10 +1,15 @@
----
-title: Doc Chat
-emoji: 💻
-colorFrom: gray
-colorTo: gray
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: LS Chatbot Log
+emoji: 🌍
+colorFrom: blue
+colorTo: blue
+sdk: streamlit
+sdk_version: 1.42.0
+app_file: app.py
+pinned: false
+short_description: It is a chat built with an AI model about www.Status.law
+---
+# LS Chatbot Log
+A Streamlit chat app that lets users ask an AI model questions about www.Status.law.

api/analysis.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# analysis.py
+import json
+import pandas as pd
+from collections import defaultdict
+from typing import List, Dict
+from datetime import datetime
+class LogAnalyzer:
+    def __init__(self, log_path: str = "chat_history/chat_logs.json"):
+        self.log_path = log_path
+        self.logs = self._load_logs()
+    def _load_logs(self) -> List[Dict]:
+        """Load and parse log entries from JSON file"""
+        try:
+            with open(self.log_path, "r", encoding="utf-8") as f:
+                return [json.loads(line) for line in f]
+        except (FileNotFoundError, json.JSONDecodeError):
+            return []
+    def get_basic_stats(self) -> Dict:
+        """Calculate basic conversation statistics"""
+        if not self.logs:
+            return {}
+        return {
+            "total_interactions": len(self.logs),
+            "unique_users": len({log.get('session_id') for log in self.logs}),
+            "avg_response_length": pd.Series([len(log['bot_response']) for log in self.logs]).mean(),
+            "most_common_questions": self._get_common_questions(),
+            "knowledge_base_usage": self._calculate_kb_usage()
+        }
+    def _get_common_questions(self, top_n: int = 5) -> List[Dict]:
+        """Identify most frequent user questions"""
+        question_counts = defaultdict(int)
+        for log in self.logs:
+            question_counts[log['user_input']] += 1
+        return sorted(
+            [{"question": k, "count": v} for k, v in question_counts.items()],
+            key=lambda x: x["count"],
+            reverse=True
+        )[:top_n]
+    def _calculate_kb_usage(self) -> Dict:
+        """Analyze knowledge base effectiveness"""
+        context_usage = defaultdict(int)
+        for log in self.logs:
+            if log.get('context'):
+                context_usage['with_context'] += 1
+            else:
+                context_usage['without_context'] += 1
+        return context_usage
+    def temporal_analysis(self) -> Dict:
+        """Analyze usage patterns over time"""
+        df = pd.DataFrame(self.logs)
+        df['timestamp'] = pd.to_datetime(df['timestamp'])
+        return {
+            "daily_activity": df.resample('D', on='timestamp').size().to_dict(),
+            "hourly_pattern": df.groupby(df['timestamp'].dt.hour).size().to_dict()
+        }
+    def generate_report(self) -> str:
+        """Generate comprehensive analysis report"""
+        stats = self.get_basic_stats()
+        temporal = self.temporal_analysis()
+        report = f"""
+        Legal Assistant Usage Report
+        ----------------------------
+        Period: {self.logs[0]['timestamp']} - {self.logs[-1]['timestamp']}
+        Total Interactions: {stats['total_interactions']}
+        Unique Users: {stats['unique_users']}
+        Average Response Length: {stats['avg_response_length']:.1f} chars
+        Top Questions:
+        {''.join(f"{q['question']}: {q['count']}\n" for q in stats['most_common_questions'])}
+        Knowledge Base Usage:
+        - With context: {stats['knowledge_base_usage'].get('with_context', 0)}
+        - Without context: {stats['knowledge_base_usage'].get('without_context', 0)}
+        Usage Patterns:
+        - Daily Activity: {temporal['daily_activity']}
+        - Hourly Distribution: {temporal['hourly_pattern']}
+        """
+        return report

api/main.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from fastapi import APIRouter
+from analysis import LogAnalyzer
+router = APIRouter()
+@router.get("/analysis/basic")
+async def get_basic_analysis():
+    analyzer = LogAnalyzer()
+    return analyzer.get_basic_stats()
+@router.get("/analysis/temporal")
+async def get_temporal_analysis():
+    analyzer = LogAnalyzer()
+    return analyzer.temporal_analysis()
+@router.get("/analysis/report")
+async def get_full_report():
+    analyzer = LogAnalyzer()
+    return {"report": analyzer.generate_report()}

app - Copy.py ADDED Viewed

	@@ -0,0 +1,242 @@

+import os
+import time
+import streamlit as st
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnableLambda
+import requests
+import json
+# Логирует взаимодействие в JSON-файл
+from datetime import datetime
+def log_interaction(user_input: str, bot_response: str):
+    """Логирует взаимодействие в JSON-файл"""
+    log_entry = {
+        "timestamp": datetime.now().isoformat(),
+        "user_input": user_input,
+        "bot_response": bot_response
+    }
+    log_dir = "chat_history"
+    os.makedirs(log_dir, exist_ok=True)
+    log_path = os.path.join(log_dir, "chat_logs.json")
+    with open(log_path, "a") as f:
+        f.write(json.dumps(log_entry) + "\n")
+#
+# Page configuration
+st.set_page_config(page_title="Status Law Assistant", page_icon="⚖️")
+# Knowledge base info in session_state
+if 'kb_info' not in st.session_state:
+    st.session_state.kb_info = {
+        'build_time': None,
+        'size': None
+    }
+# Display title and knowledge base info
+# st.title("www.Status.Law Legal Assistant")
+st.markdown(
+    '''
+    <h1>
+        ⚖️
+        <a href="https://status.law/" style="text-decoration: underline; color: blue; font-size: inherit;">
+            Status.Law
+        </a>
+        Legal Assistant
+    </h1>
+    ''',
+    unsafe_allow_html=True
+)
+if st.session_state.kb_info['build_time'] and st.session_state.kb_info['size']:
+    st.caption(f"(Knowledge base build time: {st.session_state.kb_info['build_time']:.2f} seconds, "
+               f"size: {st.session_state.kb_info['size']:.2f} MB)")
+# Path to store vector database
+VECTOR_STORE_PATH = "vector_store"
+# Создание папки истории, если она не существует
+if not os.path.exists("chat_history"):
+    os.makedirs("chat_history")
+# Website URLs
+urls = [
+    "https://status.law",
+    "https://status.law/about",
+    "https://status.law/careers",
+    "https://status.law/tariffs-for-services-of-protection-against-extradition",
+    "https://status.law/challenging-sanctions",
+    "https://status.law/law-firm-contact-legal-protection"
+    "https://status.law/cross-border-banking-legal-issues",
+    "https://status.law/extradition-defense",
+    "https://status.law/international-prosecution-protection",
+    "https://status.law/interpol-red-notice-removal",
+    "https://status.law/practice-areas",
+    "https://status.law/reputation-protection",
+    "https://status.law/faq"
+]
+# Load secrets
+try:
+    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
+except Exception as e:
+    st.error("Error loading secrets. Please check your configuration.")
+    st.stop()
+# Initialize models
+@st.cache_resource
+def init_models():
+    llm = ChatGroq(
+        model_name="llama-3.3-70b-versatile",
+        temperature=0.6,
+        api_key=GROQ_API_KEY
+    )
+    embeddings = HuggingFaceEmbeddings(
+        model_name="intfloat/multilingual-e5-large-instruct"
+    )
+    return llm, embeddings
+# Build knowledge base
+def build_knowledge_base(embeddings):
+    start_time = time.time()
+    documents = []
+    with st.status("Loading website content...") as status:
+        for url in urls:
+            try:
+                loader = WebBaseLoader(url)
+                docs = loader.load()
+                documents.extend(docs)
+                status.update(label=f"Loaded {url}")
+            except Exception as e:
+                st.error(f"Error loading {url}: {str(e)}")
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100
+    )
+    chunks = text_splitter.split_documents(documents)
+    vector_store = FAISS.from_documents(chunks, embeddings)
+    vector_store.save_local(VECTOR_STORE_PATH)
+    end_time = time.time()
+    build_time = end_time - start_time
+    # Calculate knowledge base size
+    total_size = 0
+    for path, dirs, files in os.walk(VECTOR_STORE_PATH):
+        for f in files:
+            fp = os.path.join(path, f)
+            total_size += os.path.getsize(fp)
+    size_mb = total_size / (1024 * 1024)
+    # Save knowledge base info
+    st.session_state.kb_info['build_time'] = build_time
+    st.session_state.kb_info['size'] = size_mb
+    st.success(f"""
+    Knowledge base created successfully:
+    - Time taken: {build_time:.2f} seconds
+    - Size: {size_mb:.2f} MB
+    - Number of chunks: {len(chunks)}
+    """)
+    return vector_store
+# Main function
+def main():
+    # Initialize models
+    llm, embeddings = init_models()
+    # Check if knowledge base exists
+    if not os.path.exists(VECTOR_STORE_PATH):
+        st.warning("Knowledge base not found.")
+        if st.button("Create Knowledge Base"):
+            vector_store = build_knowledge_base(embeddings)
+            st.session_state.vector_store = vector_store
+            st.rerun()
+    else:
+        if 'vector_store' not in st.session_state:
+            st.session_state.vector_store = FAISS.load_local(
+                VECTOR_STORE_PATH,
+                embeddings,
+                allow_dangerous_deserialization=True
+            )
+    # Chat mode
+    if 'vector_store' in st.session_state:
+        if 'messages' not in st.session_state:
+            st.session_state.messages = []
+        # Display chat history
+        for message in st.session_state.messages:
+            st.chat_message("user").write(message["question"])
+            st.chat_message("assistant").write(message["answer"])
+        # User input
+        if question := st.chat_input("Ask your question"):
+            st.chat_message("user").write(question)
+            # Retrieve context and generate response
+            with st.chat_message("assistant"):
+                with st.spinner("Thinking..."):
+                    context = st.session_state.vector_store.similarity_search(question)
+                    context_text = "\n".join([doc.page_content for doc in context])
+                    prompt = PromptTemplate.from_template("""
+                    You are a helpful and polite legal assistant at Status Law.
+                    You answer in the language in which the question was asked.
+                    Answer the question based on the context provided.
+                    If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
+                    - For all users: +32465594521 (landline phone).
+                    - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
+                    - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
+                    Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
+                    Also, offer free consultations if they are available and suitable for the user's request.
+                    Answer professionally but in a friendly manner.
+                    Example:
+                    Q: How can I challenge the sanctions?
+                    A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    Context: {context}
+                    Question: {question}
+                    """)
+                    chain = prompt | llm | StrOutputParser()
+                    response = chain.invoke({
+                        "context": context_text,
+                        "question": question
+                    })
+                    st.write(response)
+                    # В блоке генерации ответа (после st.write(response))
+                    log_interaction(question, response)
+                    # Save chat history
+                    st.session_state.messages.append({
+                        "question": question,
+                        "answer": response
+                    })
+if __name__ == "__main__":
+    main()

app.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import os
+import time
+import json
+import traceback
+from datetime import datetime
+import streamlit as st
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+# Initialize environment variables
+load_dotenv()
+# --------------- Enhanced Logging System ---------------
+def log_interaction(user_input: str, bot_response: str, context: str):
+    """Log user interactions with context and error handling"""
+    try:
+        log_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "user_input": user_input,
+            "bot_response": bot_response,
+            "context": context,
+            "model": "llama-3.3-70b-versatile",
+            "kb_version": st.session_state.kb_info.get('version', '1.0')
+        }
+        os.makedirs("chat_history", exist_ok=True)
+        log_path = os.path.join("chat_history", "chat_logs.json")
+        # Atomic write operation with UTF-8 encoding
+        with open(log_path, "a", encoding="utf-8") as f:
+            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
+    except Exception as e:
+        error_msg = f"Logging error: {str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        st.error("Error saving interaction log. Please contact support.")
+# --------------- Page Configuration ---------------
+st.set_page_config(
+    page_title="Status Law Assistant",
+    page_icon="⚖️",
+    layout="wide",
+    menu_items={
+        'About': "### Legal AI Assistant powered by Status.Law"
+    }
+)
+# --------------- Knowledge Base Management ---------------
+VECTOR_STORE_PATH = "vector_store"
+URLS = [
+    "https://status.law",
+    "https://status.law/about",
+    "https://status.law/careers",
+    "https://status.law/tariffs-for-services-of-protection-against-extradition",
+    "https://status.law/challenging-sanctions",
+    "https://status.law/law-firm-contact-legal-protection"
+    "https://status.law/cross-border-banking-legal-issues",
+    "https://status.law/extradition-defense",
+    "https://status.law/international-prosecution-protection",
+    "https://status.law/interpol-red-notice-removal",
+    "https://status.law/practice-areas",
+    "https://status.law/reputation-protection",
+    "https://status.law/faq"
+]
+def init_models():
+    """Initialize AI models with caching"""
+    llm = ChatGroq(
+        model_name="llama-3.3-70b-versatile",
+        temperature=0.6,
+        api_key=os.getenv("GROQ_API_KEY")
+    )
+    embeddings = HuggingFaceEmbeddings(
+        model_name="intfloat/multilingual-e5-large-instruct"
+    )
+    return llm, embeddings
+def build_knowledge_base(embeddings):
+    """Create or update the vector knowledge base"""
+    start_time = time.time()
+    documents = []
+    with st.status("Building knowledge base..."):
+        for url in URLS:
+            try:
+                loader = WebBaseLoader(url)
+                documents.extend(loader.load())
+            except Exception as e:
+                st.error(f"Failed to load {url}: {str(e)}")
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100
+    )
+    chunks = text_splitter.split_documents(documents)
+    vector_store = FAISS.from_documents(chunks, embeddings)
+    vector_store.save_local(VECTOR_STORE_PATH)
+    # Update version information
+    st.session_state.kb_info.update({
+        'build_time': time.time() - start_time,
+        'size': sum(os.path.getsize(f) for f in os.listdir(VECTOR_STORE_PATH)) / (1024 ** 2),
+        'version': datetime.now().strftime("%Y%m%d-%H%M%S")
+    })
+    return vector_store
+# --------------- Chat Interface ---------------
+def main():
+    llm, embeddings = init_models()
+    # Initialize or load knowledge base
+    if not os.path.exists(VECTOR_STORE_PATH):
+        if st.button("Initialize Knowledge Base"):
+            with st.spinner("Creating knowledge base..."):
+                st.session_state.vector_store = build_knowledge_base(embeddings)
+                st.rerun()
+        return
+    if 'vector_store' not in st.session_state:
+        st.session_state.vector_store = FAISS.load_local(
+            VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True
+        )
+    # Display chat history
+    if 'messages' not in st.session_state:
+        st.session_state.messages = []
+    for msg in st.session_state.messages:
+        st.chat_message(msg["role"]).write(msg["content"])
+    # Process user input
+    if user_input := st.chat_input("Ask your legal question"):
+        # Display user message
+        st.chat_message("user").write(user_input)
+        with st.chat_message("assistant"):
+            with st.spinner("Analyzing your question..."):
+                try:
+                    # Retrieve relevant context
+                    context_docs = st.session_state.vector_store.similarity_search(user_input)
+                    context_text = "\n".join(d.page_content for d in context_docs)
+                    # Generate response
+                    prompt_template = PromptTemplate.from_template("""
+                    You are a helpful and polite legal assistant at Status Law.
+                    You answer in the language in which the question was asked.
+                    Answer the question based on the context provided.
+                    If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
+                    - For all users: +32465594521 (landline phone).
+                    - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
+                    - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
+                    Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
+                    Also, offer free consultations if they are available and suitable for the user's request.
+                    Answer professionally but in a friendly manner.
+                    Example:
+                    Q: How can I challenge the sanctions?
+                    A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    Context: {context}
+                    Question: {question}
+                    Response Guidelines:
+                    1. Answer in the user's language
+                    2. Cite sources when possible
+                    3. Offer contact options if unsure
+                    """)
+                    response = (prompt_template | llm | StrOutputParser()).invoke({
+                        "context": context_text,
+                        "question": user_input
+                    })
+                    # Display and log interaction
+                    st.write(response)
+                    log_interaction(user_input, response, context_text)
+                    st.session_state.messages.extend([
+                        {"role": "user", "content": user_input},
+                        {"role": "assistant", "content": response}
+                    ])
+                except Exception as e:
+                    error_msg = f"Processing error: {str(e)}\n{traceback.format_exc()}"
+                    st.error("Error processing request. Please try again.")
+                    print(error_msg)
+                    log_interaction(user_input, "SYSTEM_ERROR", context_text)
+if __name__ == "__main__":
+    main()

gitattributes ADDED Viewed

	@@ -0,0 +1,44 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+* text=auto eol=crlf
+*.bin binary
+.gitignore text eol=lf
+.gitattributes text eol=lf

gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+*.env
+venv
+.streamlit/secrets.toml

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+streamlit
+langchain-community
+langchain-core
+langchain-huggingface
+langchain-groq
+python-dotenv
+beautifulsoup4
+faiss-cpu
+requests
+langgraph
+langchain-anthropic
+fastapi
+uvicorn[standard]
+pydantic
+python-multipart
+pandas
+langchain

run.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+#!/bin/bash
+# Start Streamlit and FastAPI side by side
+streamlit run app.py &          # Launch the chatbot UI in the background
+uvicorn api.main:app --reload   # Launch the log-analysis API