Sangyog commited on
Commit
311d019
·
unverified ·
2 Parent(s): 72b7684b0dd1a2

Merge pull request #22 from cyberalertnepal/sangyog

Browse files
.env-example CHANGED
@@ -1,2 +1,34 @@
1
  MY_SECRET_TOKEN="SECRET_CODE_TOKEN"
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  MY_SECRET_TOKEN="SECRET_CODE_TOKEN"
2
 
3
+ # CHROMA_HOST = "localhost"  (set this to the address where ChromaDB is hosted)
4
+
5
+
6
+ # EXAMPLE CONFIGURATIONS FOR DIFFERENT PROVIDERS(Use only one at once)
7
+ # ===========================================
8
+
9
+ # FOR OPENAI:(PAID)
10
+ # LLM_PROVIDER=openai
11
+ # LLM_API_KEY=sk-your-openai-api-key
12
+ # LLM_MODEL=gpt-3.5-turbo
13
+ # # Other options: gpt-4, gpt-4-turbo-preview, etc.
14
+
15
+ # FOR GROQ: (FREE: works very well -> prefer this)
16
+ # LLM_PROVIDER=groq
17
+ # LLM_API_KEY=gsk_your-groq-api-key
18
+ # LLM_MODEL=llama-3.3-70b-versatile
19
+ # # Other options: llama-3.1-70b-versatile, mixtral-8x7b-32768, etc.
20
+
21
+ # FOR OPENROUTER: (FREE: eventually hits rate limits)
22
+ # LLM_PROVIDER=openrouter
23
+ # LLM_API_KEY=sk-or-your-openrouter-api-key
24
+ # LLM_MODEL=meta-llama/llama-3.1-8b-instruct:free
25
+ # # Other options: anthropic/claude-3-haiku, google/gemma-7b-it, etc.
26
+
27
+ # ===========================================
28
+ # ADVANCED CONFIGURATION
29
+ # ===========================================
30
+ # Temperature (0.0 to 1.0) - controls randomness
31
+ # LLM_TEMPERATURE=0.1
32
+
33
+ # Maximum tokens for response
34
+ # LLM_MAX_TOKENS=4096
.gitignore CHANGED
@@ -66,3 +66,6 @@ notebooks
66
  np_text_model/classifier/sentencepiece.bpe.model
67
  np_text_model/classifier/tokenizer.json
68
 
 
 
 
 
66
  np_text_model/classifier/sentencepiece.bpe.model
67
  np_text_model/classifier/tokenizer.json
68
 
69
+ # vector database
70
+ chroma_data
71
+ chroma_database
README.md CHANGED
@@ -119,7 +119,9 @@ AI-Checker/
119
  2. **Run the API**
120
 
121
  ```bash
122
- uvicorn app:app --reload
 
 
123
  ```
124
 
125
  3. **Build Docker (optional)**
 
119
  2. **Run the API**
120
 
121
  ```bash
122
+ chroma run --path ./chroma_database ## to run chromadb locally
123
+ uvicorn app:app --reload --port 8001 ## fastapi (run after chromadb)
124
+
125
  ```
126
 
127
  3. **Build Docker (optional)**
app.py CHANGED
@@ -11,6 +11,7 @@ from features.nepali_text_classifier.routes import (
11
  )
12
  from features.image_classifier.routes import router as image_classifier_router
13
  from features.image_edit_detector.routes import router as image_edit_detector_router
 
14
  from fastapi.staticfiles import StaticFiles
15
 
16
  from config import ACCESS_RATE
@@ -41,6 +42,8 @@ app.include_router(text_classifier_router, prefix="/text")
41
  app.include_router(nepali_text_classifier_router, prefix="/NP")
42
  app.include_router(image_classifier_router, prefix="/AI-image")
43
  app.include_router(image_edit_detector_router, prefix="/detect")
 
 
44
 
45
 
46
  @app.get("/")
 
11
  )
12
  from features.image_classifier.routes import router as image_classifier_router
13
  from features.image_edit_detector.routes import router as image_edit_detector_router
14
+ from features.rag_chatbot.routes import router as rag_router
15
  from fastapi.staticfiles import StaticFiles
16
 
17
  from config import ACCESS_RATE
 
42
  app.include_router(nepali_text_classifier_router, prefix="/NP")
43
  app.include_router(image_classifier_router, prefix="/AI-image")
44
  app.include_router(image_edit_detector_router, prefix="/detect")
45
+ app.include_router(rag_router, prefix="/rag")
46
+
47
 
48
 
49
  @app.get("/")
features/rag_chatbot/__init__.py ADDED
File without changes
features/rag_chatbot/controller.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import asyncio
import logging
from io import BytesIO
from typing import Dict, Any

from fastapi import HTTPException, UploadFile, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from .rag_pipeline import route_and_process_query, add_document_to_rag, check_system_health
from .document_handler import extract_text_from_file

# Configure logging
# NOTE(review): basicConfig at import time configures the process-wide root
# logger; confirm this does not clash with app-level logging configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

security = HTTPBearer()

# Supported file types
# MIME types accepted by the upload endpoint: PDF, DOCX, plain text.
SUPPORTED_CONTENT_TYPES = {
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "text/plain"
}

# NOTE(review): routes.py docstrings advertise a 10MB limit while this
# constant enforces 100MB — confirm which limit is intended.
MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB
27
+
28
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify the Bearer token from the Authorization header.

    Compares the presented token against the MY_SECRET_TOKEN environment
    variable using a constant-time comparison.

    Raises:
        HTTPException: 500 if the server has no token configured,
            403 if the presented token does not match.
    """
    import hmac  # local import: stdlib constant-time comparison helper

    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")

    if not expected_token:
        logger.error("MY_SECRET_TOKEN not configured")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Server configuration error"
        )

    # Constant-time comparison prevents timing side channels. Do not log any
    # part of the presented token (the original logged a 10-char prefix,
    # which leaks secret material into logs).
    if not hmac.compare_digest(token, expected_token):
        logger.warning("Invalid token attempt")
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
47
+
48
async def handle_rag_query(query: str) -> Dict[str, Any]:
    """Handle an incoming query by routing it and getting the appropriate answer."""
    # Guard: reject missing/blank queries before doing any work.
    stripped = query.strip() if query else ""
    if not stripped:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Query cannot be empty"
        )

    # Guard: enforce an upper bound on query size.
    if len(query) > 1000:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Query too long. Please limit to 1000 characters."
        )

    try:
        logger.info(f"Processing query: {query[:50]}...")

        # The pipeline is synchronous; run it off the event loop.
        response = await asyncio.to_thread(route_and_process_query, query)

        logger.info(f"Query processed successfully. Route: {response.get('route', 'Unknown')}")
        return response

    except Exception as e:
        logger.error(f"Error processing query: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error processing your query. Please try again."
        )
79
+
80
async def handle_document_upload(file: UploadFile) -> Dict[str, str]:
    """Handle uploading a document to the RAG's vector store."""
    # Guard: a file must actually be supplied.
    if not file.filename:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No file provided"
        )

    # Guard: only the documented MIME types are accepted.
    if file.content_type not in SUPPORTED_CONTENT_TYPES:
        supported = ", ".join(SUPPORTED_CONTENT_TYPES)
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=f"Unsupported file type: {file.content_type}. Supported types: {supported}"
        )

    # Read the payload once so the size limit can be enforced.
    contents = await file.read()
    if len(contents) > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"File too large. Maximum size: {MAX_FILE_SIZE / (1024*1024):.1f}MB"
        )

    # Rewind so the extractor can re-read the stream from the start.
    await file.seek(0)

    try:
        logger.info(f"Processing file upload: {file.filename}")

        text = await extract_text_from_file(file)

        # Empty or unreadable extractions are rejected.
        if not text or not text.strip():
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="The file appears to be empty or could not be read."
            )

        # Very short extractions are almost certainly not useful content.
        if len(text) < 50:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="The extracted text is too short to be meaningful."
            )

        # Index the document; the call is synchronous, so run it off-loop.
        doc_metadata = {
            "source": file.filename,
            "content_type": file.content_type,
            "size": len(contents)
        }
        success = await asyncio.to_thread(add_document_to_rag, text, doc_metadata)

        if not success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to add document to the knowledge base"
            )

        logger.info(f"Successfully processed file: {file.filename}")

        return {
            "message": f"Successfully uploaded and processed '{file.filename}'. It is now available for querying.",
            "filename": file.filename,
            "text_length": len(text),
            "content_type": file.content_type
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing file {file.filename}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Error processing the file. Please try again."
        )
161
+
162
async def handle_health_check() -> Dict[str, Any]:
    """Handle health check requests."""
    try:
        # The probe is synchronous; run it in a worker thread.
        health_status = await asyncio.to_thread(check_system_health)

        # An unhealthy report is surfaced as 503 to the caller.
        if health_status["status"] == "unhealthy":
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail="Service is currently unhealthy"
            )
        return health_status

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Health check failed"
        )
features/rag_chatbot/document_handler.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ from fastapi import UploadFile, HTTPException
3
+ import PyPDF2
4
+ import docx
5
+
6
async def extract_text_from_file(file: UploadFile) -> str:
    """Extracts text from various file types.

    Dispatches on the upload's declared content type (PDF, DOCX, or plain text).

    Raises:
        HTTPException: 400 if a plain-text file is not valid UTF-8,
            415 if the content type is unsupported.
    """
    content = await file.read()
    file_stream = BytesIO(content)

    if file.content_type == "application/pdf":
        return extract_text_from_pdf(file_stream)
    elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return extract_text_from_docx(file_stream)
    elif file.content_type == "text/plain":
        try:
            # Decode the bytes directly (no BytesIO round-trip needed).
            return content.decode("utf-8")
        except UnicodeDecodeError:
            # Fix: a non-UTF-8 .txt previously raised an unhandled
            # UnicodeDecodeError, surfacing to clients as a 500.
            raise HTTPException(
                status_code=400,
                detail="Text file must be UTF-8 encoded."
            )
    else:
        raise HTTPException(
            status_code=415,
            detail="Unsupported file type. Please upload a .pdf, .docx, or .txt file."
        )
22
+
23
def extract_text_from_pdf(file_stream: BytesIO) -> str:
    """Extracts text from a PDF file."""
    reader = PyPDF2.PdfReader(file_stream)
    # Pages with no extractable text yield None; treat those as empty.
    return "".join(page.extract_text() or "" for page in reader.pages)
30
+
31
def extract_text_from_docx(file_stream: BytesIO) -> str:
    """Extracts text from a DOCX file."""
    document = docx.Document(file_stream)
    # One newline-terminated line per paragraph, matching the original
    # accumulation loop (including the trailing newline).
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
features/rag_chatbot/rag_pipeline.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import chromadb
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain_community.vectorstores import Chroma
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# NOTE(review): Document, OpenAI, and load_qa_chain are imported but not
# referenced anywhere in this module — confirm and consider removing.

load_dotenv()

# ChromaDB configuration
CHROMA_HOST = os.getenv("CHROMA_HOST", "localhost")  # change in env in production when hosted
COLLECTION_NAME = "company_docs_collection"

# LLM Provider Configuration (all values read from the environment)
LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower()
LLM_API_KEY = os.getenv("LLM_API_KEY")
LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo")
LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0"))
LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "2048"))

# Provider-specific configurations: OpenAI-compatible API base URL and the
# model used when none is configured explicitly.
PROVIDER_CONFIGS = {
    "openai": {
        "api_base": "https://api.openai.com/v1",
        "default_model": "gpt-3.5-turbo"
    },
    "groq": {
        "api_base": "https://api.groq.com/openai/v1",
        "default_model": "llama-3.3-70b-versatile"
    },
    "openrouter": {
        "api_base": "https://openrouter.ai/api/v1",
        "default_model": "mistralai/mistral-small-3.2-24b-instruct:free"
    }
}

# Module-level singletons, populated by initialize_pipelines() (lazily by the
# functions below, and eagerly once at import time at the bottom of the file).
vector_store = None
company_qa_chain = None
query_router_chain = None
cybersecurity_chain = None
llm = None
49
+
50
def get_llm_config():
    """Get the appropriate LLM configuration based on the provider.

    Returns a kwargs dict suitable for ChatOpenAI (model, API key, API base,
    temperature, max tokens).

    Raises:
        ValueError: if LLM_PROVIDER is not one of the supported providers.
    """
    if LLM_PROVIDER not in PROVIDER_CONFIGS:
        raise ValueError(f"Unsupported LLM provider: {LLM_PROVIDER}. Supported: {list(PROVIDER_CONFIGS.keys())}")

    config = PROVIDER_CONFIGS[LLM_PROVIDER].copy()

    # Use the explicitly configured model, otherwise the provider default.
    # Fix: the original compared LLM_MODEL against the literal "gpt-3.5-turbo"
    # to detect "not configured", which silently overrode a user who had
    # explicitly chosen that model. Checking the env var directly
    # distinguishes "unset" from "explicitly set".
    model = os.getenv("LLM_MODEL") or config["default_model"]

    return {
        "model": model,
        "openai_api_key": LLM_API_KEY,
        "openai_api_base": config["api_base"],
        "temperature": LLM_TEMPERATURE,
        "max_tokens": LLM_MAX_TOKENS,
    }
67
+
68
def initialize_llm():
    """Initialize the LLM based on the configured provider.

    Raises:
        ValueError: if LLM_API_KEY is not set in the environment.
    """
    # An API key is mandatory for every supported provider.
    if not LLM_API_KEY:
        raise ValueError(f"LLM_API_KEY environment variable is required for {LLM_PROVIDER}")

    llm_kwargs = get_llm_config()
    print(f"Initializing {LLM_PROVIDER.upper()} with model: {llm_kwargs['model']}")
    return ChatOpenAI(**llm_kwargs)
78
+
79
def initialize_pipelines():
    """Initializes all required models, chains, and the vector store.

    Populates the module-level singletons (llm, vector_store, and the three
    LLMChain instances). Safe to call again; each call rebuilds everything.

    Raises:
        ConnectionError: if the ChromaDB server cannot be reached.
        ValueError: propagated from initialize_llm()/get_llm_config().
    """
    global vector_store, company_qa_chain, query_router_chain, cybersecurity_chain, llm

    try:
        # Initialize LLM
        llm = initialize_llm()

        # Initialize embeddings (small local sentence-transformer on CPU).
        embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

        # Initialize ChromaDB client. Improvement: the port was hard-coded to
        # 8000; it is now configurable via CHROMA_PORT (default unchanged).
        chroma_port = int(os.getenv("CHROMA_PORT", "8000"))
        try:
            chroma_client = chromadb.HttpClient(host=CHROMA_HOST, port=chroma_port)
            chroma_client.heartbeat()  # fail fast if the server is down
        except Exception as e:
            raise ConnectionError("Failed to connect to ChromaDB.") from e

        # Initialize vector store backed by the shared collection.
        vector_store = Chroma(
            client=chroma_client,
            collection_name=COLLECTION_NAME,
            embedding_function=embeddings,
        )

        # Query Router Chain: classifies each query into one of three routes.
        router_template = """You are a query classifier. Classify the following query into one of these categories:
- COMPANY: Questions about our company, its products, services, or general information
- CYBERSECURITY: Questions about cybersecurity, security threats, best practices, or vulnerabilities
- OFF_TOPIC: Questions that don't fit the above categories

Query: {query}

Respond with only the category name (COMPANY, CYBERSECURITY, or OFF_TOPIC):"""

        router_prompt = PromptTemplate(
            input_variables=["query"],
            template=router_template
        )

        query_router_chain = LLMChain(
            llm=llm,
            prompt=router_prompt
        )

        # Company QA Chain: answers from retrieved document context.
        company_qa_template = """You are a helpful assistant for CyberAlertNepal. Answer the following question about our company using the information provided and links if only available. Give a natural, direct and polite response.

Question: {question}

Information:
{context}

Answer:"""

        company_qa_prompt = PromptTemplate(
            input_variables=["question", "context"],
            template=company_qa_template
        )

        company_qa_chain = LLMChain(
            llm=llm,
            prompt=company_qa_prompt
        )

        # Cybersecurity Chain: answers directly from the model, instructed to
        # refuse rather than speculate.
        cybersecurity_template = """You are a cybersecurity professional. Answer the following question truthfully and concisely.
If you are not 100% sure about the answer, simply respond with: "I am not sure about the answer."
Do not add extra explanations or assumptions. Do not provide false or speculative information.

Question: {question}

Provide a comprehensive and accurate answer about cybersecurity:"""

        cybersecurity_prompt = PromptTemplate(
            input_variables=["question"],
            template=cybersecurity_template
        )

        cybersecurity_chain = LLMChain(
            llm=llm,
            prompt=cybersecurity_prompt
        )

        print(f"Successfully initialized pipelines with {LLM_PROVIDER.upper()}")

    except Exception as e:
        print(f"Error initializing pipelines: {e}")
        raise
172
+
173
def add_document_to_rag(text: str, metadata: dict):
    """Splits a document and adds it to the ChromaDB index.

    Returns True on success, False when splitting produced no chunks or
    indexing failed.
    """
    global vector_store

    # Lazily bootstrap the pipelines on first use.
    if not vector_store:
        initialize_pipelines()

    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        chunks = splitter.create_documents([text], metadatas=[metadata])

        if not chunks:
            print("Document was empty after splitting, not adding to ChromaDB.")
            return False

        vector_store.add_documents(chunks)
        print("Successfully added documents.")
        return True

    except Exception as e:
        print(f"Error adding document to RAG: {e}")
        return False
198
+
199
def route_and_process_query(query: str):
    """Routes the query and processes it using the appropriate pipeline.

    Classifies the query (COMPANY / CYBERSECURITY / OFF_TOPIC) and answers it
    with the matching chain. Returns a dict with at least 'answer', 'source',
    'route', and 'provider'; errors are reported in the payload rather than
    raised.
    """
    global query_router_chain, vector_store, company_qa_chain, cybersecurity_chain

    # Lazily bootstrap everything if any component is missing.
    if not all([query_router_chain, vector_store, company_qa_chain, cybersecurity_chain]):
        initialize_pipelines()

    try:
        # Improvement: the original re-ran get_llm_config() for every
        # response dict; compute the invariant values once.
        model_name = get_llm_config()["model"]
        provider = LLM_PROVIDER.upper()

        # 1. Classify the query
        route = query_router_chain.run(query).strip().upper()

        # 2. Route to appropriate logic
        if "CYBERSECURITY" in route:
            answer = cybersecurity_chain.run(question=query)
            return {
                "answer": answer,
                "source": "Cybersecurity Knowledge Base",
                "route": "CYBERSECURITY",
                "provider": provider,
                "model": model_name
            }

        elif "COMPANY" in route:
            # Perform similarity search on ChromaDB
            docs = vector_store.similarity_search(query, k=3)

            if not docs:
                return {
                    "answer": "I could not find any relevant information to answer your question.",
                    "source": "Company Documents",
                    "route": "COMPANY",
                    "provider": provider,
                    "model": model_name
                }

            # Combine document content for context
            context = "\n\n".join(doc.page_content for doc in docs)

            # Run the custom QA chain
            answer = company_qa_chain.run(question=query, context=context)
            sources = list({doc.metadata.get("source", "Unknown") for doc in docs})

            return {
                "answer": answer,
                "source": "Company Documents",
                "documents": sources,
                "route": "COMPANY",
                "provider": provider,
                "model": model_name
            }

        else:  # OFF_TOPIC
            return {
                "answer": "I am a specialized assistant of CyberAlertNepal. I cannot answer questions outside of cybersecurity topics.",
                "source": "N/A",
                "route": "OFF_TOPIC",
                "provider": provider,
                "model": model_name
            }

    except Exception as e:
        print(f"Error processing query: {e}")
        return {
            "answer": "I encountered an error while processing your query. Please try again.",
            "source": "Error",
            "route": None,
            "documents": None,
            "provider": LLM_PROVIDER.upper(),
            "error": str(e)
        }
271
+
272
def check_system_health():
    """Check if all components are properly initialized.

    Returns a status dict; never raises (failures are reported as unhealthy).
    """
    try:
        # Test the ChromaDB connection (skipped when the store isn't built).
        # NOTE(review): this reaches into Chroma's private _client attribute.
        if vector_store:
            vector_store._client.heartbeat()

        # Report readiness of each module-level singleton.
        components = {
            name: obj is not None
            for name, obj in (
                ("vector_store", vector_store),
                ("company_qa_chain", company_qa_chain),
                ("query_router_chain", query_router_chain),
                ("cybersecurity_chain", cybersecurity_chain),
                ("llm", llm),
            )
        }

        return {
            "status": "healthy" if all(components.values()) else "unhealthy",
            "components": components,
            "provider": LLM_PROVIDER.upper(),
            "model": get_llm_config()["model"] if llm else "Not initialized"
        }

    except Exception as e:
        return {
            "status": "unhealthy",
            "error": str(e),
            "provider": LLM_PROVIDER.upper()
        }
301
+
302
def test_llm_connection():
    """Test the LLM API connection.

    Returns a dict describing success or failure; never raises.
    """
    try:
        # Bootstrap the pipelines if nothing has initialized the LLM yet.
        if not llm:
            initialize_pipelines()

        # Simple round-trip prompt to confirm the API is reachable.
        reply = llm("Say 'Hello, LLM is working!'")
        result = {
            "success": True,
            "provider": LLM_PROVIDER.upper(),
            "model": get_llm_config()["model"],
            "response": str(reply)
        }
        return result
    except Exception as e:
        return {
            "success": False,
            "provider": LLM_PROVIDER.upper(),
            "error": str(e)
        }
322
+
323
# Initialize pipelines on module import
# NOTE(review): this performs network/model setup as an import side effect;
# a failure here is swallowed (logged only) and the pipelines are retried
# lazily by add_document_to_rag / route_and_process_query above.
try:
    initialize_pipelines()
except Exception as e:
    print(f"Failed to initialize pipelines on startup: {e}")
features/rag_chatbot/routes.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Request
2
+ from fastapi.security import HTTPBearer
3
+ from pydantic import BaseModel, Field
4
+ from slowapi.util import get_remote_address
5
+ from slowapi import Limiter
6
+ from typing import Optional
7
+ from config import ACCESS_RATE
8
+ from .controller import (
9
+ handle_rag_query,
10
+ handle_document_upload,
11
+ handle_health_check,
12
+ verify_token,
13
+ )
14
+
15
# Rate limiter keyed on the client address, matching the other feature routers.
limiter = Limiter(key_func=get_remote_address)
# BUG FIX: app.py already mounts this router with prefix="/rag"
# (app.include_router(rag_router, prefix="/rag")); declaring prefix="/rag"
# here as well put every endpoint under /rag/rag/... — the prefix is now
# applied only at the include_router call.
router = APIRouter(tags=["RAG Chatbot"])
security = HTTPBearer()
18
+
19
class QueryInput(BaseModel):
    """Request body for POST /question."""
    # 1–1000 characters, enforced by pydantic before the handler runs.
    query: str = Field(..., min_length=1, max_length=1000, description="The question to ask")
21
+
22
class QueryResponse(BaseModel):
    """Response payload for POST /question (mirrors route_and_process_query's dict)."""
    answer: str
    source: str
    route: Optional[str] = None  # COMPANY / CYBERSECURITY / OFF_TOPIC, or None on error
    documents: Optional[list] = None  # source filenames for COMPANY answers
    error: Optional[str] = None  # populated only when the pipeline failed
28
+
29
class UploadResponse(BaseModel):
    """Response payload for POST /upload."""
    message: str
    filename: str
    text_length: int  # number of characters extracted from the document
    content_type: str
34
+
35
class HealthResponse(BaseModel):
    """Response payload for GET /health."""
    status: str  # "healthy" or "unhealthy"
    components: Optional[dict] = None  # per-component boolean readiness flags
    error: Optional[str] = None
39
+
40
@router.post("/question", response_model=QueryResponse)
@limiter.limit(ACCESS_RATE)
async def ask_question(
    request: Request,
    data: QueryInput,
    token: str = Depends(verify_token)
) -> QueryResponse:
    """
    Ask a question to the RAG chatbot.

    The chatbot can answer:
    - Company-related questions (based on uploaded documents)
    - Cybersecurity questions (from knowledge base)
    """
    # Delegate all validation and routing to the controller layer.
    result = await handle_rag_query(data.query)
    return QueryResponse(**result)
56
+
57
@router.post("/upload", response_model=UploadResponse)
@limiter.limit(ACCESS_RATE)
async def upload_document(
    request: Request,
    file: UploadFile = File(..., description="Document file (PDF, DOCX, or TXT)"),
    token: str = Depends(verify_token)
) -> UploadResponse:
    """
    Upload a document to the company knowledge base.

    Supported formats:
    - PDF (.pdf)
    - Word documents (.docx)
    - Plain text (.txt)

    Maximum file size: 100MB
    """
    # Doc fix: the controller enforces MAX_FILE_SIZE = 100MB; the previous
    # docstring incorrectly advertised 10MB.
    response = await handle_document_upload(file)
    return UploadResponse(**response)
76
+
77
@router.get("/health", response_model=HealthResponse)
@limiter.limit(ACCESS_RATE)
async def health_check(request: Request) -> HealthResponse:
    """
    Check the health status of the RAG system.

    Returns the status of all components:
    - ChromaDB connection
    - Vector store
    - AI chains
    """
    # The controller raises 503 itself when the system is unhealthy.
    status_payload = await handle_health_check()
    return HealthResponse(**status_payload)
90
+
91
@router.get("/info")
@limiter.limit(ACCESS_RATE)
async def get_system_info(request: Request):
    """Get information about the RAG system capabilities."""
    return {
        "name": "RAG Chatbot",
        "version": "1.0.0",
        "description": "A specialized chatbot for cybersecurity and company-related questions",
        "capabilities": [
            "Company document Q&A (based on uploaded documents)",
            "Cybersecurity knowledge and best practices",
            "Document upload and processing (PDF, DOCX, TXT)"
        ],
        "supported_file_types": [
            "application/pdf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "text/plain"
        ],
        # Fix: the controller enforces MAX_FILE_SIZE = 100MB; this previously
        # reported 10 and contradicted the actual limit.
        "max_file_size_mb": 100,
        "max_query_length": 1000
    }
requirements.txt CHANGED
@@ -18,3 +18,13 @@ scipy
18
  fitz
19
  frontend
20
  tools
 
 
 
 
 
 
 
 
 
 
 
18
  fitz
19
  frontend
20
  tools
21
+ langchain
22
+ langchain-community
23
+ langchain-openai
24
+ faiss-cpu
25
+ PyPDF2
26
+ tiktoken
27
+ chromadb
28
+ langchain_chroma
29
+ sentence-transformers
30
+ tf-keras