Spaces:
Build error
Build error
import os
import uuid
from typing import List, Dict

import chromadb
from chromadb.utils import embedding_functions
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
class RAGSystem:
    """
    Retrieval-Augmented Generation system for providing documentation context.

    Wraps a persistent Chroma collection whose documents are embedded with the
    ``all-MiniLM-L6-v2`` sentence-transformers model. When the collection is
    empty at construction time it is seeded with a few built-in Python tips.
    """

    # Seed documents inserted by _load_default_documents() into an empty
    # collection. Kept as an immutable tuple so the list cannot be altered
    # by accident between instances.
    _DEFAULT_DOCS = (
        {
            "id": "1",
            "text": "Python functions are defined using the def keyword. Example: def hello(): return 'Hello'",
            "metadata": {"source": "python_basics"},
        },
        {
            "id": "2",
            "text": "Use type hints for better code documentation. Example: def add(a: int, b: int) -> int:",
            "metadata": {"source": "best_practices"},
        },
        {
            "id": "3",
            "text": "Always handle exceptions with try-except blocks to prevent crashes.",
            "metadata": {"source": "error_handling"},
        },
        {
            "id": "4",
            "text": "Use list comprehensions for concise list creation: [x*2 for x in range(10)]",
            "metadata": {"source": "python_tips"},
        },
        {
            "id": "5",
            "text": "Document your code with docstrings. Use triple quotes for multi-line documentation.",
            "metadata": {"source": "documentation"},
        },
    )

    def __init__(
        self,
        collection_name="python_docs",
        persist_directory="./chroma_db",
    ):
        """
        Open (or create) the backing Chroma collection.

        Args:
            collection_name: Name of the Chroma collection to use.
            persist_directory: Directory handed to ``chromadb.PersistentClient``
                as its storage path. Defaults to the previously hard-coded
                ``./chroma_db``.
        """
        self.client = chromadb.PersistentClient(path=persist_directory)

        # Use sentence transformers for embeddings
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )

        # Get or create collection
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_function,
        )

        # Load default documents if collection is empty
        if self.collection.count() == 0:
            self._load_default_documents()

    def _load_default_documents(self):
        """Insert the built-in seed documents (``_DEFAULT_DOCS``) into the collection."""
        seeds = self._DEFAULT_DOCS
        self.collection.add(
            documents=[doc["text"] for doc in seeds],
            # Copy each metadata dict so the class-level constant is never
            # shared with (or mutated by) the storage backend.
            metadatas=[dict(doc["metadata"]) for doc in seeds],
            ids=[doc["id"] for doc in seeds],
        )

    def add_document(self, text: str, source: str = "user") -> str:
        """
        Add a new document to the knowledge base.

        Args:
            text: Document content to store.
            source: Label recorded under the ``"source"`` metadata key.

        Returns:
            The ID under which the document was stored.
        """
        # A count-derived ID repeats as soon as a document is removed from the
        # collection or two writers add at the same time; a random UUID does not.
        doc_id = f"doc_{uuid.uuid4().hex}"
        self.collection.add(
            documents=[text],
            metadatas=[{"source": source}],
            ids=[doc_id],
        )
        return doc_id

    def search(self, query: str, n_results: int = 3) -> List[Dict]:
        """
        Search for relevant documents.

        Args:
            query: Search query
            n_results: Maximum number of results to return. Values larger than
                the number of stored documents are clamped to that number;
                other values are passed to the collection unchanged.

        Returns:
            List of dicts with ``"text"``, ``"metadata"`` and ``"distance"``
            keys, in the order the collection returned them. Empty when the
            collection holds no documents.
        """
        available = self.collection.count()
        if available == 0:
            # Nothing to search; skip the query entirely.
            return []

        results = self.collection.query(
            query_texts=[query],
            # Never request more neighbours than exist in the collection.
            n_results=min(n_results, available),
        )
        if not results["documents"]:
            return []

        # Only one query text was sent, so the matches sit at index 0 of each
        # per-query list.
        return [
            {"text": text, "metadata": metadata, "distance": distance}
            for text, metadata, distance in zip(
                results["documents"][0],
                results["metadatas"][0],
                results["distances"][0],
            )
        ]

    def get_context(self, query: str, max_docs: int = 2) -> str:
        """
        Get relevant context for a coding query.

        Args:
            query: Coding task or question
            max_docs: Maximum number of documents to include in the context.

        Returns:
            A header line followed by one numbered line per document, joined
            with newlines; an empty string when nothing relevant is found or
            ``max_docs`` is less than 1.
        """
        if max_docs < 1:
            return ""

        # Ask only for as many documents as will actually be used.
        relevant_docs = self.search(query, n_results=max_docs)
        if not relevant_docs:
            return ""

        # Combine top documents into context
        context_parts = ["Relevant documentation:"]
        context_parts.extend(
            f"{rank}. {doc['text']}"
            for rank, doc in enumerate(relevant_docs[:max_docs], start=1)
        )
        return "\n".join(context_parts)