|
from buffalo_rag.scraper.scraper import BuffaloScraper |
|
from buffalo_rag.embeddings.chunker import DocumentChunker |
|
from buffalo_rag.vector_store.db import VectorStore |
|
from buffalo_rag.model.rag import BuffaloRAG |
|
|
|
def run_scraper(seed_url: str, max_pages: int) -> None:
    """Scrape pages starting at ``seed_url`` and then rebuild the index.

    After scraping, the chunk/embedding rebuild and the replacement of the
    module-level ``vector_store`` and ``rag`` objects are delegated to
    ``refresh_index`` so the two entry points cannot drift apart.

    Args:
        seed_url: URL handed to ``BuffaloScraper`` as its ``seed_url``.
        max_pages: Forwarded to ``BuffaloScraper.scrape`` as ``max_pages``.

    NOTE(review): presumably invoked as a background task by the caller;
    nothing in this function schedules itself -- confirm at the call site.
    """
    scraper = BuffaloScraper(seed_url=seed_url)
    scraper.scrape(max_pages=max_pages)

    # Same post-processing as a manual refresh: re-chunk, re-embed, and swap
    # in a fresh VectorStore / BuffaloRAG pair at module level.
    refresh_index()
|
|
|
def refresh_index():
    """Rebuild embeddings and rebind the module-level store and RAG objects.

    Steps, in order:
      1. Create a ``DocumentChunker``, build chunks, and create embeddings
         from them.
      2. Rebind the global ``vector_store`` to a new ``VectorStore``.
      3. Rebind the global ``rag`` to a new ``BuffaloRAG`` that wraps the
         new store.

    NOTE(review): presumably scheduled as a background task by the caller;
    this function itself runs synchronously -- confirm at the call site.
    """
    # Both names are rebound at module scope so other code in this module
    # sees the refreshed instances.
    global vector_store, rag

    doc_chunker = DocumentChunker()
    pieces = doc_chunker.create_chunks()
    doc_chunker.create_embeddings(pieces)

    # The store is constructed only after embeddings have been created.
    vector_store = VectorStore()
    rag = BuffaloRAG(vector_store=vector_store)