UB_VSA / buffalo_rag /api /background_tasks.py
SakshamLak's picture
Upload 2 files
867fb31 verified
from buffalo_rag.scraper.scraper import BuffaloScraper
from buffalo_rag.embeddings.chunker import DocumentChunker
from buffalo_rag.vector_store.db import VectorStore
from buffalo_rag.model.rag import BuffaloRAG
def run_scraper(seed_url: str, max_pages: int) -> None:
    """Run the web scraper in the background, then refresh the index.

    Args:
        seed_url: Passed to ``BuffaloScraper`` as its ``seed_url``.
        max_pages: Passed to ``BuffaloScraper.scrape`` as ``max_pages``.

    Side effects:
        Once scraping finishes, ``refresh_index()`` is invoked. It re-creates
        the chunks and embeddings and rebinds this module's ``vector_store``
        and ``rag`` globals to freshly constructed objects.
    """
    scraper = BuffaloScraper(seed_url=seed_url)
    scraper.scrape(max_pages=max_pages)

    # The post-scrape steps are exactly the index refresh, so reuse the shared
    # helper instead of keeping a second copy of the same sequence here.
    # NOTE(review): the globals it rebinds belong to this module; confirm the
    # API layer reads ``vector_store`` / ``rag`` from here rather than holding
    # its own references to the old objects.
    refresh_index()
def refresh_index():
    """Rebuild the embeddings and swap in a fresh vector store and RAG model.

    Steps, in order:
      1. Create chunks with a new ``DocumentChunker`` and embed them.
      2. Rebind the module-level ``vector_store`` to a new ``VectorStore``.
      3. Rebind the module-level ``rag`` to a ``BuffaloRAG`` built on that
         store.
    """
    global vector_store, rag

    # Regenerate chunks and their embeddings before anything is reloaded.
    document_chunker = DocumentChunker()
    fresh_chunks = document_chunker.create_chunks()
    document_chunker.create_embeddings(fresh_chunks)

    # Construct a new store first, then build the RAG model on top of it.
    vector_store = VectorStore()
    rag = BuffaloRAG(vector_store=vector_store)