|
import os |
|
import argparse |
|
import subprocess |
|
import time |
|
import requests |
|
|
|
def run_scraper():
    """Run the web scraper unless enough scraped files already exist.

    Counts the regular files directly under ``data/raw``. When more than
    100 are present the scraping stage is skipped. Otherwise, including
    when the directory does not exist yet, a ``BuffaloScraper`` is created
    and asked to scrape with ``max_pages=100``.
    """
    raw_dir = "data/raw"

    # A missing directory just means nothing has been scraped yet, so start
    # from zero instead of leaving the count undefined.
    num_pages = 0
    if os.path.isdir(raw_dir):
        num_pages = sum(
            1
            for name in os.listdir(raw_dir)
            if os.path.isfile(os.path.join(raw_dir, name))
        )

    if num_pages > 100:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported only when scraping actually happens, so the skip path does
    # not need to load the scraper package.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=100)
    print("Scraping completed!")
|
|
|
def build_embeddings():
    """Create document chunks and their embeddings via ``DocumentChunker``."""
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")

    document_chunker = DocumentChunker()
    # The chunks produced by the first step are the input of the second.
    document_chunks = document_chunker.create_chunks()
    document_chunker.create_embeddings(document_chunks)

    print("Embeddings created!")
|
|
|
def run_api():
    """Launch the FastAPI backend with uvicorn and wait for it to exit."""
    print("Starting API server...")
    uvicorn_command = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--reload",
    ]
    subprocess.run(uvicorn_command)
|
|
|
def run_flask_frontend():
    """Serve the Flask frontend with ``flask run`` on port 7860."""
    print("Starting Flask frontend...")

    # Copy the current environment and point Flask at the frontend app.
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"

    flask_command = ["flask", "run", "--host=0.0.0.0", "--port=7860"]
    subprocess.run(flask_command, env=flask_env)
|
|
|
def create_project_structure():
    """Create the package and data directories the project expects.

    Directories that already exist are left as they are.
    """
    package_dirs = (
        "buffalo_rag",
        "buffalo_rag/scraper",
        "buffalo_rag/embeddings",
        "buffalo_rag/vector_store",
        "buffalo_rag/model",
        "buffalo_rag/api",
        "buffalo_rag/frontend",
        "buffalo_rag/utils",
    )
    data_dirs = (
        "data",
        "data/raw",
        "data/processed",
        "data/embeddings",
    )

    for path in package_dirs + data_dirs:
        os.makedirs(path, exist_ok=True)

    print("Project structure created!")
|
|
|
def build_react_frontend():
    """Build the React frontend and copy it into the API's static directory.

    Runs ``npm run build`` inside ``frontend`` and then copies the contents
    of ``frontend/build`` into ``buffalo_rag/api/static``. If the build
    command fails, the build directory is missing, or the copy fails, a
    message is printed and the success message is not shown.
    """
    print("Building React frontend...")

    build_result = subprocess.run(["npm", "run", "build"], cwd="frontend")
    if build_result.returncode != 0:
        print("React frontend build failed.")
        return

    if not os.path.isdir("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
        return

    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    # No shell is involved here, so a "*" argument would reach cp literally
    # and match nothing; "build/." copies the directory's contents instead.
    copy_result = subprocess.run(["cp", "-r", "frontend/build/.", static_dir])
    if copy_result.returncode != 0:
        print("Copying the React build into the static directory failed.")
        return

    print("React frontend built successfully!")
|
|
|
def setup_react_frontend():
    """Bootstrap, install, build, and publish the React frontend.

    Steps, in order: run ``setup_frontend.sh`` when no ``frontend`` path
    exists, ``npm install``, ``npm run build``, then copy the contents of
    ``frontend/build`` into ``buffalo_rag/api/static``.
    """
    print("Setting up React frontend...")

    frontend_dir = "frontend"
    if not os.path.exists(frontend_dir):
        # No frontend checkout yet: let the helper script create it.
        subprocess.run(["bash", "setup_frontend.sh"])

    # Install dependencies first, then produce the production build.
    for npm_args in (["install"], ["run", "build"]):
        subprocess.run(["npm", *npm_args], cwd=frontend_dir)

    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    if not os.path.exists("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
    else:
        # "build/." copies the directory's contents rather than the directory.
        subprocess.run(["cp", "-r", "frontend/build/.", static_dir])

    print("React frontend setup completed!")
|
|
|
def wait_for_server(url, timeout=30, interval=1):
    """Poll ``url`` until it responds or ``timeout`` seconds have elapsed.

    Args:
        url: Address requested with HTTP GET on every attempt.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to pause between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 arrives,
        False if the deadline passes first.
    """
    # Measure against the monotonic clock so system clock adjustments can
    # neither shorten nor extend the wait.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # The request failed (e.g. nothing is listening yet); retry.
            pass
        else:
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True

        # Pause before the next attempt, but never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval, remaining))

    print(f"Timeout waiting for server at {url}.")
    return False
|
|
|
def _run_servers():
    """Start the API and Flask servers together and stop both on exit."""
    flask_port = 7860

    api_process = subprocess.Popen(
        ["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
    )
    flask_process = None
    try:
        # Only bring up the frontend once the API answers requests.
        if wait_for_server('http://localhost:8000/', timeout=60):
            # NOTE(review): extra pause kept from the original code; presumably
            # lets the API finish starting up. Confirm it is still needed.
            time.sleep(3)

            flask_process = subprocess.Popen(
                ["flask", "run", "--host=0.0.0.0", f"--port={flask_port}"],
                env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"},
            )

            import webbrowser

            # Open the browser on the port Flask was actually started on.
            webbrowser.open(f"http://localhost:{flask_port}")

            input("Press Enter to stop the server and exit...\n")
    finally:
        # Runs on normal exit, Ctrl+C, and errors alike, so no child
        # process is left behind.
        for process in (api_process, flask_process):
            if process is not None and process.poll() is None:
                process.terminate()


def main(argv=None):
    """Parse command-line flags and run the selected pipeline stages.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the arguments from ``sys.argv``.

    Stages run in this order when selected: project structure setup, Flask
    template setup, scraping, embedding build, servers, standalone frontend.
    ``--all`` and ``--run`` start the API and the Flask frontend together;
    ``--api`` alone runs only the API server in the foreground.
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--setup", action="store_true", help="Create project structure")
    parser.add_argument("--flask-setup", action="store_true", help="Setup Flask frontend")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")

    args = parser.parse_args(argv)

    if args.setup or args.all:
        create_project_structure()

    if args.flask_setup or args.all or args.run:
        # Imported on demand: only these modes need the template setup module.
        from setup_flask_templates import setup_flask_templates

        setup_flask_templates()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        _run_servers()
    elif args.api:
        run_api()

    if args.frontend:
        run_flask_frontend()
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()