File size: 4,224 Bytes
d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 1f7bf1a d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 2b03f76 d8f06d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import os
import argparse
import subprocess
import time
import requests
# Port configurations
# API_PORT is passed to uvicorn as --port and used to build the readiness-check URL.
API_PORT = 8000
# FRONTEND_PORT is passed to `flask run` as --port and used for the browser URL.
FRONTEND_PORT = 7860
# Handed to the scraper as max_pages; the scrape stage is skipped when more than
# this many files already exist under data/raw.
MAX_PAGES = 1000 # Max pages to be scraped
def run_scraper():
    """Run the web scraper unless enough scraped pages already exist.

    Counts the regular files directly under ``data/raw``. A missing
    directory counts as zero pages, so a first run on a clean checkout
    still scrapes. When the count exceeds ``MAX_PAGES`` the stage is
    skipped; otherwise ``BuffaloScraper.scrape`` is called with
    ``max_pages=MAX_PAGES``.
    """
    raw_dir = "data/raw"
    if os.path.isdir(raw_dir):
        num_pages = sum(
            1
            for name in os.listdir(raw_dir)
            if os.path.isfile(os.path.join(raw_dir, name))
        )
    else:
        # Nothing scraped yet: fall through to the scraper instead of
        # silently doing nothing.
        num_pages = 0

    # NOTE(review): the strict ">" means exactly MAX_PAGES files still
    # triggers a new scrape -- confirm this is intended.
    if num_pages > MAX_PAGES:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported lazily so the scraper package is only loaded when it is used.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=MAX_PAGES)
    print("Scraping completed!")
def build_embeddings():
    """Chunk the documents and generate embeddings for the chunks.

    Instantiates ``DocumentChunker``, asks it for chunks, and feeds those
    chunks straight back into its ``create_embeddings`` method.
    """
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    document_chunker = DocumentChunker()
    document_chunker.create_embeddings(document_chunker.create_chunks())
    print("Embeddings created!")
def start_api_server():
    """Launch the FastAPI backend under uvicorn and wait for it to exit.

    The server binds to all interfaces on ``API_PORT`` with auto-reload
    enabled.
    """
    print("Starting API server...")
    uvicorn_command = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host",
        "0.0.0.0",
        "--port",
        str(API_PORT),
        "--reload",
    ]
    subprocess.run(uvicorn_command)
def start_flask_frontend():
    """Launch the Flask frontend with ``flask run`` and wait for it to exit.

    The child process inherits the current environment with ``FLASK_APP``
    pointed at the frontend module, and binds to all interfaces on
    ``FRONTEND_PORT``.
    """
    print("Starting Flask frontend...")
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"
    flask_command = ["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"]
    subprocess.run(flask_command, env=flask_env)
def wait_for_server(url, timeout=30, interval=1):
    """Poll *url* until it responds or *timeout* seconds have elapsed.

    Args:
        url: Address to send GET requests to.
        timeout: Maximum number of seconds to keep polling. A value of
            zero or less returns False without sending any request.
        interval: Seconds to sleep between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 arrives,
        False if the deadline passes first.
    """
    # Use the monotonic clock so system clock adjustments cannot shorten or
    # stretch the wait.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")
    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # Server not reachable yet; this is the expected state while it
            # is still starting, so just retry.
            pass
        else:
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(interval, remaining))

    print(f"Timeout waiting for server at {url}.")
    return False
def _stop_process(process, grace_period=5):
    """Terminate *process* if it is still running, then reap it.

    Does nothing when *process* is None or has already exited. If the
    process ignores the terminate request for *grace_period* seconds it is
    killed.
    """
    if process is None or process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=grace_period)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def main():
    """Parse the command-line flags and run the requested pipeline stages.

    Stages run in a fixed order: scrape, build embeddings, servers. With
    ``--all`` or ``--run`` the API server is started in the background, the
    Flask frontend is started once the API answers, a browser tab is
    opened, and both servers are stopped when the user presses Enter.
    With only ``--api`` the API server runs in the foreground.
    ``--frontend`` runs the Flask frontend in the foreground afterwards.
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")
    args = parser.parse_args()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        api_process = subprocess.Popen(["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", str(API_PORT), "--reload"])
        flask_process = None
        try:
            api_ready = wait_for_server(f'http://localhost:{API_PORT}/', timeout=60)
            if api_ready:
                # NOTE(review): presumably extra settling time for the API
                # after its first response -- confirm it is still needed.
                time.sleep(3)
                flask_process = subprocess.Popen(["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"],
                                                 env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"})
                import webbrowser
                webbrowser.open(f'http://localhost:{FRONTEND_PORT}')
                input("Press Enter to stop the server and exit...\n")
        finally:
            # Runs on normal exit, Ctrl+C, EOF on stdin, or any error above,
            # so the child servers are never left orphaned.
            _stop_process(api_process)
            _stop_process(flask_process)
    elif args.api:
        start_api_server()

    if args.frontend:
        start_flask_frontend()
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()