File size: 4,224 Bytes
d8f06d4
 
 
 
 
 
2b03f76
 
 
 
 
d8f06d4
 
 
 
 
 
2b03f76
d8f06d4
 
 
 
2b03f76
d8f06d4
 
 
 
 
 
 
 
 
 
 
 
2b03f76
 
d8f06d4
2b03f76
d8f06d4
2b03f76
 
d8f06d4
2b03f76
d8f06d4
 
 
 
 
 
 
 
 
2b03f76
d8f06d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f7bf1a
d8f06d4
 
 
 
 
 
 
 
2b03f76
 
d8f06d4
 
 
2b03f76
d8f06d4
 
 
2b03f76
d8f06d4
 
 
 
 
 
 
 
 
 
2b03f76
d8f06d4
 
2b03f76
d8f06d4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import argparse
import subprocess
import time
import requests

# Port configurations
# API_PORT is passed to uvicorn via --port; FRONTEND_PORT is passed to `flask run`.
API_PORT = 8000
FRONTEND_PORT = 7860
# Upper bound handed to the scraper (max_pages) and also the file-count
# threshold above which the scraping stage is skipped.
MAX_PAGES = 1000 # Max pages to be scraped

def run_scraper():
    """Run the web scraper to collect data, unless enough pages already exist.

    Counts the regular files directly under ``data/raw``. If more than
    ``MAX_PAGES`` files are found, the scraping stage is skipped. Otherwise,
    including when ``data/raw`` does not exist yet (treated as zero files),
    a ``BuffaloScraper`` is created and asked to scrape up to ``MAX_PAGES``
    pages.
    """
    # Imported here rather than at module level, so the scraper package is
    # only loaded when this stage actually runs.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    raw_dir = "data/raw"
    try:
        with os.scandir(raw_dir) as entries:
            num_pages = sum(1 for entry in entries if entry.is_file())
    except FileNotFoundError:
        # First run: nothing has been scraped yet, so fall through and scrape
        # instead of silently doing nothing.
        num_pages = 0

    # NOTE(review): the skip only triggers with strictly more than MAX_PAGES
    # files; if the scraper writes one file per page this may never be true.
    # Confirm whether ">=" was intended.
    if num_pages > MAX_PAGES:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=MAX_PAGES)
    print("Scraping completed!")

def build_embeddings():
    """Split the documents into chunks and create embeddings for them.

    Instantiates a ``DocumentChunker``, asks it for the chunks, and passes
    those chunks back to it to create the embeddings. Progress is reported
    on stdout.
    """
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")

    splitter = DocumentChunker()
    document_chunks = splitter.create_chunks()
    splitter.create_embeddings(document_chunks)

    print("Embeddings created!")

def start_api_server():
    """Launch the FastAPI backend under uvicorn and wait for it to exit.

    The server binds to all interfaces on ``API_PORT`` with auto-reload
    enabled. The call blocks until the uvicorn process terminates.
    """
    print("Starting API server...")
    uvicorn_command = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host",
        "0.0.0.0",
        "--port",
        str(API_PORT),
        "--reload",
    ]
    subprocess.run(uvicorn_command)

def start_flask_frontend():
    """Launch the Flask frontend and wait for it to exit.

    Runs ``flask run`` bound to all interfaces on ``FRONTEND_PORT``, with
    ``FLASK_APP`` added to a copy of the current environment so the Flask
    CLI knows which application to serve.
    """
    print("Starting Flask frontend...")

    # Copy the parent environment and add FLASK_APP for the child process.
    frontend_env = dict(os.environ)
    frontend_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"

    flask_command = ["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"]
    subprocess.run(flask_command, env=frontend_env)

def wait_for_server(url, timeout=30, interval=1):
    """Poll *url* until it responds or *timeout* seconds have elapsed.

    Args:
        url: Address to probe with an HTTP GET request.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to sleep between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 is received,
        False if the deadline passes first.
    """
    # Use a monotonic clock so system clock adjustments cannot shorten or
    # extend the wait.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")
    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # Connection refused / timed out: the server is not up yet, retry.
            pass
        else:
            # Any non-5xx answer (including 404) means the server is serving.
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(interval, remaining))
    print(f"Timeout waiting for server at {url}.")
    return False

def _stop_process(process):
    """Terminate *process* if it is still running and wait for it to exit."""
    if process is None or process.poll() is not None:
        return
    process.terminate()
    try:
        # Reap the child so it does not linger as a zombie.
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()

def _run_servers():
    """Start the API server and the frontend, then block until Enter is pressed.

    Both child processes are stopped on the way out, even if starting the
    frontend, opening the browser, or reading from stdin raises.
    """
    api_process = subprocess.Popen(["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", str(API_PORT), "--reload"])
    flask_process = None
    try:
        api_ready = wait_for_server(f'http://localhost:{API_PORT}/', timeout=60)
        if api_ready:
            # NOTE(review): extra grace period kept from the original code;
            # presumably lets the API finish initialising - confirm it is needed.
            time.sleep(3)
            flask_process = subprocess.Popen(["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"],
                                             env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"})

            import webbrowser
            webbrowser.open(f'http://localhost:{FRONTEND_PORT}')

            input("Press Enter to stop the server and exit...\n")
    finally:
        # Stop the frontend before the backend it depends on.
        _stop_process(flask_process)
        _stop_process(api_process)

def main():
    """Parse the command-line flags and run the selected pipeline stages.

    Flags:
        --scrape    run the web scraper
        --build     build the embeddings
        --api       run the API server in the foreground
        --frontend  run the Flask frontend in the foreground
        --run       start the API server and frontend together
        --all       scrape, build, then start both servers

    When no flag is given, the usage help is printed and nothing is run.
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")

    args = parser.parse_args()

    # Without any flag there is nothing to do; show usage instead of exiting silently.
    if not any(vars(args).values()):
        parser.print_help()
        return

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        _run_servers()
    elif args.api:
        start_api_server()

    if args.frontend:
        start_flask_frontend()

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()