Spaces:

Aryan2704
/

Leet-Search

Runtime error

App Files Files Community

Patel aryan committed on Jul 21

Commit

6505613

1 Parent(s): 92a5e4c

refactor: update import paths and enhance problem data structure with difficulty and topic tags

Browse files

Files changed (25) hide show

backend/app/__pycache__/main.cpython-313.pyc +0 -0
backend/app/controllers/__pycache__/search_controller.cpython-313.pyc +0 -0
backend/app/controllers/search_controller.py +2 -2
backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc +0 -0
backend/app/database/__pycache__/insert_data.cpython-313.pyc +0 -0
backend/app/database/__pycache__/last_fetched_data.cpython-313.pyc +0 -0
backend/app/database/__pycache__/supabase_client.cpython-313.pyc +0 -0
backend/app/database/find_k_nearest.py +3 -1
backend/app/database/insert_data.py +5 -3
backend/app/database/last_fetched_data.py +1 -1
backend/app/main.py +11 -10
backend/app/routes/__pycache__/search_route.cpython-313.pyc +0 -0
backend/app/routes/search_route.py +1 -1
backend/app/scripts/__pycache__/populate_db.cpython-313.pyc +0 -0
backend/app/scripts/populate_db.py +16 -6
backend/app/scripts/sql.txt +3 -1
backend/app/scripts/update_data.py +1 -1
backend/app/services/__pycache__/__init__.cpython-313.pyc +0 -0
backend/app/services/__pycache__/genrate_embeddings.cpython-313.pyc +0 -0
backend/app/services/__pycache__/scrape_problems.cpython-313.pyc +0 -0
backend/app/services/genrate_embeddings.py +2 -2
backend/app/services/scrape_problems.py +13 -1
backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc +0 -0
backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc +0 -0
backend/app/utils/get_paid_problems.py +3 -1

backend/app/__pycache__/main.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/__pycache__/main.cpython-313.pyc and b/backend/app/__pycache__/main.cpython-313.pyc differ

backend/app/controllers/__pycache__/search_controller.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/controllers/__pycache__/search_controller.cpython-313.pyc and b/backend/app/controllers/__pycache__/search_controller.cpython-313.pyc differ

backend/app/controllers/search_controller.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from backend.app.utils.get_embeddings import get_embedding
-from backend.app.database.find_k_nearest import get_questions_by_similarity_range
 from typing import List

+from app.utils.get_embeddings import get_embedding
+from app.database.find_k_nearest import get_questions_by_similarity_range
 from typing import List

backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc and b/backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc differ

backend/app/database/__pycache__/insert_data.cpython-313.pyc ADDED Viewed

Binary file (1.42 kB). View file

backend/app/database/__pycache__/last_fetched_data.cpython-313.pyc ADDED Viewed

Binary file (838 Bytes). View file

backend/app/database/__pycache__/supabase_client.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/database/__pycache__/supabase_client.cpython-313.pyc and b/backend/app/database/__pycache__/supabase_client.cpython-313.pyc differ

backend/app/database/find_k_nearest.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import List
 # Adjust the import path as needed
-from backend.app.database.supabase_client import supabase
 def get_questions_by_similarity_range(query_embedding: List[float], page: int
@@ -14,6 +14,8 @@ def get_questions_by_similarity_range(query_embedding: List[float], page: int
             content,
             original_content,
             paid_only,
             LEAST(
                 GREATEST(
                     ROUND(

 from typing import List
 # Adjust the import path as needed
+from app.database.supabase_client import supabase
 def get_questions_by_similarity_range(query_embedding: List[float], page: int
             content,
             original_content,
             paid_only,
+            difficulty,
+            topictags,
             LEAST(
                 GREATEST(
                     ROUND(

backend/app/database/insert_data.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # insert_problems.py
 import logging
-from backend.app.database.supabase_client import supabase
 logging.basicConfig(level=logging.INFO)
@@ -8,7 +8,7 @@ logging.basicConfig(level=logging.INFO)
 def insert_questions(problems):
     for prob in problems:
-        # Insert into Supabase
         supabase.table("problems_bge").upsert({
             "id": prob["id"],
             "title": prob["title"],
@@ -17,6 +17,8 @@ def insert_questions(problems):
             "content": prob.get("content", ""),
             "original_content": prob.get("original_content", ""),
             "embedding": prob.get("embedding", []),
-            "id_num": int(prob["id"])
         }, on_conflict=["id"]).execute()
     logging.info(f"Inserted {len(problems)} problems into Supabase.")

 # insert_problems.py
 import logging
+from app.database.supabase_client import supabase
 logging.basicConfig(level=logging.INFO)
 def insert_questions(problems):
     for prob in problems:
+        prob['topicTags'] = '@'.join(prob.get('topicTags', []))
         supabase.table("problems_bge").upsert({
             "id": prob["id"],
             "title": prob["title"],
             "content": prob.get("content", ""),
             "original_content": prob.get("original_content", ""),
             "embedding": prob.get("embedding", []),
+            "id_num": int(prob["id"]),
+            "difficulty": prob["difficulty"],
+            "topictags": prob["topicTags"]
         }, on_conflict=["id"]).execute()
     logging.info(f"Inserted {len(problems)} problems into Supabase.")

backend/app/database/last_fetched_data.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from backend.app.database.supabase_client import supabase
 def get_last_fetched_question(type):


+from app.database.supabase_client import supabase


 def get_last_fetched_question(type):

backend/app/main.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from backend.app.routes import search_route
 import logging
-# Optional: from backend.app.scripts.populate_db import populate_db
 app = FastAPI(title="LeetCode Vector Search API", version="1.0")
 # Set up logging
@@ -13,24 +14,24 @@ logging.basicConfig(level=logging.INFO)
 # Configure CORS
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=[
-        "https://leet-search-sepia.vercel.app",
-        "http://localhost:3000"
-    ],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 @app.get("/")
 def root():
     return {"message": "LeetCode Vector Search API is running 🚀"}
 app.include_router(search_route.router, prefix="/api", tags=["Search"])
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run("backend.app.main:app", host="0.0.0.0",
-                port=7860, reload=False)

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from app.routes import search_route
 import logging
+from app.scripts.populate_db import populate_db
+# Initialize FastAPI app
 app = FastAPI(title="LeetCode Vector Search API", version="1.0")
 # Set up logging
 # Configure CORS
 app.add_middleware(
     CORSMiddleware,
+    # Replace with frontend URLs in production
+    allow_origins=["https://leet-search-sepia.vercel.app/search",
+                   "https://leet-search-sepia.vercel.app", "http://localhost:3000"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
+# Health check endpoint
 @app.get("/")
 def root():
     return {"message": "LeetCode Vector Search API is running 🚀"}
+# Register routes
 app.include_router(search_route.router, prefix="/api", tags=["Search"])
+# run initial population of the database only once then only run scraping
+# populate_db()

backend/app/routes/__pycache__/search_route.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/routes/__pycache__/search_route.cpython-313.pyc and b/backend/app/routes/__pycache__/search_route.cpython-313.pyc differ

backend/app/routes/search_route.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from fastapi import APIRouter, HTTPException, Query, Body
 from typing import Dict, Any
-from backend.app.controllers.search_controller import handle_search
 router = APIRouter()

 from fastapi import APIRouter, HTTPException, Query, Body
 from typing import Dict, Any
+from app.controllers.search_controller import handle_search
 router = APIRouter()

backend/app/scripts/__pycache__/populate_db.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/scripts/__pycache__/populate_db.cpython-313.pyc and b/backend/app/scripts/__pycache__/populate_db.cpython-313.pyc differ

backend/app/scripts/populate_db.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from backend.app.utils.get_paid_problems import get_paid_problems
-from backend.app.utils.get_embeddings import get_embedding
 import re
 import logging
 import os
@@ -61,7 +61,9 @@ def format_problem(problems=[], type=False):
             'paidOnly': type,
             'slug': problem['slug'],
             'content': clean_text,
-            'original_content': raw_html
         })
     return formatted_problems
@@ -75,13 +77,18 @@ def filter_problems(problems=[]):
             filtered_problems_paid.append({
                 'id': problem['questionFrontendId'],
                 'title': problem['title'],
-                'slug': problem['url'].rstrip('/').split('/')[-1]})
         else:
             filtered_problems_free.append({
                 'id': problem['questionFrontendId'],
                 'title': problem['title'],
                 'slug': problem['url'].rstrip('/').split('/')[-1],
                 'content': problem['content'],
             })
     return filtered_problems_free, filtered_problems_paid
@@ -93,7 +100,7 @@ def save_to_csv(data, filename='problems.csv'):
     csv_path = os.path.join(os.path.dirname(__file__), filename)
     with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
         fieldnames = ['id', 'id_num', 'url', 'title',
-                      'paid_only', 'content', 'original_content', 'embedding']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
         for row in data:
@@ -104,6 +111,7 @@ def save_to_csv(data, filename='problems.csv'):
 def order_data(data):
     csv_data = []
     for problem in data:
         csv_data.append({
             'id': problem['id'],
             'id_num': int(problem['id']),
@@ -112,7 +120,9 @@ def order_data(data):
             'paid_only': problem['paidOnly'],
             'content': problem.get('content', ''),
             'original_content': problem.get('original_content', ''),
-            'embedding': json.dumps(problem.get('embedding', []))
         })
     return csv_data

+from app.utils.get_paid_problems import get_paid_problems
+from app.utils.get_embeddings import get_embedding
 import re
 import logging
 import os
             'paidOnly': type,
             'slug': problem['slug'],
             'content': clean_text,
+            'original_content': raw_html,
+            'difficulty': problem['difficulty'],
+            'topicTags': problem.get('topicTags', []),
         })
     return formatted_problems
             filtered_problems_paid.append({
                 'id': problem['questionFrontendId'],
                 'title': problem['title'],
+                'difficulty': problem['difficulty'],
+                'slug': problem['url'].rstrip('/').split('/')[-1],
+                'topicTags': [tag['name'] for tag in problem['topicTags']],
+            })
         else:
             filtered_problems_free.append({
                 'id': problem['questionFrontendId'],
                 'title': problem['title'],
                 'slug': problem['url'].rstrip('/').split('/')[-1],
                 'content': problem['content'],
+                'difficulty': problem['difficulty'],
+                'topicTags': [tag['name'] for tag in problem['topicTags']],
             })
     return filtered_problems_free, filtered_problems_paid
     csv_path = os.path.join(os.path.dirname(__file__), filename)
     with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
         fieldnames = ['id', 'id_num', 'url', 'title',
+                      'paid_only', 'content', 'original_content', 'embedding', 'difficulty', 'topictags']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
         for row in data:
 def order_data(data):
     csv_data = []
     for problem in data:
+        problem['topictags'] = '@'.join(problem.get('topictags', []))
         csv_data.append({
             'id': problem['id'],
             'id_num': int(problem['id']),
             'paid_only': problem['paidOnly'],
             'content': problem.get('content', ''),
             'original_content': problem.get('original_content', ''),
+            'embedding': json.dumps(problem.get('embedding', [])),
+            'difficulty': problem['difficulty'],
+            'topicTags': problem['topicTags'],
         })
     return csv_data

backend/app/scripts/sql.txt CHANGED Viewed

@@ -13,7 +13,9 @@ CREATE TABLE problems_bge (
   paid_only BOOLEAN,
   content TEXT,
   original_content TEXT,
-  embedding vector(768)              -- Adjust dimension if needed
 );
 -- Drop pre-existing exec_sql RPC function (if exists)

   paid_only BOOLEAN,
   content TEXT,
   original_content TEXT,
+  embedding vector(768),              -- Adjust dimension if needed
+  difficulty TEXT,
+  topictags TEXT
 );
 -- Drop pre-existing exec_sql RPC function (if exists)

backend/app/scripts/update_data.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from backend.app.services.scrape_problems import scrape_problems
 def main():

 import logging
+from app.services.scrape_problems import scrape_problems
 def main():

backend/app/services/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (173 Bytes). View file

backend/app/services/__pycache__/genrate_embeddings.cpython-313.pyc ADDED Viewed

Binary file (630 Bytes). View file

backend/app/services/__pycache__/scrape_problems.cpython-313.pyc ADDED Viewed

Binary file (6.06 kB). View file

backend/app/services/genrate_embeddings.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from backend.app.utils.get_embeddings import get_embedding
-from backend.app.database.insert_data import insert_questions
 def generate_embeddings(data):

+from app.utils.get_embeddings import get_embedding
+from app.database.insert_data import insert_questions
 def generate_embeddings(data):

backend/app/services/scrape_problems.py CHANGED Viewed

@@ -25,6 +25,10 @@ def get_all_problems(categorySlug="", skip=0, limit=10000, filters={}):
       paidOnly: isPaidOnly
       title
       titleSlug
     }
   }
 }
@@ -53,12 +57,16 @@ def filter_problems(problems=[]):
                 'id': problem['frontendQuestionId'],
                 'title': problem['title'],
                 'slug': problem['titleSlug'],
             })
         else:
             filtered_problems_free.append({
                 'id': problem['frontendQuestionId'],
                 'title': problem['title'],
                 'slug': problem['titleSlug'],
             })
     return filtered_problems_free, filtered_problems_paid
@@ -75,6 +83,8 @@ def get_json_problem(problems=[]):
                 'title': problem['title'],
                 'slug': problem['slug'],
                 'content': response_data['content'],
             })
         else:
             logging.error(
@@ -100,7 +110,9 @@ def format_problem(problems=[], type=False):
             'paidOnly': type,
             'slug': problem['slug'],
             'content': clean_text,
-            'original_content': raw_html
         })
     return formatted_problems

       paidOnly: isPaidOnly
       title
       titleSlug
+         topicTags {
+        name
+      }
+      difficulty
     }
   }
 }
                 'id': problem['frontendQuestionId'],
                 'title': problem['title'],
                 'slug': problem['titleSlug'],
+                'difficulty': problem['difficulty'],
+                'topicTags': [tag['name'] for tag in problem['topicTags']],
             })
         else:
             filtered_problems_free.append({
                 'id': problem['frontendQuestionId'],
                 'title': problem['title'],
                 'slug': problem['titleSlug'],
+                'difficulty': problem['difficulty'],
+                'topicTags': [tag['name'] for tag in problem['topicTags']],
             })
     return filtered_problems_free, filtered_problems_paid
                 'title': problem['title'],
                 'slug': problem['slug'],
                 'content': response_data['content'],
+                'difficulty': problem['difficulty'],
+                'topicTags': problem.get('topicTags', []),
             })
         else:
             logging.error(
             'paidOnly': type,
             'slug': problem['slug'],
             'content': clean_text,
+            'original_content': raw_html,
+            'difficulty': problem['difficulty'],
+            'topicTags': problem.get('topicTags', []),
         })
     return formatted_problems

backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc and b/backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc differ

backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc CHANGED Viewed

Binary files a/backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc and b/backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc differ

backend/app/utils/get_paid_problems.py CHANGED Viewed

@@ -53,6 +53,8 @@ def get_paid_problems(problems=[]):
             'id': problem['id'],
             'title': problem['title'],
             'slug': problem['slug'],
-            'content': clean_html
         })
     return paid_problems

             'id': problem['id'],
             'title': problem['title'],
             'slug': problem['slug'],
+            'content': clean_html,
+            'difficulty': problem['difficulty'],
+            'topicTags': problem.get('topicTags', []),
         })
     return paid_problems