Patel aryan committed on
Commit
6505613
·
1 Parent(s): 92a5e4c

refactor: update import paths and enhance problem data structure with difficulty and topic tags

Browse files
backend/app/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/backend/app/__pycache__/main.cpython-313.pyc and b/backend/app/__pycache__/main.cpython-313.pyc differ
 
backend/app/controllers/__pycache__/search_controller.cpython-313.pyc CHANGED
Binary files a/backend/app/controllers/__pycache__/search_controller.cpython-313.pyc and b/backend/app/controllers/__pycache__/search_controller.cpython-313.pyc differ
 
backend/app/controllers/search_controller.py CHANGED
@@ -1,5 +1,5 @@
1
- from backend.app.utils.get_embeddings import get_embedding
2
- from backend.app.database.find_k_nearest import get_questions_by_similarity_range
3
  from typing import List
4
 
5
 
 
1
+ from app.utils.get_embeddings import get_embedding
2
+ from app.database.find_k_nearest import get_questions_by_similarity_range
3
  from typing import List
4
 
5
 
backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc CHANGED
Binary files a/backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc and b/backend/app/database/__pycache__/find_k_nearest.cpython-313.pyc differ
 
backend/app/database/__pycache__/insert_data.cpython-313.pyc ADDED
Binary file (1.42 kB). View file
 
backend/app/database/__pycache__/last_fetched_data.cpython-313.pyc ADDED
Binary file (838 Bytes). View file
 
backend/app/database/__pycache__/supabase_client.cpython-313.pyc CHANGED
Binary files a/backend/app/database/__pycache__/supabase_client.cpython-313.pyc and b/backend/app/database/__pycache__/supabase_client.cpython-313.pyc differ
 
backend/app/database/find_k_nearest.py CHANGED
@@ -1,6 +1,6 @@
1
  from typing import List
2
  # Adjust the import path as needed
3
- from backend.app.database.supabase_client import supabase
4
 
5
 
6
  def get_questions_by_similarity_range(query_embedding: List[float], page: int
@@ -14,6 +14,8 @@ def get_questions_by_similarity_range(query_embedding: List[float], page: int
14
  content,
15
  original_content,
16
  paid_only,
 
 
17
  LEAST(
18
  GREATEST(
19
  ROUND(
 
1
  from typing import List
2
  # Adjust the import path as needed
3
+ from app.database.supabase_client import supabase
4
 
5
 
6
  def get_questions_by_similarity_range(query_embedding: List[float], page: int
 
14
  content,
15
  original_content,
16
  paid_only,
17
+ difficulty,
18
+ topictags,
19
  LEAST(
20
  GREATEST(
21
  ROUND(
backend/app/database/insert_data.py CHANGED
@@ -1,6 +1,6 @@
1
  # insert_problems.py
2
  import logging
3
- from backend.app.database.supabase_client import supabase
4
 
5
 
6
  logging.basicConfig(level=logging.INFO)
@@ -8,7 +8,7 @@ logging.basicConfig(level=logging.INFO)
8
 
9
  def insert_questions(problems):
10
  for prob in problems:
11
- # Insert into Supabase
12
  supabase.table("problems_bge").upsert({
13
  "id": prob["id"],
14
  "title": prob["title"],
@@ -17,6 +17,8 @@ def insert_questions(problems):
17
  "content": prob.get("content", ""),
18
  "original_content": prob.get("original_content", ""),
19
  "embedding": prob.get("embedding", []),
20
- "id_num": int(prob["id"])
 
 
21
  }, on_conflict=["id"]).execute()
22
  logging.info(f"Inserted {len(problems)} problems into Supabase.")
 
1
  # insert_problems.py
2
  import logging
3
+ from app.database.supabase_client import supabase
4
 
5
 
6
  logging.basicConfig(level=logging.INFO)
 
8
 
9
  def insert_questions(problems):
10
  for prob in problems:
11
+ prob['topicTags'] = '@'.join(prob.get('topicTags', []))
12
  supabase.table("problems_bge").upsert({
13
  "id": prob["id"],
14
  "title": prob["title"],
 
17
  "content": prob.get("content", ""),
18
  "original_content": prob.get("original_content", ""),
19
  "embedding": prob.get("embedding", []),
20
+ "id_num": int(prob["id"]),
21
+ "difficulty": prob["difficulty"],
22
+ "topictags": prob["topicTags"]
23
  }, on_conflict=["id"]).execute()
24
  logging.info(f"Inserted {len(problems)} problems into Supabase.")
backend/app/database/last_fetched_data.py CHANGED
@@ -1,4 +1,4 @@
1
- from backend.app.database.supabase_client import supabase
2
 
3
 
4
  def get_last_fetched_question(type):
 
1
+ from app.database.supabase_client import supabase
2
 
3
 
4
  def get_last_fetched_question(type):
backend/app/main.py CHANGED
@@ -1,10 +1,11 @@
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from backend.app.routes import search_route
4
  import logging
5
 
6
- # Optional: from backend.app.scripts.populate_db import populate_db
7
 
 
8
  app = FastAPI(title="LeetCode Vector Search API", version="1.0")
9
 
10
  # Set up logging
@@ -13,24 +14,24 @@ logging.basicConfig(level=logging.INFO)
13
  # Configure CORS
14
  app.add_middleware(
15
  CORSMiddleware,
16
- allow_origins=[
17
- "https://leet-search-sepia.vercel.app",
18
- "http://localhost:3000"
19
- ],
20
  allow_credentials=True,
21
  allow_methods=["*"],
22
  allow_headers=["*"],
23
  )
24
 
 
 
25
 
26
  @app.get("/")
27
  def root():
28
  return {"message": "LeetCode Vector Search API is running 🚀"}
29
 
30
 
 
31
  app.include_router(search_route.router, prefix="/api", tags=["Search"])
32
 
33
- if __name__ == "__main__":
34
- import uvicorn
35
- uvicorn.run("backend.app.main:app", host="0.0.0.0",
36
- port=7860, reload=False)
 
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from app.routes import search_route
4
  import logging
5
 
6
+ from app.scripts.populate_db import populate_db
7
 
8
+ # Initialize FastAPI app
9
  app = FastAPI(title="LeetCode Vector Search API", version="1.0")
10
 
11
  # Set up logging
 
14
  # Configure CORS
15
  app.add_middleware(
16
  CORSMiddleware,
17
+ # Replace with frontend URLs in production
18
+ allow_origins=["https://leet-search-sepia.vercel.app/search",
19
+ "https://leet-search-sepia.vercel.app", "http://localhost:3000"],
 
20
  allow_credentials=True,
21
  allow_methods=["*"],
22
  allow_headers=["*"],
23
  )
24
 
25
+ # Health check endpoint
26
+
27
 
28
  @app.get("/")
29
  def root():
30
  return {"message": "LeetCode Vector Search API is running 🚀"}
31
 
32
 
33
+ # Register routes
34
  app.include_router(search_route.router, prefix="/api", tags=["Search"])
35
 
36
+ # run initial population of the database only once then only run scraping
37
+ # populate_db()
 
 
backend/app/routes/__pycache__/search_route.cpython-313.pyc CHANGED
Binary files a/backend/app/routes/__pycache__/search_route.cpython-313.pyc and b/backend/app/routes/__pycache__/search_route.cpython-313.pyc differ
 
backend/app/routes/search_route.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import APIRouter, HTTPException, Query, Body
2
  from typing import Dict, Any
3
- from backend.app.controllers.search_controller import handle_search
4
 
5
  router = APIRouter()
6
 
 
1
  from fastapi import APIRouter, HTTPException, Query, Body
2
  from typing import Dict, Any
3
+ from app.controllers.search_controller import handle_search
4
 
5
  router = APIRouter()
6
 
backend/app/scripts/__pycache__/populate_db.cpython-313.pyc CHANGED
Binary files a/backend/app/scripts/__pycache__/populate_db.cpython-313.pyc and b/backend/app/scripts/__pycache__/populate_db.cpython-313.pyc differ
 
backend/app/scripts/populate_db.py CHANGED
@@ -1,5 +1,5 @@
1
- from backend.app.utils.get_paid_problems import get_paid_problems
2
- from backend.app.utils.get_embeddings import get_embedding
3
  import re
4
  import logging
5
  import os
@@ -61,7 +61,9 @@ def format_problem(problems=[], type=False):
61
  'paidOnly': type,
62
  'slug': problem['slug'],
63
  'content': clean_text,
64
- 'original_content': raw_html
 
 
65
  })
66
  return formatted_problems
67
 
@@ -75,13 +77,18 @@ def filter_problems(problems=[]):
75
  filtered_problems_paid.append({
76
  'id': problem['questionFrontendId'],
77
  'title': problem['title'],
78
- 'slug': problem['url'].rstrip('/').split('/')[-1]})
 
 
 
79
  else:
80
  filtered_problems_free.append({
81
  'id': problem['questionFrontendId'],
82
  'title': problem['title'],
83
  'slug': problem['url'].rstrip('/').split('/')[-1],
84
  'content': problem['content'],
 
 
85
  })
86
  return filtered_problems_free, filtered_problems_paid
87
 
@@ -93,7 +100,7 @@ def save_to_csv(data, filename='problems.csv'):
93
  csv_path = os.path.join(os.path.dirname(__file__), filename)
94
  with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
95
  fieldnames = ['id', 'id_num', 'url', 'title',
96
- 'paid_only', 'content', 'original_content', 'embedding']
97
  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
98
  writer.writeheader()
99
  for row in data:
@@ -104,6 +111,7 @@ def save_to_csv(data, filename='problems.csv'):
104
  def order_data(data):
105
  csv_data = []
106
  for problem in data:
 
107
  csv_data.append({
108
  'id': problem['id'],
109
  'id_num': int(problem['id']),
@@ -112,7 +120,9 @@ def order_data(data):
112
  'paid_only': problem['paidOnly'],
113
  'content': problem.get('content', ''),
114
  'original_content': problem.get('original_content', ''),
115
- 'embedding': json.dumps(problem.get('embedding', []))
 
 
116
  })
117
  return csv_data
118
 
 
1
+ from app.utils.get_paid_problems import get_paid_problems
2
+ from app.utils.get_embeddings import get_embedding
3
  import re
4
  import logging
5
  import os
 
61
  'paidOnly': type,
62
  'slug': problem['slug'],
63
  'content': clean_text,
64
+ 'original_content': raw_html,
65
+ 'difficulty': problem['difficulty'],
66
+ 'topicTags': problem.get('topicTags', []),
67
  })
68
  return formatted_problems
69
 
 
77
  filtered_problems_paid.append({
78
  'id': problem['questionFrontendId'],
79
  'title': problem['title'],
80
+ 'difficulty': problem['difficulty'],
81
+ 'slug': problem['url'].rstrip('/').split('/')[-1],
82
+ 'topicTags': [tag['name'] for tag in problem['topicTags']],
83
+ })
84
  else:
85
  filtered_problems_free.append({
86
  'id': problem['questionFrontendId'],
87
  'title': problem['title'],
88
  'slug': problem['url'].rstrip('/').split('/')[-1],
89
  'content': problem['content'],
90
+ 'difficulty': problem['difficulty'],
91
+ 'topicTags': [tag['name'] for tag in problem['topicTags']],
92
  })
93
  return filtered_problems_free, filtered_problems_paid
94
 
 
100
  csv_path = os.path.join(os.path.dirname(__file__), filename)
101
  with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
102
  fieldnames = ['id', 'id_num', 'url', 'title',
103
+ 'paid_only', 'content', 'original_content', 'embedding', 'difficulty', 'topictags']
104
  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
105
  writer.writeheader()
106
  for row in data:
 
111
  def order_data(data):
112
  csv_data = []
113
  for problem in data:
114
+ problem['topictags'] = '@'.join(problem.get('topictags', []))
115
  csv_data.append({
116
  'id': problem['id'],
117
  'id_num': int(problem['id']),
 
120
  'paid_only': problem['paidOnly'],
121
  'content': problem.get('content', ''),
122
  'original_content': problem.get('original_content', ''),
123
+ 'embedding': json.dumps(problem.get('embedding', [])),
124
+ 'difficulty': problem['difficulty'],
125
+ 'topicTags': problem['topicTags'],
126
  })
127
  return csv_data
128
 
backend/app/scripts/sql.txt CHANGED
@@ -13,7 +13,9 @@ CREATE TABLE problems_bge (
13
  paid_only BOOLEAN,
14
  content TEXT,
15
  original_content TEXT,
16
- embedding vector(768) -- Adjust dimension if needed
 
 
17
  );
18
 
19
  -- Drop pre-existing exec_sql RPC function (if exists)
 
13
  paid_only BOOLEAN,
14
  content TEXT,
15
  original_content TEXT,
16
+ embedding vector(768), -- Adjust dimension if needed
17
+ difficulty TEXT,
18
+ topictags TEXT
19
  );
20
 
21
  -- Drop pre-existing exec_sql RPC function (if exists)
backend/app/scripts/update_data.py CHANGED
@@ -1,5 +1,5 @@
1
  import logging
2
- from backend.app.services.scrape_problems import scrape_problems
3
 
4
 
5
  def main():
 
1
  import logging
2
+ from app.services.scrape_problems import scrape_problems
3
 
4
 
5
  def main():
backend/app/services/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (173 Bytes). View file
 
backend/app/services/__pycache__/genrate_embeddings.cpython-313.pyc ADDED
Binary file (630 Bytes). View file
 
backend/app/services/__pycache__/scrape_problems.cpython-313.pyc ADDED
Binary file (6.06 kB). View file
 
backend/app/services/genrate_embeddings.py CHANGED
@@ -1,5 +1,5 @@
1
- from backend.app.utils.get_embeddings import get_embedding
2
- from backend.app.database.insert_data import insert_questions
3
 
4
 
5
  def generate_embeddings(data):
 
1
+ from app.utils.get_embeddings import get_embedding
2
+ from app.database.insert_data import insert_questions
3
 
4
 
5
  def generate_embeddings(data):
backend/app/services/scrape_problems.py CHANGED
@@ -25,6 +25,10 @@ def get_all_problems(categorySlug="", skip=0, limit=10000, filters={}):
25
  paidOnly: isPaidOnly
26
  title
27
  titleSlug
 
 
 
 
28
  }
29
  }
30
  }
@@ -53,12 +57,16 @@ def filter_problems(problems=[]):
53
  'id': problem['frontendQuestionId'],
54
  'title': problem['title'],
55
  'slug': problem['titleSlug'],
 
 
56
  })
57
  else:
58
  filtered_problems_free.append({
59
  'id': problem['frontendQuestionId'],
60
  'title': problem['title'],
61
  'slug': problem['titleSlug'],
 
 
62
  })
63
  return filtered_problems_free, filtered_problems_paid
64
 
@@ -75,6 +83,8 @@ def get_json_problem(problems=[]):
75
  'title': problem['title'],
76
  'slug': problem['slug'],
77
  'content': response_data['content'],
 
 
78
  })
79
  else:
80
  logging.error(
@@ -100,7 +110,9 @@ def format_problem(problems=[], type=False):
100
  'paidOnly': type,
101
  'slug': problem['slug'],
102
  'content': clean_text,
103
- 'original_content': raw_html
 
 
104
  })
105
  return formatted_problems
106
 
 
25
  paidOnly: isPaidOnly
26
  title
27
  titleSlug
28
+ topicTags {
29
+ name
30
+ }
31
+ difficulty
32
  }
33
  }
34
  }
 
57
  'id': problem['frontendQuestionId'],
58
  'title': problem['title'],
59
  'slug': problem['titleSlug'],
60
+ 'difficulty': problem['difficulty'],
61
+ 'topicTags': [tag['name'] for tag in problem['topicTags']],
62
  })
63
  else:
64
  filtered_problems_free.append({
65
  'id': problem['frontendQuestionId'],
66
  'title': problem['title'],
67
  'slug': problem['titleSlug'],
68
+ 'difficulty': problem['difficulty'],
69
+ 'topicTags': [tag['name'] for tag in problem['topicTags']],
70
  })
71
  return filtered_problems_free, filtered_problems_paid
72
 
 
83
  'title': problem['title'],
84
  'slug': problem['slug'],
85
  'content': response_data['content'],
86
+ 'difficulty': problem['difficulty'],
87
+ 'topicTags': problem.get('topicTags', []),
88
  })
89
  else:
90
  logging.error(
 
110
  'paidOnly': type,
111
  'slug': problem['slug'],
112
  'content': clean_text,
113
+ 'original_content': raw_html,
114
+ 'difficulty': problem['difficulty'],
115
+ 'topicTags': problem.get('topicTags', []),
116
  })
117
  return formatted_problems
118
 
backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc CHANGED
Binary files a/backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc and b/backend/app/utils/__pycache__/get_embeddings.cpython-313.pyc differ
 
backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc CHANGED
Binary files a/backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc and b/backend/app/utils/__pycache__/get_paid_problems.cpython-313.pyc differ
 
backend/app/utils/get_paid_problems.py CHANGED
@@ -53,6 +53,8 @@ def get_paid_problems(problems=[]):
53
  'id': problem['id'],
54
  'title': problem['title'],
55
  'slug': problem['slug'],
56
- 'content': clean_html
 
 
57
  })
58
  return paid_problems
 
53
  'id': problem['id'],
54
  'title': problem['title'],
55
  'slug': problem['slug'],
56
+ 'content': clean_html,
57
+ 'difficulty': problem['difficulty'],
58
+ 'topicTags': problem.get('topicTags', []),
59
  })
60
  return paid_problems