github-actions committed on
Commit 1914b78 · 1 Parent(s): e0b624c

Auto deploy from GitHub Actions
.dockerignore ADDED
@@ -0,0 +1,21 @@
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ .env
+ .venv/
+ env/
+
+ .git/
+ .gitignore
+
+ hf_cache/
+ MODELS/
+ *.pt
+ *.bin
+ *.safetensors
+
+ node_modules/
+
+ *.log
.github/workflows/ci.yml ADDED
@@ -0,0 +1,50 @@
+ name: SQL CI pipeline
+
+ on:
+   push:
+     branches: [main]
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Setup Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.10"
+
+       - name: Install Dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -r requirements.txt
+           pip install flake8 pytest
+
+       - name: Lint check
+         run: flake8 . || true  # non-blocking, lint failures do not fail the build
+
+       - name: Run tests
+         run: pytest src/tests/ || true  # non-blocking, test failures do not fail the build
+
+       - name: Deploy to Hugging Face Spaces
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+           HF_USERNAME: ${{ secrets.HF_USERNAME }}
+           HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
+         run: |
+           git config --global user.email "actions@github.com"
+           git config --global user.name "github-actions"
+
+           git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$HF_SPACE_NAME space-repo
+
+           # exclude the clone itself so rsync does not copy the destination into itself
+           rsync -av --exclude='.git' --exclude='space-repo' ./ space-repo/
+
+           cd space-repo
+
+           git add .
+           git commit -m "Auto deploy from GitHub Actions" || echo "No changes"
+           git push
.gitignore ADDED
@@ -0,0 +1,3 @@
+ .env
+ **/__pycache__/
+ MODELS/
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ CMD ["python", "test.py"]
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: AI Powered SQL
- emoji: 👍
- colorFrom: purple
- colorTo: gray
- sdk: docker
- pinned: false
- license: mit
- short_description: 'generate the AI powered SQL query. '
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI-powered-SQL
app.log ADDED
@@ -0,0 +1,22 @@
+ 2026-04-13 00:40:43,932 - INFO - Loading GGUF model...
+ 2026-04-13 00:42:37,950 - INFO - Loading GGUF model...
+ 2026-04-13 00:43:18,406 - INFO - Loading GGUF model...
+ 2026-04-13 00:45:30,282 - INFO - Loading GGUF model...
+ 2026-04-13 00:46:11,791 - INFO - Loading GGUF model...
+ 2026-04-13 00:48:12,153 - INFO - Loading GGUF model...
+ 2026-04-13 00:49:29,215 - INFO - Loading GGUF model...
+ 2026-04-13 01:05:07,207 - INFO - Loading GGUF model...
+ 2026-04-13 01:11:39,179 - INFO - Loading GGUF model...
+ 2026-04-13 01:11:46,119 - INFO - Stage 1 SQL: SELECT COUNT(DISTINCT course_id) AS num_courses, COUNT(DISTINCT id) AS num_enrollments FROM enrollments GROUP BY num_enrollments;
+ 2026-04-13 01:11:49,582 - INFO - Stage 2 SQL: SELECT c.course_name, COUNT(e.id) AS num_enrollments FROM courses c JOIN enrollments e ON c.course_id = e.course_id GROUP BY c.course_name;
+ 2026-04-13 01:11:51,743 - INFO - Stage 3 SQL: SELECT c.course_name, COUNT(e.id) AS num_enrollments FROM courses c JOIN enrollments e ON c.course_id = e.course_id GROUP BY c.course_name;
+ 2026-04-13 01:12:47,120 - INFO - Stage 1 SQL: CREATE TABLE students (id INT, name VARCHAR(255), age INT, marks INT); CREATE TABLE courses (course_id INT, course_name VARCHAR(255)); CREATE TABLE enrollments (id INT, student_id INT, course_id INT); CREATE TABLE orders (order_id INT, student_id INT, amount INT); INSERT INTO students (id, name, age, marks) VALUES (1, 'John Doe', 20, 80); INSERT INTO students (id, name, age, marks) VALUES (2, 'Jane Smith', 22, 90); INSERT INTO courses (course_id, course_name) VALUES (1
+ 2026-04-13 01:12:54,610 - INFO - Stage 2 SQL: CREATE TABLE students (id INT, name VARCHAR(255), age INT, marks INT); CREATE TABLE courses (course_id INT, course_name VARCHAR(255)); CREATE TABLE enrollments (id INT, student_id INT, course_id INT); CREATE TABLE orders (order_id INT, student_id INT, amount INT); CREATE TABLE students_courses (student_id INT, course_id INT); INSERT INTO students (id, name, age, marks) VALUES (1, 'John Doe', 20, 80); INSERT INTO students (id, name, age, marks) VALUES (2, 'Jane Smith', 22, 90);
+ 2026-04-13 01:12:56,658 - INFO - Stage 3 SQL: SELECT s.name, AVG(s.marks) FROM students s JOIN students_courses sc ON s.id = sc.student_id GROUP BY s.name;
+ 2026-04-13 01:14:39,678 - INFO - Stage 1 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:14:43,013 - INFO - Stage 2 SQL: SELECT s.id, s.name, s.age, s.marks FROM students s JOIN courses c ON s.id = c.student_id JOIN enrollments e ON s.id = e.student_id JOIN orders o ON e.id = o.order_id WHERE o.amount = 100;
+ 2026-04-13 01:14:44,894 - INFO - Stage 3 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:16:19,608 - INFO - Loading GGUF model...
+ 2026-04-13 01:16:26,471 - INFO - Stage 1 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:16:30,277 - INFO - Stage 2 SQL: SELECT s.id, s.name, s.age, s.marks FROM students s JOIN courses c ON s.id = c.student_id JOIN enrollments e ON s.id = e.student_id JOIN orders o ON e.id = o.order_id WHERE o.amount > 50;
+ 2026-04-13 01:16:32,237 - INFO - Stage 3 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
app.py ADDED
@@ -0,0 +1,12 @@
+ import time
+
+ from src.pipeline.gen_query import generate_query
+ from src.pipeline.schema_extract import extract_schema
+
+ schema = extract_schema("sample_project.sql")
+ print(f"Schema: {schema}")
+ start_time = time.time()
+ res = generate_query("What types of courses are there and how many students are in each course?", schema)
+
+ print(res)
+ print("total time required:", time.time() - start_time)
download_model.py ADDED
@@ -0,0 +1,8 @@
+ from huggingface_hub import hf_hub_download
+
+ path = hf_hub_download(
+     repo_id="Rohit-Katkar2003/llama3.2-1b-text-2-sql",  # quantized version
+     filename="llama-3.2-1b-instruct.Q4_K_M.gguf",  # ~800MB, good quality
+     local_dir="MODELS/gguf"
+ )
+ print(f"Downloaded to: {path}")
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ flake8
+ pytest
+ transformers
+ fastapi
+ uvicorn
+ peft
+ pydantic
+ # imported by the pipeline and utilities but previously missing:
+ llama-cpp-python
+ torch
+ pandas
+ huggingface_hub
+ python-multipart
space-repo/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
space-repo/README.md ADDED
@@ -0,0 +1 @@
+ # AI-powered-SQL
src/main.py ADDED
@@ -0,0 +1,32 @@
+ from contextlib import asynccontextmanager
+
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from src.service.routes import router
+ from src.utils.logging import add_logger
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     print("🚀 Loading model at startup...")
+     from src.utils.config import get_model
+     get_model()  # loads and caches the model once
+     print("✅ Model ready!")
+     yield
+
+
+ # register the lifespan handler so the model actually loads at startup
+ app = FastAPI(lifespan=lifespan)
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # allow all origins (for dev)
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ add_logger()
+
+ app.include_router(router)
src/pipeline/create_sample_data.py ADDED
@@ -0,0 +1,138 @@
+ import sqlite3
+ import random
+
+ def create_sample_db(db_path="sample_project.db"):
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     # Drop tables if they exist
+     cursor.executescript("""
+     DROP TABLE IF EXISTS enrollments;
+     DROP TABLE IF EXISTS orders;
+     DROP TABLE IF EXISTS students;
+     DROP TABLE IF EXISTS courses;
+     """)
+
+     # Create tables
+     cursor.executescript("""
+     CREATE TABLE students (
+         id INTEGER PRIMARY KEY,
+         name TEXT,
+         age INTEGER,
+         marks INTEGER
+     );
+
+     CREATE TABLE courses (
+         course_id INTEGER PRIMARY KEY,
+         course_name TEXT
+     );
+
+     CREATE TABLE enrollments (
+         id INTEGER PRIMARY KEY,
+         student_id INTEGER,
+         course_id INTEGER,
+         FOREIGN KEY(student_id) REFERENCES students(id),
+         FOREIGN KEY(course_id) REFERENCES courses(course_id)
+     );
+
+     CREATE TABLE orders (
+         order_id INTEGER PRIMARY KEY,
+         student_id INTEGER,
+         amount INTEGER,
+         FOREIGN KEY(student_id) REFERENCES students(id)
+     );
+     """)
+
+     # Insert students (20 records)
+     names = ["Rohit", "Amit", "Neha", "Priya", "Karan", "Simran", "Raj", "Anjali", "Vikram", "Sneha"]
+
+     students = []
+     for i in range(1, 21):
+         students.append((
+             i,
+             random.choice(names) + str(i),
+             random.randint(18, 25),
+             random.randint(50, 100)
+         ))
+
+     cursor.executemany("INSERT INTO students VALUES (?, ?, ?, ?)", students)
+
+     # Insert courses
+     courses = [
+         (1, "Math"),
+         (2, "Science"),
+         (3, "History"),
+         (4, "Computer Science")
+     ]
+     cursor.executemany("INSERT INTO courses VALUES (?, ?)", courses)
+
+     # Insert enrollments (30 records)
+     enrollments = []
+     for i in range(1, 31):
+         enrollments.append((
+             i,
+             random.randint(1, 20),
+             random.randint(1, 4)
+         ))
+
+     cursor.executemany("INSERT INTO enrollments VALUES (?, ?, ?)", enrollments)
+
+     # Insert orders (25 records)
+     orders = []
+     for i in range(1, 26):
+         orders.append((
+             i,
+             random.randint(1, 20),
+             random.randint(100, 2000)
+         ))
+
+     cursor.executemany("INSERT INTO orders VALUES (?, ?, ?)", orders)
+
+     conn.commit()
+     conn.close()
+
+     print(f"✅ Database created: {db_path}")
+
+
+ # Run this
+ # create_sample_db()
+
+ def export_to_sql(db_path="sample_project.db", sql_path="sample_project.sql"):
+     conn = sqlite3.connect(db_path)
+     with open(sql_path, "w") as f:
+         for line in conn.iterdump():
+             f.write(f"{line}\n")
+     conn.close()
+     print(f"✅ Exported to {sql_path}")
+     return
+ # Run this
+ # export_to_sql()
+
+
+ def extract_schema(db_path):
+     # expects a SQLite .db file, not a .sql text dump
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     cursor.execute("""
+     SELECT name FROM sqlite_master
+     WHERE type='table' AND name NOT LIKE 'sqlite_%';
+     """)
+
+     tables = cursor.fetchall()
+     schema_chunks = []
+
+     for (table_name,) in tables:
+         cursor.execute(f"PRAGMA table_info({table_name});")
+         columns = cursor.fetchall()
+
+         col_names = [col[1] for col in columns]
+
+         chunk = f"Table: {table_name} ({', '.join(col_names)})"
+         schema_chunks.append(chunk)
+
+     conn.close()
+     return schema_chunks
+
+
+ print(extract_schema("sample_project.db"))
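Both fixture files can be regenerated by executing the two commented "Run this" calls together; a minimal driver, assuming the module path used elsewhere in the repo:

# hypothetical driver script, not part of this commit
from src.pipeline.create_sample_data import create_sample_db, export_to_sql

create_sample_db()   # writes sample_project.db (20 students, 4 courses, ...)
export_to_sql()      # dumps it to sample_project.sql via conn.iterdump()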
src/pipeline/gen_query.py ADDED
@@ -0,0 +1,89 @@
+ # src/pipeline/gen_query.py
+
+ import logging
+ from src.template.prompt import generate_message_template, generate_refine_template
+ from src.utils.config import get_model
+ import torch
+
+ logger = logging.getLogger(__name__)
+
+ def _call_llm(llm, messages: list, max_tokens: int = 128) -> str:
+     response = llm.create_chat_completion(
+         messages=messages,
+         max_tokens=max_tokens,
+         stop=["</s>", "\n\n"],
+         temperature=0.1,
+     )
+     return response["choices"][0]["message"]["content"].strip()
+
+
+ def generate_query(user_query: str, model_schema) -> dict:
+     llm, _ = get_model()
+
+     messages_1 = generate_message_template(user_query, model_schema)
+     sql_1 = _call_llm(llm, messages_1)
+     logger.info(f"Stage 1 SQL: {sql_1}")
+     print(f"🔵 Stage 1: {sql_1}")
+
+     messages_2 = generate_refine_template(user_query, model_schema, sql_1, stage=2)
+     sql_2 = _call_llm(llm, messages_2)
+     logger.info(f"Stage 2 SQL: {sql_2}")
+     print(f"🟡 Stage 2: {sql_2}")
+
+     messages_3 = generate_refine_template(user_query, model_schema, sql_2, stage=3)
+     sql_3 = _call_llm(llm, messages_3)
+     logger.info(f"Stage 3 SQL: {sql_3}")
+     print(f"🟢 Stage 3 (final): {sql_3}")
+
+     return {
+         "final": sql_3,
+         "stage_1": sql_1,
+         "stage_2": sql_2,
+         "stage_3": sql_3,
+     }
+
+
+ def generate_query_trans(user_query, model_schema):
+     MODEL, TOKENIZER = get_model()
+
+     device = "cpu"  # keep cpu unless GPU available
+
+     torch.set_num_threads(4)  # tune: try 2–8
+
+     messages = generate_message_template(user_query, model_schema)
+
+     text = TOKENIZER.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     inputs = TOKENIZER(
+         text,
+         return_tensors="pt",
+         padding=False
+     )
+
+     input_ids = inputs["input_ids"].to(device)
+     attention_mask = inputs["attention_mask"].to(device)
+
+     input_length = input_ids.shape[1]
+
+     with torch.inference_mode():
+         outputs = MODEL.generate(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             max_new_tokens=64,
+             do_sample=False,
+             use_cache=True,
+             pad_token_id=TOKENIZER.eos_token_id,
+         )
+
+     generated_tokens = outputs[0][input_length:]
+
+     response = TOKENIZER.decode(
+         generated_tokens,
+         skip_special_tokens=True
+     )
+
+     return response.strip()
src/pipeline/load_model.py ADDED
@@ -0,0 +1,36 @@
+ # src/pipeline/load_model.py
+
+ import logging
+ import os
+
+ logger = logging.getLogger(__name__)
+
+ # use os.path.join so the path works on Linux (Docker/CI) as well as Windows
+ GGUF_MODEL_PATH = os.path.join("MODELS", "gguf", "llama-3.2-1b-instruct.Q4_K_M.gguf")
+
+ def load_llm_model():
+     try:
+         from llama_cpp import Llama
+
+         if not os.path.exists(GGUF_MODEL_PATH):
+             raise FileNotFoundError(f"GGUF model not found at: {GGUF_MODEL_PATH}")
+
+         logger.info("Loading GGUF model...")
+         print(f"👉 Loading model from {GGUF_MODEL_PATH}")
+
+         llm = Llama(
+             model_path=GGUF_MODEL_PATH,
+             n_ctx=2048,       # context window
+             n_threads=4,      # CPU threads; adjust to your core count
+             n_gpu_layers=0,   # 0 = CPU only; increase if you have a GPU
+             verbose=False,
+         )
+
+         print("✅ Model fully loaded!")
+         return llm, None  # no separate tokenizer needed
+
+     except Exception:
+         import traceback
+         print("❌ ERROR LOADING MODEL:")
+         traceback.print_exc()
+         raise
src/pipeline/schema_extract.py ADDED
@@ -0,0 +1,30 @@
+ import sqlite3
+
+ from src.utils.db_helpers import convert_to_sqlite
+
+
+ def extract_schema(db_path):
+     # normalize .sql/.csv/.db inputs into a SQLite database first
+     db_path = convert_to_sqlite(db_path, "test.db")
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     cursor.execute("""
+     SELECT name FROM sqlite_master
+     WHERE type='table' AND name NOT LIKE 'sqlite_%';
+     """)
+
+     tables = cursor.fetchall()
+     schema_chunks = []
+
+     for (table_name,) in tables:
+         cursor.execute(f"PRAGMA table_info({table_name});")
+         columns = cursor.fetchall()
+
+         col_names = [col[1] for col in columns]
+
+         chunk = f"Table: {table_name} ({', '.join(col_names)})"
+         schema_chunks.append(chunk)
+
+     conn.close()
+     return schema_chunks
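On the bundled sample database this returns one chunk per table. A quick check, assuming sample_project.db was generated by create_sample_data.py (output shape shown for illustration):

# example usage - hypothetical, not part of this commit
from src.pipeline.schema_extract import extract_schema

print(extract_schema("sample_project.db"))
# expected shape, given the sample schema:
# ['Table: students (id, name, age, marks)',
#  'Table: courses (course_id, course_name)',
#  'Table: enrollments (id, student_id, course_id)',
#  'Table: orders (order_id, student_id, amount)']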
src/pipeline/sql_engine.py ADDED
@@ -0,0 +1,23 @@
+ import sqlite3
+
+ def execute_query(db_path: str, query: str):
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+     try:
+         cursor.execute(query)
+
+         # cursor.description is None for non-SELECT statements
+         if cursor.description is None:
+             return []
+
+         columns = [desc[0] for desc in cursor.description]
+         rows = cursor.fetchall()
+
+         return [dict(zip(columns, row)) for row in rows]
+
+     except sqlite3.OperationalError as e:
+         raise ValueError(f"SQL execution failed: {e}\nQuery: {query}")
+
+     finally:
+         # close once here instead of in every branch
+         conn.close()
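Each returned row is a dict keyed by column name, which is what routes.py serializes back to the client. A minimal sketch of calling the engine directly, assuming the sample database exists:

# example usage - hypothetical, not part of this commit
from src.pipeline.sql_engine import execute_query

rows = execute_query("sample_project.db", "SELECT course_id, course_name FROM courses;")
for row in rows:
    print(row["course_id"], row["course_name"])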
src/service/models.py ADDED
@@ -0,0 +1,14 @@
+ from pydantic import BaseModel, field_validator
+
+
+ class FileMeta(BaseModel):
+     filename: str
+     user_query: str
+
+     @field_validator("filename")
+     @classmethod
+     def validate_extension(cls, v):
+         allowed_ext = (".csv", ".db", ".sql")
+         if not v.lower().endswith(allowed_ext):
+             raise ValueError("Only .csv, .db, .sql files are allowed")
+         return v
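With pydantic v2, a ValueError raised inside a field_validator surfaces as a ValidationError, which the /upload route converts into an HTTP 400. A quick illustration:

# example usage - hypothetical, not part of this commit
from pydantic import ValidationError
from src.service.models import FileMeta

FileMeta(filename="data.csv", user_query="list all students")  # passes validation

try:
    FileMeta(filename="notes.txt", user_query="list all students")
except ValidationError as e:
    print(e)  # reports: Only .csv, .db, .sql files are allowed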
src/service/routes.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import shutil
+ import time
+
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
+
+ from src.service.models import FileMeta
+ from src.pipeline.gen_query import generate_query
+ from src.pipeline.schema_extract import extract_schema
+ from src.pipeline.sql_engine import execute_query
+
+
+ router = APIRouter()
+
+
+ @router.get("/")
+ async def test():
+     return {"status": "success", "messages": "Everything is Fine"}
+
+ UPLOAD_DIR = "uploads"
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+ @router.post("/upload")
+ async def upload_file_gen_query(file: UploadFile = File(...),
+                                 user_query: str = Form(...)):
+
+     try:
+         FileMeta(filename=file.filename, user_query=user_query)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     allowed_types = [
+         "text/csv",
+         "application/sql",
+         "application/octet-stream"  # for .db
+     ]
+
+     if file.content_type not in allowed_types:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Invalid file type: {file.content_type}"
+         )
+
+     db_save_path = os.path.join(UPLOAD_DIR, file.filename)
+     try:
+         with open(db_save_path, "wb") as f:
+             shutil.copyfileobj(file.file, f)
+     except Exception as e:
+         return {"status": "error", "message": f"Failed to save file: {e}"}
+
+     try:
+         # use the saved path, not the bare filename
+         schema = extract_schema(db_save_path)
+         print(f"Schema: {schema}")
+         start_time = time.time()
+         sql_result = generate_query(user_query, schema)
+         print(f"Generated SQL stages: {sql_result}")
+
+         # extract_schema converted the upload into test.db, so query that
+         results = execute_query("test.db", sql_result["final"])
+
+         print("✅✅ Final Query: ", sql_result["final"])
+         print("results: ", results)
+         return {
+             "status": "success",
+             "query_stages": {
+                 "stage_1": sql_result["stage_1"],
+                 "stage_2": sql_result["stage_2"],
+                 "stage_3": sql_result["stage_3"],
+             },
+             "final_query": sql_result["final"],
+             "data": results,
+             "row_count": len(results),
+             "time_required": f"{time.time() - start_time:.2f}s"
+         }
+
+     except Exception as e:
+         return {"status": "error", "message": f"got error {e}"}
src/template/prompt.py ADDED
@@ -0,0 +1,44 @@
+ ## Prompt templates matching the message format the model was fine-tuned on.
+
+
+ def generate_message_template(user_query, table_schema):
+
+     messages = [
+         {"role": "system",
+          "content": "You are a SQL generator. Only output SQL query. No explanation, no markdown."},
+
+         {
+             "role": "user",
+             "content": f"""{user_query},
+             Schema: {table_schema}"""
+         }
+     ]
+
+     return messages
+
+ def generate_refine_template(user_query: str, table_schema, previous_sql: str, stage: int):
+     """Stage 2 & 3: Refine previous SQL"""
+     stage_instructions = {
+         2: "Review the SQL query below for logical errors, wrong joins, or missing conditions. Output only the corrected SQL.",
+         3: "Final check: ensure the SQL is optimized, uses correct aggregations, and fully answers the user question. Output only the final SQL."
+     }
+
+     messages = [
+         {
+             "role": "system",
+             "content": "You are a SQL expert. Only output SQL query. No explanation, no markdown."
+         },
+         {
+             "role": "user",
+             "content": f"{user_query},\n Schema: {table_schema}"
+         },
+         {
+             "role": "assistant",
+             "content": previous_sql  # previous stage output
+         },
+         {
+             "role": "user",
+             "content": stage_instructions[stage]
+         }
+     ]
+     return messages
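To make the multi-stage flow in gen_query.py concrete, here is a rough sketch of the stage-2 message list; all values are illustrative:

# illustration - hypothetical values, not part of this commit
from src.template.prompt import generate_refine_template

msgs = generate_refine_template(
    user_query="how many students per course?",
    table_schema=["Table: courses (course_id, course_name)"],
    previous_sql="SELECT course_name, COUNT(*) FROM enrollments GROUP BY course_name;",
    stage=2,
)
# roles come out as: system, user, assistant (previous SQL), user (refine instruction)
for m in msgs:
    print(m["role"], "->", m["content"][:60])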
src/tests/check_model.py ADDED
@@ -0,0 +1,12 @@
+ import os
+
+ import pytest
+
+ from src.pipeline.load_model import load_llm_model
+
+ @pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip heavy model in CI")
+ def test_llm_model():
+     model, tokenizer = load_llm_model()
+
+     # load_llm_model returns (llm, None) for GGUF, so only the model is asserted
+     assert model is not None
src/utils/config.py ADDED
@@ -0,0 +1,14 @@
+
+ MODEL = None
+ TOKENIZER = None
+
+ def get_model():
+     global MODEL, TOKENIZER
+
+     if MODEL is None:
+         print("Lazy loading model...")
+         from src.pipeline.load_model import load_llm_model
+         MODEL, TOKENIZER = load_llm_model()
+         print("Model loaded!")
+
+     return MODEL, TOKENIZER
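Because MODEL is module-level, every caller shares one loaded instance and repeated calls are cheap after the first. A minimal sanity check of that behavior:

# example - hypothetical check, not part of this commit
from src.utils.config import get_model

llm_a, _ = get_model()  # first call loads the GGUF model
llm_b, _ = get_model()  # second call returns the cached instance
assert llm_a is llm_b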
src/utils/db_helpers.py ADDED
@@ -0,0 +1,65 @@
+ import sqlite3
+ import os
+ import shutil
+ import pandas as pd
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ def import_sql_to_db(sql_path, db_path="output.db"):
+
+     if not os.path.exists(sql_path):
+         raise FileNotFoundError(f"{sql_path} not found")
+
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     with open(sql_path, "r", encoding="utf-8") as f:
+         sql_script = f.read()
+
+     try:
+         cursor.executescript(sql_script)  # executes the full SQL dump
+         conn.commit()
+         print(f"✅ Database created at {db_path}")
+
+         return db_path
+     except Exception as e:
+         print(f"❌ Error: {e}")
+         raise  # re-raise so callers never receive None as a db path
+     finally:
+         conn.close()
+
+
+ def convert_csv_to_sqlite(csv_path, db_path="output.db", table_name=None):
+
+     if table_name is None:
+         table_name = os.path.splitext(os.path.basename(csv_path))[0]
+
+     df = pd.read_csv(csv_path)
+
+     conn = sqlite3.connect(db_path)
+     df.to_sql(table_name, conn, if_exists="replace", index=False)
+     conn.close()
+
+     print(f"✅ CSV converted to SQLite DB ({table_name} table)")
+     return db_path
+
+ def convert_to_sqlite(input_path, output_db="converted.db"):
+     ext = os.path.splitext(input_path)[1].lower()
+
+     if os.path.exists(output_db):
+         os.remove(output_db)
+
+     if ext == ".sql":
+         return import_sql_to_db(input_path, output_db)
+
+     elif ext in [".db", ".sqlite"]:
+         shutil.copy(input_path, output_db)
+         print(f"✅ Copied DB to {output_db}")
+         return output_db
+
+     elif ext == ".csv":
+         return convert_csv_to_sqlite(input_path, output_db)
+
+     else:
+         raise ValueError(f"❌ Unsupported format: {ext}")
src/utils/logging.py ADDED
@@ -0,0 +1,12 @@
+ import logging
+ import sys
+
+ def add_logger():
+     logging.basicConfig(
+         level=logging.INFO,
+         format='%(asctime)s - %(levelname)s - %(message)s',
+         handlers=[
+             logging.FileHandler("app.log"),
+             logging.StreamHandler(sys.stdout)
+         ]
+     )
test.py ADDED
@@ -0,0 +1,7 @@
+ import uvicorn
+
+ from src.main import app
+
+ if __name__ == "__main__":
+     print("Starting server...")
+     uvicorn.run(app, host="0.0.0.0", port=8000)
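For completeness, a sketch of exercising the /upload route once the server is running. Field names match routes.py; this assumes the requests package (not in requirements.txt) and that sample_project.sql exists:

# hypothetical client, not part of this commit
import requests

with open("sample_project.sql", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/upload",
        files={"file": ("sample_project.sql", f, "application/sql")},
        data={"user_query": "how many students are enrolled in each course?"},
    )
print(resp.json())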