github-actions committed on
Commit 1914b78 · 1 Parent(s): e0b624c

Auto deploy from GitHub Actions
.dockerignore ADDED
@@ -0,0 +1,21 @@
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+
+ .env
+ .venv/
+ env/
+
+ .git/
+ .gitignore
+
+ hf_cache/
+ MODELS/
+ *.pt
+ *.bin
+ *.safetensors
+
+ node_modules/
+
+ *.log
.github/workflows/ci.yml ADDED
@@ -0,0 +1,50 @@
+ name: SQL CI pipeline
+
+ on:
+   push:
+     branches: [main]
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Setup Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.10"
+
+       - name: Install Dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -r requirements.txt
+           pip install flake8 pytest
+
+       - name: Lint check
+         run: flake8 . || true  # non-blocking, lint failures do not fail the build
+
+       - name: Run tests
+         run: pytest src/tests/ || true  # non-blocking, test failures do not fail the build
+
+       - name: Deploy to Hugging Face Spaces
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+           HF_USERNAME: ${{ secrets.HF_USERNAME }}
+           HF_SPACE_NAME: ${{ secrets.HF_SPACE_NAME }}
+         run: |
+           git config --global user.email "actions@github.com"
+           git config --global user.name "github-actions"
+
+           git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$HF_SPACE_NAME space-repo
+
+           # exclude the clone itself so rsync does not copy the destination into itself
+           rsync -av --exclude='.git' --exclude='space-repo' ./ space-repo/
+
+           cd space-repo
+
+           git add .
+           git commit -m "Auto deploy from GitHub Actions" || echo "No changes"
+           git push
.gitignore ADDED
@@ -0,0 +1,3 @@
+ .env
+ **/__pycache__/
+ MODELS/
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ CMD ["python", "test.py"]
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: AI Powered SQL
- emoji: 👍
- colorFrom: purple
- colorTo: gray
- sdk: docker
- pinned: false
- license: mit
- short_description: 'generate the AI powered SQL query. '
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI-powered-SQL
app.log ADDED
@@ -0,0 +1,22 @@
+ 2026-04-13 00:40:43,932 - INFO - Loading GGUF model...
+ 2026-04-13 00:42:37,950 - INFO - Loading GGUF model...
+ 2026-04-13 00:43:18,406 - INFO - Loading GGUF model...
+ 2026-04-13 00:45:30,282 - INFO - Loading GGUF model...
+ 2026-04-13 00:46:11,791 - INFO - Loading GGUF model...
+ 2026-04-13 00:48:12,153 - INFO - Loading GGUF model...
+ 2026-04-13 00:49:29,215 - INFO - Loading GGUF model...
+ 2026-04-13 01:05:07,207 - INFO - Loading GGUF model...
+ 2026-04-13 01:11:39,179 - INFO - Loading GGUF model...
+ 2026-04-13 01:11:46,119 - INFO - Stage 1 SQL: SELECT COUNT(DISTINCT course_id) AS num_courses, COUNT(DISTINCT id) AS num_enrollments FROM enrollments GROUP BY num_enrollments;
+ 2026-04-13 01:11:49,582 - INFO - Stage 2 SQL: SELECT c.course_name, COUNT(e.id) AS num_enrollments FROM courses c JOIN enrollments e ON c.course_id = e.course_id GROUP BY c.course_name;
+ 2026-04-13 01:11:51,743 - INFO - Stage 3 SQL: SELECT c.course_name, COUNT(e.id) AS num_enrollments FROM courses c JOIN enrollments e ON c.course_id = e.course_id GROUP BY c.course_name;
+ 2026-04-13 01:12:47,120 - INFO - Stage 1 SQL: CREATE TABLE students (id INT, name VARCHAR(255), age INT, marks INT); CREATE TABLE courses (course_id INT, course_name VARCHAR(255)); CREATE TABLE enrollments (id INT, student_id INT, course_id INT); CREATE TABLE orders (order_id INT, student_id INT, amount INT); INSERT INTO students (id, name, age, marks) VALUES (1, 'John Doe', 20, 80); INSERT INTO students (id, name, age, marks) VALUES (2, 'Jane Smith', 22, 90); INSERT INTO courses (course_id, course_name) VALUES (1
+ 2026-04-13 01:12:54,610 - INFO - Stage 2 SQL: CREATE TABLE students (id INT, name VARCHAR(255), age INT, marks INT); CREATE TABLE courses (course_id INT, course_name VARCHAR(255)); CREATE TABLE enrollments (id INT, student_id INT, course_id INT); CREATE TABLE orders (order_id INT, student_id INT, amount INT); CREATE TABLE students_courses (student_id INT, course_id INT); INSERT INTO students (id, name, age, marks) VALUES (1, 'John Doe', 20, 80); INSERT INTO students (id, name, age, marks) VALUES (2, 'Jane Smith', 22, 90);
+ 2026-04-13 01:12:56,658 - INFO - Stage 3 SQL: SELECT s.name, AVG(s.marks) FROM students s JOIN students_courses sc ON s.id = sc.student_id GROUP BY s.name;
+ 2026-04-13 01:14:39,678 - INFO - Stage 1 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:14:43,013 - INFO - Stage 2 SQL: SELECT s.id, s.name, s.age, s.marks FROM students s JOIN courses c ON s.id = c.student_id JOIN enrollments e ON s.id = e.student_id JOIN orders o ON e.id = o.order_id WHERE o.amount = 100;
+ 2026-04-13 01:14:44,894 - INFO - Stage 3 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:16:19,608 - INFO - Loading GGUF model...
+ 2026-04-13 01:16:26,471 - INFO - Stage 1 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
+ 2026-04-13 01:16:30,277 - INFO - Stage 2 SQL: SELECT s.id, s.name, s.age, s.marks FROM students s JOIN courses c ON s.id = c.student_id JOIN enrollments e ON s.id = e.student_id JOIN orders o ON e.id = o.order_id WHERE o.amount > 50;
+ 2026-04-13 01:16:32,237 - INFO - Stage 3 SQL: INSERT INTO students (id, name, age, marks) VALUES (100, 'Rohan', 22, 90);
app.py ADDED
@@ -0,0 +1,12 @@
+ import time
+
+ from src.pipeline.gen_query import generate_query
+ from src.pipeline.schema_extract import extract_schema
+
+ schema = extract_schema("sample_project.sql")
+ print(f"Schema: {schema}")
+ start_time = time.time()
+ res = generate_query("What types of courses are there and how many students are in each course?", schema)
+
+ print(res)
+ print("total time required:", time.time() - start_time)
download_model.py ADDED
@@ -0,0 +1,8 @@
+ from huggingface_hub import hf_hub_download
+
+ path = hf_hub_download(
+     repo_id="Rohit-Katkar2003/llama3.2-1b-text-2-sql",  # quantized version
+     filename="llama-3.2-1b-instruct.Q4_K_M.gguf",  # ~800MB, good quality
+     local_dir="MODELS/gguf"
+ )
+ print(f"Downloaded to: {path}")
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ flake8
+ pytest
+ transformers
+ fastapi
+ uvicorn
+ peft
+ pydantic
+ # imported by the pipeline and utilities but previously missing:
+ llama-cpp-python
+ torch
+ pandas
+ huggingface_hub
+ python-multipart
space-repo/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
space-repo/README.md ADDED
@@ -0,0 +1 @@
+ # AI-powered-SQL
src/main.py ADDED
@@ -0,0 +1,32 @@
+ from contextlib import asynccontextmanager
+
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from src.service.routes import router
+ from src.utils.logging import add_logger
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     print("🚀 Loading model at startup...")
+     from src.utils.config import get_model
+     get_model()  # loads and caches the model once
+     print("✅ Model ready!")
+     yield
+
+
+ # register the lifespan handler so the model actually loads at startup
+ app = FastAPI(lifespan=lifespan)
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # allow all origins (for dev)
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ add_logger()
+
+ app.include_router(router)
src/pipeline/create_sample_data.py ADDED
@@ -0,0 +1,138 @@
+ import sqlite3
+ import random
+
+ def create_sample_db(db_path="sample_project.db"):
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     # Drop tables if they exist
+     cursor.executescript("""
+     DROP TABLE IF EXISTS enrollments;
+     DROP TABLE IF EXISTS orders;
+     DROP TABLE IF EXISTS students;
+     DROP TABLE IF EXISTS courses;
+     """)
+
+     # Create tables
+     cursor.executescript("""
+     CREATE TABLE students (
+         id INTEGER PRIMARY KEY,
+         name TEXT,
+         age INTEGER,
+         marks INTEGER
+     );
+
+     CREATE TABLE courses (
+         course_id INTEGER PRIMARY KEY,
+         course_name TEXT
+     );
+
+     CREATE TABLE enrollments (
+         id INTEGER PRIMARY KEY,
+         student_id INTEGER,
+         course_id INTEGER,
+         FOREIGN KEY(student_id) REFERENCES students(id),
+         FOREIGN KEY(course_id) REFERENCES courses(course_id)
+     );
+
+     CREATE TABLE orders (
+         order_id INTEGER PRIMARY KEY,
+         student_id INTEGER,
+         amount INTEGER,
+         FOREIGN KEY(student_id) REFERENCES students(id)
+     );
+     """)
+
+     # Insert students (20 records)
+     names = ["Rohit", "Amit", "Neha", "Priya", "Karan", "Simran", "Raj", "Anjali", "Vikram", "Sneha"]
+
+     students = []
+     for i in range(1, 21):
+         students.append((
+             i,
+             random.choice(names) + str(i),
+             random.randint(18, 25),
+             random.randint(50, 100)
+         ))
+
+     cursor.executemany("INSERT INTO students VALUES (?, ?, ?, ?)", students)
+
+     # Insert courses
+     courses = [
+         (1, "Math"),
+         (2, "Science"),
+         (3, "History"),
+         (4, "Computer Science")
+     ]
+     cursor.executemany("INSERT INTO courses VALUES (?, ?)", courses)
+
+     # Insert enrollments (30 records)
+     enrollments = []
+     for i in range(1, 31):
+         enrollments.append((
+             i,
+             random.randint(1, 20),
+             random.randint(1, 4)
+         ))
+
+     cursor.executemany("INSERT INTO enrollments VALUES (?, ?, ?)", enrollments)
+
+     # Insert orders (25 records)
+     orders = []
+     for i in range(1, 26):
+         orders.append((
+             i,
+             random.randint(1, 20),
+             random.randint(100, 2000)
+         ))
+
+     cursor.executemany("INSERT INTO orders VALUES (?, ?, ?)", orders)
+
+     conn.commit()
+     conn.close()
+
+     print(f"✅ Database created: {db_path}")
+
+
+ # Run this
+ # create_sample_db()
+
+ def export_to_sql(db_path="sample_project.db", sql_path="sample_project.sql"):
+     conn = sqlite3.connect(db_path)
+     with open(sql_path, "w") as f:
+         for line in conn.iterdump():
+             f.write(f"{line}\n")
+     conn.close()
+     print(f"✅ Exported to {sql_path}")
+     return
+ # Run this
+ # export_to_sql()
+
+
+ def extract_schema(db_path):
+     # expects a SQLite .db file, not a .sql text dump
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     cursor.execute("""
+     SELECT name FROM sqlite_master
+     WHERE type='table' AND name NOT LIKE 'sqlite_%';
+     """)
+
+     tables = cursor.fetchall()
+     schema_chunks = []
+
+     for (table_name,) in tables:
+         cursor.execute(f"PRAGMA table_info({table_name});")
+         columns = cursor.fetchall()
+
+         col_names = [col[1] for col in columns]
+
+         chunk = f"Table: {table_name} ({', '.join(col_names)})"
+         schema_chunks.append(chunk)
+
+     conn.close()
+     return schema_chunks
+
+
+ print(extract_schema("sample_project.db"))
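Both fixture files can be regenerated by executing the two commented "Run this" calls together; a minimal driver, assuming the module path used elsewhere in the repo:

# hypothetical driver script, not part of this commit
from src.pipeline.create_sample_data import create_sample_db, export_to_sql

create_sample_db()   # writes sample_project.db (20 students, 4 courses, ...)
export_to_sql()      # dumps it to sample_project.sql via conn.iterdump()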
src/pipeline/gen_query.py ADDED
@@ -0,0 +1,89 @@
+ # src/pipeline/gen_query.py
+
+ import logging
+ from src.template.prompt import generate_message_template, generate_refine_template
+ from src.utils.config import get_model
+ import torch
+
+ logger = logging.getLogger(__name__)
+
+ def _call_llm(llm, messages: list, max_tokens: int = 128) -> str:
+     response = llm.create_chat_completion(
+         messages=messages,
+         max_tokens=max_tokens,
+         stop=["</s>", "\n\n"],
+         temperature=0.1,
+     )
+     return response["choices"][0]["message"]["content"].strip()
+
+
+ def generate_query(user_query: str, model_schema) -> dict:
+     llm, _ = get_model()
+
+     messages_1 = generate_message_template(user_query, model_schema)
+     sql_1 = _call_llm(llm, messages_1)
+     logger.info(f"Stage 1 SQL: {sql_1}")
+     print(f"🔵 Stage 1: {sql_1}")
+
+     messages_2 = generate_refine_template(user_query, model_schema, sql_1, stage=2)
+     sql_2 = _call_llm(llm, messages_2)
+     logger.info(f"Stage 2 SQL: {sql_2}")
+     print(f"🟡 Stage 2: {sql_2}")
+
+     messages_3 = generate_refine_template(user_query, model_schema, sql_2, stage=3)
+     sql_3 = _call_llm(llm, messages_3)
+     logger.info(f"Stage 3 SQL: {sql_3}")
+     print(f"🟢 Stage 3 (final): {sql_3}")
+
+     return {
+         "final": sql_3,
+         "stage_1": sql_1,
+         "stage_2": sql_2,
+         "stage_3": sql_3,
+     }
+
+
+ def generate_query_trans(user_query, model_schema):
+     MODEL, TOKENIZER = get_model()
+
+     device = "cpu"  # keep cpu unless GPU available
+
+     torch.set_num_threads(4)  # tune: try 2–8
+
+     messages = generate_message_template(user_query, model_schema)
+
+     text = TOKENIZER.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     inputs = TOKENIZER(
+         text,
+         return_tensors="pt",
+         padding=False
+     )
+
+     input_ids = inputs["input_ids"].to(device)
+     attention_mask = inputs["attention_mask"].to(device)
+
+     input_length = input_ids.shape[1]
+
+     with torch.inference_mode():
+         outputs = MODEL.generate(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             max_new_tokens=64,
+             do_sample=False,
+             use_cache=True,
+             pad_token_id=TOKENIZER.eos_token_id,
+         )
+
+     generated_tokens = outputs[0][input_length:]
+
+     response = TOKENIZER.decode(
+         generated_tokens,
+         skip_special_tokens=True
+     )
+
+     return response.strip()
src/pipeline/load_model.py ADDED
@@ -0,0 +1,36 @@
+ # src/pipeline/load_model.py
+
+ import logging
+ import os
+
+ logger = logging.getLogger(__name__)
+
+ # use os.path.join so the path works on Linux (Docker/CI) as well as Windows
+ GGUF_MODEL_PATH = os.path.join("MODELS", "gguf", "llama-3.2-1b-instruct.Q4_K_M.gguf")
+
+ def load_llm_model():
+     try:
+         from llama_cpp import Llama
+
+         if not os.path.exists(GGUF_MODEL_PATH):
+             raise FileNotFoundError(f"GGUF model not found at: {GGUF_MODEL_PATH}")
+
+         logger.info("Loading GGUF model...")
+         print(f"👉 Loading model from {GGUF_MODEL_PATH}")
+
+         llm = Llama(
+             model_path=GGUF_MODEL_PATH,
+             n_ctx=2048,       # context window
+             n_threads=4,      # CPU threads; adjust to your core count
+             n_gpu_layers=0,   # 0 = CPU only; increase if you have a GPU
+             verbose=False,
+         )
+
+         print("✅ Model fully loaded!")
+         return llm, None  # no separate tokenizer needed
+
+     except Exception:
+         import traceback
+         print("❌ ERROR LOADING MODEL:")
+         traceback.print_exc()
+         raise
src/pipeline/schema_extract.py ADDED
@@ -0,0 +1,30 @@
+ import sqlite3
+
+ from src.utils.db_helpers import convert_to_sqlite
+
+
+ def extract_schema(db_path):
+     # normalize .sql/.csv/.db inputs into a SQLite database first
+     db_path = convert_to_sqlite(db_path, "test.db")
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     cursor.execute("""
+     SELECT name FROM sqlite_master
+     WHERE type='table' AND name NOT LIKE 'sqlite_%';
+     """)
+
+     tables = cursor.fetchall()
+     schema_chunks = []
+
+     for (table_name,) in tables:
+         cursor.execute(f"PRAGMA table_info({table_name});")
+         columns = cursor.fetchall()
+
+         col_names = [col[1] for col in columns]
+
+         chunk = f"Table: {table_name} ({', '.join(col_names)})"
+         schema_chunks.append(chunk)
+
+     conn.close()
+     return schema_chunks
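On the bundled sample database this returns one chunk per table. A quick check, assuming sample_project.db was generated by create_sample_data.py (output shape shown for illustration):

# example usage - hypothetical, not part of this commit
from src.pipeline.schema_extract import extract_schema

print(extract_schema("sample_project.db"))
# expected shape, given the sample schema:
# ['Table: students (id, name, age, marks)',
#  'Table: courses (course_id, course_name)',
#  'Table: enrollments (id, student_id, course_id)',
#  'Table: orders (order_id, student_id, amount)']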
src/pipeline/sql_engine.py ADDED
@@ -0,0 +1,23 @@
+ import sqlite3
+
+ def execute_query(db_path: str, query: str):
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+     try:
+         cursor.execute(query)
+
+         # cursor.description is None for non-SELECT statements
+         if cursor.description is None:
+             return []
+
+         columns = [desc[0] for desc in cursor.description]
+         rows = cursor.fetchall()
+
+         return [dict(zip(columns, row)) for row in rows]
+
+     except sqlite3.OperationalError as e:
+         raise ValueError(f"SQL execution failed: {e}\nQuery: {query}")
+
+     finally:
+         # close once here instead of in every branch
+         conn.close()
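Each returned row is a dict keyed by column name, which is what routes.py serializes back to the client. A minimal sketch of calling the engine directly, assuming the sample database exists:

# example usage - hypothetical, not part of this commit
from src.pipeline.sql_engine import execute_query

rows = execute_query("sample_project.db", "SELECT course_id, course_name FROM courses;")
for row in rows:
    print(row["course_id"], row["course_name"])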
src/service/models.py ADDED
@@ -0,0 +1,14 @@
+ from pydantic import BaseModel, field_validator
+
+
+ class FileMeta(BaseModel):
+     filename: str
+     user_query: str
+
+     @field_validator("filename")
+     @classmethod
+     def validate_extension(cls, v):
+         allowed_ext = (".csv", ".db", ".sql")
+         if not v.lower().endswith(allowed_ext):
+             raise ValueError("Only .csv, .db, .sql files are allowed")
+         return v
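With pydantic v2, a ValueError raised inside a field_validator surfaces as a ValidationError, which the /upload route converts into an HTTP 400. A quick illustration:

# example usage - hypothetical, not part of this commit
from pydantic import ValidationError
from src.service.models import FileMeta

FileMeta(filename="data.csv", user_query="list all students")  # passes validation

try:
    FileMeta(filename="notes.txt", user_query="list all students")
except ValidationError as e:
    print(e)  # reports: Only .csv, .db, .sql files are allowed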
src/service/routes.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import shutil
+ import time
+
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
+
+ from src.service.models import FileMeta
+ from src.pipeline.gen_query import generate_query
+ from src.pipeline.schema_extract import extract_schema
+ from src.pipeline.sql_engine import execute_query
+
+
+ router = APIRouter()
+
+
+ @router.get("/")
+ async def test():
+     return {"status": "success", "messages": "Everything is Fine"}
+
+ UPLOAD_DIR = "uploads"
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+ @router.post("/upload")
+ async def upload_file_gen_query(file: UploadFile = File(...),
+                                 user_query: str = Form(...)):
+
+     try:
+         FileMeta(filename=file.filename, user_query=user_query)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     allowed_types = [
+         "text/csv",
+         "application/sql",
+         "application/octet-stream"  # for .db
+     ]
+
+     if file.content_type not in allowed_types:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Invalid file type: {file.content_type}"
+         )
+
+     db_save_path = os.path.join(UPLOAD_DIR, file.filename)
+     try:
+         with open(db_save_path, "wb") as f:
+             shutil.copyfileobj(file.file, f)
+     except Exception as e:
+         return {"status": "error", "message": f"Failed to save file: {e}"}
+
+     try:
+         # use the saved path, not the bare filename
+         schema = extract_schema(db_save_path)
+         print(f"Schema: {schema}")
+         start_time = time.time()
+         sql_result = generate_query(user_query, schema)
+         print(f"Generated SQL stages: {sql_result}")
+
+         # extract_schema converted the upload into test.db, so query that
+         results = execute_query("test.db", sql_result["final"])
+
+         print("✅✅ Final Query: ", sql_result["final"])
+         print("results: ", results)
+         return {
+             "status": "success",
+             "query_stages": {
+                 "stage_1": sql_result["stage_1"],
+                 "stage_2": sql_result["stage_2"],
+                 "stage_3": sql_result["stage_3"],
+             },
+             "final_query": sql_result["final"],
+             "data": results,
+             "row_count": len(results),
+             "time_required": f"{time.time() - start_time:.2f}s"
+         }
+
+     except Exception as e:
+         return {"status": "error", "message": f"got error {e}"}
src/template/prompt.py ADDED
@@ -0,0 +1,44 @@
+ ## Prompt templates matching the message format the model was fine-tuned on.
+
+
+ def generate_message_template(user_query, table_schema):
+
+     messages = [
+         {"role": "system",
+          "content": "You are a SQL generator. Only output SQL query. No explanation, no markdown."},
+
+         {
+             "role": "user",
+             "content": f"""{user_query},
+             Schema: {table_schema}"""
+         }
+     ]
+
+     return messages
+
+ def generate_refine_template(user_query: str, table_schema, previous_sql: str, stage: int):
+     """Stage 2 & 3: Refine previous SQL"""
+     stage_instructions = {
+         2: "Review the SQL query below for logical errors, wrong joins, or missing conditions. Output only the corrected SQL.",
+         3: "Final check: ensure the SQL is optimized, uses correct aggregations, and fully answers the user question. Output only the final SQL."
+     }
+
+     messages = [
+         {
+             "role": "system",
+             "content": "You are a SQL expert. Only output SQL query. No explanation, no markdown."
+         },
+         {
+             "role": "user",
+             "content": f"{user_query},\n Schema: {table_schema}"
+         },
+         {
+             "role": "assistant",
+             "content": previous_sql  # previous stage output
+         },
+         {
+             "role": "user",
+             "content": stage_instructions[stage]
+         }
+     ]
+     return messages
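To make the multi-stage flow in gen_query.py concrete, here is a rough sketch of the stage-2 message list; all values are illustrative:

# illustration - hypothetical values, not part of this commit
from src.template.prompt import generate_refine_template

msgs = generate_refine_template(
    user_query="how many students per course?",
    table_schema=["Table: courses (course_id, course_name)"],
    previous_sql="SELECT course_name, COUNT(*) FROM enrollments GROUP BY course_name;",
    stage=2,
)
# roles come out as: system, user, assistant (previous SQL), user (refine instruction)
for m in msgs:
    print(m["role"], "->", m["content"][:60])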
src/tests/check_model.py ADDED
@@ -0,0 +1,12 @@
+ import os
+
+ import pytest
+
+ from src.pipeline.load_model import load_llm_model
+
+ @pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip heavy model in CI")
+ def test_llm_model():
+     model, tokenizer = load_llm_model()
+
+     # load_llm_model returns (llm, None) for GGUF, so only the model is asserted
+     assert model is not None
src/utils/config.py ADDED
@@ -0,0 +1,14 @@
+
+ MODEL = None
+ TOKENIZER = None
+
+ def get_model():
+     global MODEL, TOKENIZER
+
+     if MODEL is None:
+         print("Lazy loading model...")
+         from src.pipeline.load_model import load_llm_model
+         MODEL, TOKENIZER = load_llm_model()
+         print("Model loaded!")
+
+     return MODEL, TOKENIZER
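Because MODEL is module-level, every caller shares one loaded instance and repeated calls are cheap after the first. A minimal sanity check of that behavior:

# example - hypothetical check, not part of this commit
from src.utils.config import get_model

llm_a, _ = get_model()  # first call loads the GGUF model
llm_b, _ = get_model()  # second call returns the cached instance
assert llm_a is llm_b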
src/utils/db_helpers.py ADDED
@@ -0,0 +1,65 @@
+ import sqlite3
+ import os
+ import shutil
+ import pandas as pd
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ def import_sql_to_db(sql_path, db_path="output.db"):
+
+     if not os.path.exists(sql_path):
+         raise FileNotFoundError(f"{sql_path} not found")
+
+     conn = sqlite3.connect(db_path)
+     cursor = conn.cursor()
+
+     with open(sql_path, "r", encoding="utf-8") as f:
+         sql_script = f.read()
+
+     try:
+         cursor.executescript(sql_script)  # executes the full SQL dump
+         conn.commit()
+         print(f"✅ Database created at {db_path}")
+
+         return db_path
+     except Exception as e:
+         print(f"❌ Error: {e}")
+         raise  # re-raise so callers never receive None as a db path
+     finally:
+         conn.close()
+
+
+ def convert_csv_to_sqlite(csv_path, db_path="output.db", table_name=None):
+
+     if table_name is None:
+         table_name = os.path.splitext(os.path.basename(csv_path))[0]
+
+     df = pd.read_csv(csv_path)
+
+     conn = sqlite3.connect(db_path)
+     df.to_sql(table_name, conn, if_exists="replace", index=False)
+     conn.close()
+
+     print(f"✅ CSV converted to SQLite DB ({table_name} table)")
+     return db_path
+
+ def convert_to_sqlite(input_path, output_db="converted.db"):
+     ext = os.path.splitext(input_path)[1].lower()
+
+     if os.path.exists(output_db):
+         os.remove(output_db)
+
+     if ext == ".sql":
+         return import_sql_to_db(input_path, output_db)
+
+     elif ext in [".db", ".sqlite"]:
+         shutil.copy(input_path, output_db)
+         print(f"✅ Copied DB to {output_db}")
+         return output_db
+
+     elif ext == ".csv":
+         return convert_csv_to_sqlite(input_path, output_db)
+
+     else:
+         raise ValueError(f"❌ Unsupported format: {ext}")
src/utils/logging.py ADDED
@@ -0,0 +1,12 @@
+ import logging
+ import sys
+
+ def add_logger():
+     logging.basicConfig(
+         level=logging.INFO,
+         format='%(asctime)s - %(levelname)s - %(message)s',
+         handlers=[
+             logging.FileHandler("app.log"),
+             logging.StreamHandler(sys.stdout)
+         ]
+     )
test.py ADDED
@@ -0,0 +1,7 @@
+ import uvicorn
+
+ from src.main import app
+
+ if __name__ == "__main__":
+     print("Starting server...")
+     uvicorn.run(app, host="0.0.0.0", port=8000)
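For completeness, a sketch of exercising the /upload route once the server is running. Field names match routes.py; this assumes the requests package (not in requirements.txt) and that sample_project.sql exists:

# hypothetical client, not part of this commit
import requests

with open("sample_project.sql", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/upload",
        files={"file": ("sample_project.sql", f, "application/sql")},
        data={"user_query": "how many students are enrolled in each course?"},
    )
print(resp.json())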