Spaces:

richardr1126
/

sql-skeleton-wizardcoder-demo

Paused

File size: 8,528 Bytes

40c895f
 
 
 
 
 
 
 
 
 
 
80be5da
40c895f
f7fa740
40c895f
 
4592479
40c895f
 
 
 
 
78715f1
 
40c895f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f23744
40c895f
 
ba3b60e
5f23744
40c895f
 
 
9ae4caa
40c895f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d119dc4
913de56
 
 
 
80be5da
 
 
 
 
 
 
 
66983ee
e2ff706
9c4badc
d119dc4
e2ff706
2ea5c26
 
 
8a0ec5f
 
e2ff706
2ea5c26
 
8a0ec5f
 
2ea5c26
8a0ec5f
 
2ea5c26
8a0ec5f
e2ff706
8a0ec5f
2ea5c26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a0ec5f
e2ff706
4592479
2ea5c26

import os
from threading import Event, Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)
import gradio as gr
import torch
import sqlparse

model_name = os.getenv("HF_MODEL_NAME", None)
tok = AutoTokenizer.from_pretrained(model_name)

max_new_tokens = 1024

print(f"Starting to load the model {model_name}")

m = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=0,
    #load_in_8bit=True,
)

m.config.pad_token_id = m.config.eos_token_id
m.generation_config.pad_token_id = m.config.eos_token_id

stop_tokens = [";", "###", "Result"]
stop_token_ids = tok.convert_tokens_to_ids(stop_tokens)

print(f"Successfully loaded the model {model_name} into memory")

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_id in stop_token_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False

def bot(input_message: str, db_info="", temperature=0.1, top_p=0.9, top_k=0, repetition_penalty=1.08):
    stop = StopOnTokens()

    # Format the user's input message
    messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"

    input_ids = tok(messages, return_tensors="pt").input_ids
    input_ids = input_ids.to(m.device)
    streamer = TextIteratorStreamer(tok, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
        stopping_criteria=StoppingCriteriaList([stop]),
    )

    stream_complete = Event()

    def generate_and_signal_complete():
        m.generate(**generate_kwargs)
        stream_complete.set()

    t1 = Thread(target=generate_and_signal_complete)
    t1.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text

    # Split the text by "|", and get the last element in the list which should be the final query
    try:
        final_query = partial_text.split("|")[1].strip()
    except Exception:
        final_query = partial_text

    try:
        # Attempt to format SQL query using sqlparse
        formatted_query = sqlparse.format(final_query, reindent=True, keyword_case='upper')
    except Exception:
        # If formatting fails, use the original, unformatted query
        formatted_query = final_query

    # Convert SQL to markdown (not required, but just to show how to use the markdown module)
    final_query_markdown = f"{formatted_query}"
    return final_query_markdown

with gr.Blocks(theme='gradio/soft') as demo:
    header = gr.HTML("""
        <h1 style="text-align: center">SQL Skeleton WizardCoder Demo</h1>
        <h3 style="text-align: center">🧙‍♂️ Generate SQL queries from Natural Language 🧙‍♂️</h3>
    """)

    output_box = gr.Code(label="Generated SQL", lines=2, interactive=True)
    input_text = gr.Textbox(lines=3, placeholder='Write your question here...', label='NL Input')
    db_info = gr.Textbox(lines=4, placeholder='Example: | table_01 : column_01 , column_02 | table_02 : column_01 , column_02 | ...', label='Database Info')

    with gr.Accordion("Hyperparameters", open=False):
        temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
        top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.0, maximum=1.0, value=0.9, step=0.01)
        top_k = gr.Slider(label="Top-k", minimum=0, maximum=200, value=0, step=1)
        repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.01)
        
    run_button = gr.Button("Generate SQL", variant="primary")
    
    with gr.Accordion("Examples", open=True):
        examples = gr.Examples([
            ["What is the average, minimum, and maximum age for all French singers?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["Show location and name for all stadiums with a capacity between 5000 and 10000.", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["What are the number of concerts that occurred in the stadium with the largest capacity ?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["How many male singers performed in concerts in the year 2023?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
            ["List the names of all singers who performed in a concert with the theme 'Rock'", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"]
        ], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty], fn=bot)

    bitsandbytes_model = "richardr1126/spider-skeleton-wizard-coder-8bit"
    merged_model = "richardr1126/spider-skeleton-wizard-coder-merged"
    initial_model = "WizardLM/WizardCoder-15B-V1.0"
    finetuned_model = "richardr1126/spider-skeleton-wizard-coder-qlora"
    dataset = "richardr1126/spider-skeleton-context-instruct"
    
    footer = gr.HTML(f"""
        <p>🛠️ If you want you can <strong>duplicate this Space</strong>, then change the HF_MODEL_REPO spaces env varaible to use any Transformers model.</p>
        <p>🌐 Leveraging the <a href='https://huggingface.co/{bitsandbytes_model}'><strong>bitsandbytes 8-bit version</strong></a> of <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a> model.</p>
        <p>🔗 How it's made: <a href='https://huggingface.co/{initial_model}'><strong>{initial_model}</strong></a> was finetuned to create <a href='https://huggingface.co/{finetuned_model}'><strong>{finetuned_model}</strong></a>, then merged together to create <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a>.</p>
        <p>📉 Fine-tuning was performed using QLoRA techniques on the <a href='https://huggingface.co/datasets/{dataset}'><strong>{dataset}</strong></a> dataset. You can view training metrics on the <a href='https://huggingface.co/{finetuned_model}'><strong>QLoRa adapter HF Repo</strong></a>.</p>
    """)


    run_button.click(fn=bot, inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty], outputs=output_box, api_name="txt2sql")

demo.queue(concurrency_count=1, max_size=10).launch()