File size: 4,381 Bytes
47796ca 8347fc4 4e2df07 8347fc4 a513939 8347fc4 55b26c1 8347fc4 8269ab1 8347fc4 496a0f3 47796ca 8347fc4 4e2df07 8347fc4 4e2df07 496a0f3 8347fc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
# Markdown shown at the top of the Gradio page (rendered via gr.Markdown in main()).
title = """# 🙋🏻♂️Welcome to 🌟Tonic's Defog 🌬️🌁🌫️SqlCoder-2
You can use this Space to test out the current model [defog/sqlcoder2](https://huggingface.co/defog/sqlcoder2). [defog/sqlcoder2](https://huggingface.co/defog/sqlcoder2) is a 15B parameter model that doesn't outperform gpt-4 and gpt-4-turbo for natural language to SQL generation tasks on our sql-eval framework, and significantly outperforms all popular open-source models.
You can also use Defog 🌬️🌁🌫️SqlCoder by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/sqlcoder2?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻[![Let's build the future of AI together! 🚀🤖](https://discordapp.com/api/guilds/1109943800132010065/widget.png)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# NOTE(review): the decorator is applied to the whole class rather than to the
# function that performs GPU work — confirm this is the intended usage of
# `spaces.GPU` for this deployment.
@spaces.GPU
class SQLQueryGenerator:
    """Turn a natural-language question into a SQL query with a causal LM.

    The prompt is built from two text files: a template (``prompt_file``)
    that is filled in with ``str.format`` and a schema description
    (``metadata_file``) that is inserted into the template.
    """

    def __init__(self, model_name, prompt_file="prompt.md", metadata_file="metadata.sql"):
        """Load the tokenizer/model and remember the prompt file locations.

        Args:
            model_name: Identifier passed to ``from_pretrained``.
            prompt_file: Path of the prompt template; it is formatted with
                the ``user_question`` and ``table_metadata_string`` fields.
            metadata_file: Path of the file whose content is substituted
                for ``table_metadata_string``.
        """
        self.tokenizer, self.model = self.get_tokenizer_model(model_name)
        self.prompt_file = prompt_file
        self.metadata_file = metadata_file
        # Built lazily on the first inference call and reused afterwards.
        self._pipe = None

    def get_tokenizer_model(self, model_name):
        """Load and return ``(tokenizer, model)`` for ``model_name``.

        The model is requested in float16 with ``device_map="auto"`` and
        ``trust_remote_code=True``.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.float16,
            device_map="auto",
            use_cache=True,
        )
        return tokenizer, model

    def generate_prompt(self, question):
        """Read the template and metadata files and return the filled prompt.

        Raises:
            OSError: If either file cannot be opened.
            KeyError / IndexError: If the template contains ``str.format``
                fields other than the two supplied here.
        """
        # Explicit UTF-8 so the result does not depend on the host locale.
        with open(self.prompt_file, "r", encoding="utf-8") as f:
            prompt = f.read()
        with open(self.metadata_file, "r", encoding="utf-8") as f:
            table_metadata_string = f.read()
        prompt = prompt.format(
            user_question=question, table_metadata_string=table_metadata_string
        )
        return prompt

    def _get_pipeline(self):
        """Return the text-generation pipeline, building it on first use."""
        pipe = getattr(self, "_pipe", None)
        if pipe is None:
            # The construction arguments never change between requests, so
            # the pipeline is created once instead of on every call.
            pipe = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_new_tokens=300,
                do_sample=False,
                num_beams=5,
            )
            self._pipe = pipe
        return pipe

    @staticmethod
    def _extract_sql(generated_text):
        """Cut the first SQL statement out of the raw generated text.

        Keeps what follows the last "```sql" marker (the whole text if the
        marker is absent), stops at the next "```" fence and at the first
        ";", strips whitespace and appends a single terminating ";".
        """
        after_marker = generated_text.rpartition("```sql")[2]
        inside_fence = after_marker.partition("```")[0]
        first_statement = inside_fence.partition(";")[0]
        return first_statement.strip() + ";"

    def run_inference(self, question):
        """Generate and return the SQL query string for ``question``."""
        # NOTE(review): the model was loaded with device_map="auto"; moving a
        # model dispatched that way may be redundant or rejected — confirm
        # this call is still needed in the target environment.
        self.model.to('cuda')
        prompt = self.generate_prompt(question)
        eos_token_id = self.tokenizer.eos_token_id
        pipe = self._get_pipeline()
        outputs = pipe(
            prompt,
            num_return_sequences=1,
            eos_token_id=eos_token_id,
            # The EOS id is reused as the padding id.
            pad_token_id=eos_token_id,
        )
        generated_query = self._extract_sql(outputs[0]["generated_text"])
        return generated_query
def generate_sql(question, sql_query_generator):
    """Return the SQL that ``sql_query_generator`` produces for ``question``.

    Args:
        question: The natural-language question to translate.
        sql_query_generator: Any object exposing ``run_inference(question)``.

    Returns:
        Whatever ``run_inference`` returns for the question.
    """
    sql_text = sql_query_generator.run_inference(question)
    return sql_text
def main():
    """Load the model, build the Gradio UI and start serving it."""
    model_name = "defog/sqlcoder2"
    sql_query_generator = SQLQueryGenerator(model_name)

    def on_submit(user_question):
        # Bind the generator through a closure instead of gr.State: a State's
        # initial value is deep-copied by Gradio, which would duplicate the
        # loaded model.
        return generate_sql(user_question, sql_query_generator)

    with gr.Blocks() as demo:
        gr.Markdown(title)
        question = gr.Textbox(label="Enter your question")
        submit = gr.Button("Generate SQL Query")
        output = gr.Textbox(label="🌬️🌁🌫️SqlCoder-2")
        submit.click(fn=on_submit, inputs=[question], outputs=output)

    demo.launch()
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()