Spaces:

Tonic
/

sqlcoder2

Paused

App Files Files Community

sqlcoder2 / app.py

Tonic

Update app.py

47796ca verified over 1 year ago

raw

history blame

4.47 kB

	import spaces
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	import gradio as gr

	# Markdown/HTML banner shown at the top of the page; main() passes it to gr.Markdown(title).
	# NOTE(review): the trailing </h3> has no matching opening tag inside this string, and the
	# "Duplicate Space" badge links to Tonic/Meditron70B-AWQ rather than this Space -- confirm
	# both are intended.
	title = """# 🙋🏻‍♂️Welcome to 🌟Tonic's Defog 🌬️🌁🌫️SqlCoder-34B-Alpha
	You can use this Space to test out the current model [defog/sqlcoder-34b-alpha](https://huggingface.co/defog/sqlcoder-34b-alpha). [defog/sqlcoder-34b-alpha](https://huggingface.co/defog/sqlcoder-34b-alpha) is a 34B parameter model that outperforms gpt-4 and gpt-4-turbo for natural language to SQL generation tasks on our sql-eval framework, and significantly outperforms all popular open-source models. SQLCoder-34B is fine-tuned on a base CodeLlama model.
	You can also use 👨🏻‍⚕️❤️‍🩹🧑🏻‍⚕️Meditron by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/Meditron70B-AWQ?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
	Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻[![Let's build the future of AI together! 🚀🤖](https://discordapp.com/api/guilds/1109943800132010065/widget.png)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
	"""

	class SQLQueryGenerator:
	    """Generate a SQL query from a natural-language question with a causal LM.

	    The prompt is assembled from two text files: a template (``prompt_file``)
	    that is filled in with ``str.format`` using the fields ``user_question``
	    and ``table_metadata_string``, and a schema file (``metadata_file``)
	    whose contents supply the latter field.
	    """

	    def __init__(self, model_name, prompt_file="prompt.md", metadata_file="metadata.sql"):
	        """Load the tokenizer and model and remember the prompt/schema paths.

	        Args:
	            model_name: Model identifier passed to ``from_pretrained``.
	            prompt_file: Path of the prompt template file.
	            metadata_file: Path of the file holding the table metadata text.
	        """
	        self.tokenizer, self.model = self.get_tokenizer_model(model_name)
	        self.prompt_file = prompt_file
	        self.metadata_file = metadata_file

	    def get_tokenizer_model(self, model_name):
	        """Load and return ``(tokenizer, model)`` for ``model_name``.

	        The model is loaded in float16 with ``device_map="auto"``.
	        """
	        tokenizer = AutoTokenizer.from_pretrained(model_name)
	        # NOTE(security): trust_remote_code=True lets the model repository
	        # ship Python code that is executed at load time; only use it with
	        # repositories you trust.
	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            trust_remote_code=True,
	            torch_dtype=torch.float16,
	            device_map="auto",
	            use_cache=True,
	        )
	        return tokenizer, model

	    def generate_prompt(self, question):
	        """Return the prompt template filled with ``question`` and the schema.

	        Both files are re-read on every call and decoded as UTF-8 so the
	        result does not depend on the host's locale encoding.

	        Raises:
	            OSError: If either file cannot be opened.
	            KeyError: If the template references a field other than
	                ``user_question`` or ``table_metadata_string``.
	        """
	        with open(self.prompt_file, "r", encoding="utf-8") as f:
	            template = f.read()

	        with open(self.metadata_file, "r", encoding="utf-8") as f:
	            table_metadata_string = f.read()

	        return template.format(
	            user_question=question,
	            table_metadata_string=table_metadata_string,
	        )

	    @staticmethod
	    def _extract_sql(generated_text):
	        """Cut the first SQL statement out of the pipeline's generated text.

	        Keeps what follows the last ``` ```sql ``` fence, drops everything
	        from the next ``` ``` ``` fence onward, keeps only the text before the
	        first ``;``, strips surrounding whitespace and re-appends a single
	        ``;``. Text without any fence is treated as the statement itself.
	        """
	        after_fence = generated_text.split("```sql")[-1]
	        inside_fence = after_fence.split("```")[0]
	        first_statement = inside_fence.split(";")[0]
	        return first_statement.strip() + ";"

	    def run_inference(self, question):
	        """Generate and return a single ``;``-terminated SQL query string.

	        Uses deterministic beam search (5 beams, no sampling) limited to
	        300 new tokens.
	        """
	        prompt = self.generate_prompt(question)
	        eos_token_id = self.tokenizer.eos_token_id
	        pipe = pipeline(
	            "text-generation",
	            model=self.model,
	            tokenizer=self.tokenizer,
	            max_new_tokens=300,
	            do_sample=False,
	            num_beams=5,
	        )
	        outputs = pipe(
	            prompt,
	            num_return_sequences=1,
	            eos_token_id=eos_token_id,
	            # EOS doubles as the padding token (presumably because the
	            # tokenizer defines no dedicated pad token -- TODO confirm).
	            pad_token_id=eos_token_id,
	        )
	        return self._extract_sql(outputs[0]["generated_text"])


	@spaces.GPU  # run this handler with GPU access
	def generate_sql(question):
	    """Return the SQL query generated for ``question``.

	    Relies on a module-level ``sql_query_generator`` object being bound
	    before the first call; its ``run_inference`` method does the work.
	    """
	    query = sql_query_generator.run_inference(question)
	    return query

	def main():
	    """Load the model, build the Gradio UI and start serving it."""
	    # generate_sql() resolves `sql_query_generator` as a module-level name,
	    # so it has to be bound globally here; as a plain local it would be
	    # invisible to the click handler and every request would fail with
	    # NameError.
	    global sql_query_generator

	    model_name = "defog/sqlcoder-34b-alpha"
	    sql_query_generator = SQLQueryGenerator(model_name)

	    with gr.Blocks() as demo:
	        gr.Markdown(title)
	        question = gr.Textbox(label="Enter your question")
	        submit = gr.Button("Generate SQL Query")
	        output = gr.Textbox(label="🌬️🌁🌫️SqlCoder-34B-alpha")
	        # Wire the button to the GPU-decorated handler.
	        submit.click(fn=generate_sql, inputs=question, outputs=output)

	    demo.launch()


	# Only start the app when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()