from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import sqlparse
import torch

model_name = "defog/sqlcoder-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model in reduced precision, letting Accelerate place layers across
# the available devices and offload to disk when memory is tight.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # reduced precision to cut memory use
    device_map="auto",          # automatically distribute model layers
    use_cache=True,
    offload_folder="text_to_sql_defog_7b/offfolder",  # disk offload location
    offload_state_dict=True,    # offload the state dict while loading
)


def generate_response(prompt):
    # With device_map="auto" the model is already dispatched across devices,
    # so move the inputs to the model's device instead of calling model.to().
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **inputs,
        num_return_sequences=1,
        max_new_tokens=400,
        do_sample=False,  # greedy decoding for deterministic SQL output
        num_beams=1,
    )
    outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    # Pretty-print the generated SQL before returning it.
    return sqlparse.format(outputs[0], reindent=True)


iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=7, label="Input Prompt", placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Generated SQL"),
    title="SQL Query Generator",
    description="Generates SQL queries from a natural language prompt. Powered by the 'defog/sqlcoder-7b' model.",
)
iface.launch(share=True)
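
# A minimal sketch of exercising generate_response directly, without the UI.
# The prompt template and schema below are illustrative assumptions only:
# sqlcoder is meant to answer questions against a schema supplied in the
# prompt, so check the defog/sqlcoder model card for the exact template it
# expects. Run this before iface.launch(), since launch() blocks.
#
#   example_prompt = """### Task
#   Generate a SQL query to answer: how many orders were placed in 2023?
#
#   ### Database Schema
#   CREATE TABLE orders (id INTEGER PRIMARY KEY, placed_at TIMESTAMP);
#
#   ### SQL
#   """
#   print(generate_response(example_prompt))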