richardr1126
committed on
Commit
•
dd9d480
1
Parent(s):
ef53845
New and improved
Browse files
- README.md +30 -0
- app-ngrok.py +15 -5
README.md
CHANGED
@@ -10,4 +10,34 @@ pinned: true
|
|
10 |
license: bigcode-openrail-m
|
11 |
---
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
10 |
license: bigcode-openrail-m
|
11 |
---
|
12 |
|
13 |
+
## Citations
|
14 |
+
|
15 |
+
```
|
16 |
+
@misc{luo2023wizardcoder,
|
17 |
+
title={WizardCoder: Empowering Code Large Language Models with Evol-Instruct},
|
18 |
+
author={Ziyang Luo and Can Xu and Pu Zhao and Qingfeng Sun and Xiubo Geng and Wenxiang Hu and Chongyang Tao and Jing Ma and Qingwei Lin and Daxin Jiang},
|
19 |
+
year={2023},
|
20 |
+
}
|
21 |
+
```
|
22 |
+
```
|
23 |
+
@article{yu2018spider,
|
24 |
+
title={Spider: A large-scale human-labeled dataset for complex and cross-domain semantic parsing and text-to-sql task},
|
25 |
+
author={Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},
|
26 |
+
journal={arXiv preprint arXiv:1809.08887},
|
27 |
+
year={2018}
|
28 |
+
}
|
29 |
+
```
|
30 |
+
```
|
31 |
+
@article{dettmers2023qlora,
|
32 |
+
title={QLoRA: Efficient Finetuning of Quantized LLMs},
|
33 |
+
author={Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke},
|
34 |
+
journal={arXiv preprint arXiv:2305.14314},
|
35 |
+
year={2023}
|
36 |
+
}
|
37 |
+
```
|
38 |
+
|
39 |
+
## Disclaimer
|
40 |
+
|
41 |
+
The resources, including code, data, and model weights, associated with this project are restricted for academic research purposes only and cannot be used for commercial purposes. The content produced by any version of WizardCoder is influenced by uncontrollable variables such as randomness, and therefore, the accuracy of the output cannot be guaranteed by this project. This project does not accept any legal liability for the content of the model output, nor does it assume responsibility for any losses incurred due to the use of associated resources and output results.
|
42 |
+
|
43 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app-ngrok.py
CHANGED
@@ -3,6 +3,9 @@ import gradio as gr
|
|
3 |
import sqlparse
|
4 |
import requests
|
5 |
from time import sleep
|
|
|
|
|
|
|
6 |
|
7 |
def format(text):
|
8 |
# Split the text by "|", and get the last element in the list which should be the final query
|
@@ -23,8 +26,7 @@ def format(text):
|
|
23 |
|
24 |
return final_query_markdown
|
25 |
|
26 |
-
|
27 |
-
def bot(input_message: str, db_info="", temperature=0.3, top_p=0.9, top_k=0, repetition_penalty=1.08):
|
28 |
# Format the user's input message
|
29 |
messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"
|
30 |
|
@@ -62,10 +64,11 @@ def bot(input_message: str, db_info="", temperature=0.3, top_p=0.9, top_k=0, rep
|
|
62 |
print('Waiting for 10 seconds before retrying...')
|
63 |
sleep(10)
|
64 |
|
|
|
65 |
with gr.Blocks(theme='gradio/soft') as demo:
|
66 |
header = gr.HTML("""
|
67 |
<h1 style="text-align: center">SQL Skeleton WizardCoder Demo</h1>
|
68 |
-
<h3 style="text-align: center"
|
69 |
""")
|
70 |
|
71 |
output_box = gr.Code(label="Generated SQL", lines=2, interactive=True)
|
@@ -79,6 +82,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
79 |
repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.01)
|
80 |
|
81 |
run_button = gr.Button("Generate SQL", variant="primary")
|
|
|
82 |
|
83 |
with gr.Accordion("Examples", open=True):
|
84 |
examples = gr.Examples([
|
@@ -87,7 +91,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
87 |
["What are the number of concerts that occurred in the stadium with the largest capacity ?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
|
88 |
["How many male singers performed in concerts in the year 2023?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
|
89 |
["List the names of all singers who performed in a concert with the theme 'Rock'", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"]
|
90 |
-
], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty], fn=
|
91 |
|
92 |
quantized_model = "richardr1126/spider-skeleton-wizard-coder-ggml"
|
93 |
merged_model = "richardr1126/spider-skeleton-wizard-coder-merged"
|
@@ -100,9 +104,15 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
100 |
<p>π Leveraging the <a href='https://huggingface.co/{quantized_model}'><strong>4-bit GGML version</strong></a> of <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a> model.</p>
|
101 |
<p>π How it's made: <a href='https://huggingface.co/{initial_model}'><strong>{initial_model}</strong></a> was finetuned to create <a href='https://huggingface.co/{lora_model}'><strong>{lora_model}</strong></a>, then merged together to create <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a>.</p>
|
102 |
<p>π Fine-tuning was performed using QLoRA techniques on the <a href='https://huggingface.co/datasets/{dataset}'><strong>{dataset}</strong></a> dataset. You can view training metrics on the <a href='https://huggingface.co/{lora_model}'><strong>QLoRa adapter HF Repo</strong></a>.</p>
|
|
|
103 |
""")
|
104 |
|
|
|
|
|
105 |
|
106 |
-
|
|
|
|
|
|
|
107 |
|
108 |
demo.queue(concurrency_count=1, max_size=10).launch(debug=True)
|
|
|
3 |
import sqlparse
|
4 |
import requests
|
5 |
from time import sleep
|
6 |
+
import re
|
7 |
+
|
8 |
+
|
9 |
|
10 |
def format(text):
|
11 |
# Split the text by "|", and get the last element in the list which should be the final query
|
|
|
26 |
|
27 |
return final_query_markdown
|
28 |
|
29 |
+
def generate(input_message: str, db_info="", temperature=0.3, top_p=0.9, top_k=0, repetition_penalty=1.08):
|
|
|
30 |
# Format the user's input message
|
31 |
messages = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n\nConvert text to sql: {input_message} {db_info}\n\n### Response:\n\n"
|
32 |
|
|
|
64 |
print('Waiting for 10 seconds before retrying...')
|
65 |
sleep(10)
|
66 |
|
67 |
+
# Gradio UI Code
|
68 |
with gr.Blocks(theme='gradio/soft') as demo:
|
69 |
header = gr.HTML("""
|
70 |
<h1 style="text-align: center">SQL Skeleton WizardCoder Demo</h1>
|
71 |
+
<h3 style="text-align: center">🕷️☠️🧙‍♂️ Generate SQL queries from Natural Language 🕷️☠️🧙‍♂️</h3>
|
72 |
""")
|
73 |
|
74 |
output_box = gr.Code(label="Generated SQL", lines=2, interactive=True)
|
|
|
82 |
repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.08, step=0.01)
|
83 |
|
84 |
run_button = gr.Button("Generate SQL", variant="primary")
|
85 |
+
run_button.click(fn=generate, inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty], outputs=output_box, api_name="txt2sql")
|
86 |
|
87 |
with gr.Accordion("Examples", open=True):
|
88 |
examples = gr.Examples([
|
|
|
91 |
["What are the number of concerts that occurred in the stadium with the largest capacity ?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
|
92 |
["How many male singers performed in concerts in the year 2023?", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"],
|
93 |
["List the names of all singers who performed in a concert with the theme 'Rock'", "| stadium : stadium_id , location , name , capacity , highest , lowest , average | singer : singer_id , name , country , song_name , song_release_year , age , is_male | concert : concert_id , concert_name , theme , stadium_id , year | singer_in_concert : concert_id , singer_id | concert.stadium_id = stadium.stadium_id | singer_in_concert.singer_id = singer.singer_id | singer_in_concert.concert_id = concert.concert_id |"]
|
94 |
+
], inputs=[input_text, db_info, temperature, top_p, top_k, repetition_penalty], fn=generate, cache_examples=True, outputs=output_box)
|
95 |
|
96 |
quantized_model = "richardr1126/spider-skeleton-wizard-coder-ggml"
|
97 |
merged_model = "richardr1126/spider-skeleton-wizard-coder-merged"
|
|
|
104 |
<p>π Leveraging the <a href='https://huggingface.co/{quantized_model}'><strong>4-bit GGML version</strong></a> of <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a> model.</p>
|
105 |
<p>π How it's made: <a href='https://huggingface.co/{initial_model}'><strong>{initial_model}</strong></a> was finetuned to create <a href='https://huggingface.co/{lora_model}'><strong>{lora_model}</strong></a>, then merged together to create <a href='https://huggingface.co/{merged_model}'><strong>{merged_model}</strong></a>.</p>
|
106 |
<p>π Fine-tuning was performed using QLoRA techniques on the <a href='https://huggingface.co/datasets/{dataset}'><strong>{dataset}</strong></a> dataset. You can view training metrics on the <a href='https://huggingface.co/{lora_model}'><strong>QLoRa adapter HF Repo</strong></a>.</p>
|
107 |
+
|
108 |
""")
|
109 |
|
110 |
+
readme_content = requests.get(f"https://huggingface.co/{merged_model}/raw/main/README.md").text
|
111 |
+
readme_content = re.sub('---.*?---', '', readme_content, flags=re.DOTALL) #Remove YAML front matter
|
112 |
|
113 |
+
with gr.Accordion("π Model Readme", open=True):
|
114 |
+
readme = gr.Markdown(
|
115 |
+
readme_content,
|
116 |
+
)
|
117 |
|
118 |
demo.queue(concurrency_count=1, max_size=10).launch(debug=True)
|