copy picard space over
- .gitignore +1 -0
- README.md +7 -5
- app.py +26 -0
.gitignore ADDED
@@ -0,0 +1 @@
+.venv
README.md CHANGED
@@ -1,12 +1,14 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Picard
+emoji: 🔥
+colorFrom: indigo
+colorTo: gray
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.8.2
 app_file: app.py
 pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+<div style=\"white-space: pre-wrap; text-align: center\"> <h4> Training Data </h4> The model has been fine-tuned on the 7000 training examples in the Spider text-to-SQL dataset. The model solves Spider's zero-shot text-to-SQL translation task, and that means that it can generalize to unseen SQL databases. <h4> Training Objective </h4> This model was initialized with T5-3B and fine-tuned with the text-to-text generation objective. Questions are always grounded in a database schema, and the model is trained to predict the SQL query that would be used to answer the question. The input to the model is composed of the user's natural language question, the database identifier, and a list of tables and their columns: \n <small> [question] | [db_id] | [table] : [column] ( [content] , [content] ) , [column] ( ... ) , [...] | [table] : ... | ... </small> \n The model outputs the database identifier and the SQL query that will be executed on the database to answer the user's question: \n <small> [db_id] | [sql] </small> <h4> Performance </h4> Out of the box, this model achieves 71.5 % exact-set match accuracy and 74.4 % execution accuracy on the Spider development set. On the test set, the model achieves 68.0 % exact-set match accuracy and 70.1 % execution accuracy. Using the PICARD constrained decoding method (see the official PICARD implementation), the model's performance can be improved to 75.5 % exact-set match accuracy and 79.3 % execution accuracy on the Spider development set. On the test set and with PICARD, the model achieves 71.9 % exact-set match accuracy and 75.1 % execution accuracy. </div>
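
Note: the new README text describes the model's linearized input format (question, database id, then each table with its columns, all joined by " | ") and its output format ("[db_id] | [sql]"). The sketch below is an illustrative reconstruction of that serialization, not part of this commit; the helper names (serialize_input, parse_output) and the example schema dictionary are hypothetical.

# Illustrative sketch (not from this commit): build the model input string in the
# format the README describes, and split the "[db_id] | [sql]" prediction apart.
def serialize_input(question, db_id, schema):
    """schema: dict mapping table name -> list of column names (content samples omitted)."""
    tables = " | ".join(
        f"{table} : {', '.join(columns)}" for table, columns in schema.items()
    )
    return f"{question} | {db_id} | {tables}"

def parse_output(prediction):
    """The model emits '[db_id] | [sql]'; split off the database id."""
    db_id, sql = prediction.split(" | ", 1)
    return db_id.strip(), sql.strip()

# Example schema for the Spider database 'concert_singer'; serializing it
# reproduces the example string used in app.py's `examples` list.
schema = {
    "stadium": ["stadium_id", "location", "name", "capacity", "highest", "lowest", "average"],
    "singer": ["singer_id", "name", "country", "song_name", "song_release_year", "age", "is_male"],
    "concert": ["concert_id", "concert_name", "theme", "stadium_id", "year"],
    "singer_in_concert": ["concert_id", "singer_id"],
}
print(serialize_input("How many singers do we have?", "concert_singer", schema))
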
app.py ADDED
@@ -0,0 +1,26 @@
+import gradio as gr
+import requests
+import os
+
+#retrieved from user generated Access Token on HuggingFace
+API_TOKEN = os.environ['API_TOKEN']
+
+API_URL = "https://api-inference.huggingface.co/models/tscholak/cxmefzzi"
+headers = {"Authorization": f"Bearer {API_TOKEN}"}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+iface = gr.Interface(
+    fn=query,
+    inputs="text",
+    examples=[["How many singers do we have? | concert_singer | stadium : stadium_id, location, name, capacity, highest, lowest, average | singer : singer_id, name, country, song_name, song_release_year, age, is_male | concert : concert_id, concert_name, theme, stadium_id, year | singer_in_concert : concert_id, singer_id"]],
+    outputs="text",
+    article="\n <div style=\"white-space: pre-wrap; text-align: center\"> <h4> Training Data </h4> The model has been fine-tuned on the 7000 training examples in the Spider text-to-SQL dataset. The model solves Spider's zero-shot text-to-SQL translation task, and that means that it can generalize to unseen SQL databases. <h4> Training Objective </h4> This model was initialized with T5-3B and fine-tuned with the text-to-text generation objective. Questions are always grounded in a database schema, and the model is trained to predict the SQL query that would be used to answer the question. The input to the model is composed of the user's natural language question, the database identifier, and a list of tables and their columns: \n <small> [question] | [db_id] | [table] : [column] ( [content] , [content] ) , [column] ( ... ) , [...] | [table] : ... | ... </small> \n The model outputs the database identifier and the SQL query that will be executed on the database to answer the user's question: \n <small> [db_id] | [sql] </small> <h4> Performance </h4> Out of the box, this model achieves 71.5 % exact-set match accuracy and 74.4 % execution accuracy on the Spider development set. On the test set, the model achieves 68.0 % exact-set match accuracy and 70.1 % execution accuracy. Using the PICARD constrained decoding method (see the official PICARD implementation), the model's performance can be improved to 75.5 % exact-set match accuracy and 79.3 % execution accuracy on the Spider development set. On the test set and with PICARD, the model achieves 71.9 % exact-set match accuracy and 75.1 % execution accuracy. </div>")
+
+iface.launch(share=True)
+
+#output = query({
+#    "inputs": "The answer to the universe is",
+#})
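
Note: in app.py, `query` forwards whatever string Gradio passes it directly as the JSON body, while the commented-out snippet at the bottom hints at the `{"inputs": ...}` payload shape. The sketch below is one assumed way to exercise the same Inference API endpoint outside Gradio; it is not code from this commit, and it reuses the API_URL and API_TOKEN names from app.py (API_TOKEN must be set in the environment).

# Illustrative usage sketch (not from this commit): call the hosted model
# directly with an {"inputs": ...} payload, reusing the endpoint from app.py.
import os
import requests

API_URL = "https://api-inference.huggingface.co/models/tscholak/cxmefzzi"
headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}

# Serialized question + schema, identical to the example in app.py's `examples` list.
question = (
    "How many singers do we have? | concert_singer | "
    "stadium : stadium_id, location, name, capacity, highest, lowest, average | "
    "singer : singer_id, name, country, song_name, song_release_year, age, is_male | "
    "concert : concert_id, concert_name, theme, stadium_id, year | "
    "singer_in_concert : concert_id, singer_id"
)

response = requests.post(API_URL, headers=headers, json={"inputs": question})
response.raise_for_status()
# Per the README's output format, the generated text should look like "<db_id> | <sql>".
print(response.json())
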