Spaces:
Runtime error
Runtime error
mwitiderrick
committed on
Commit
•
4be4880
1
Parent(s):
34da136
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library first, then third-party (PEP 8 import grouping).
import time

import gradio as gr
from deepsparse import Pipeline

# Markdown rendered at the top of the Gradio demo page.
# FIX: the sample snippet used `pipeline.create` (lowercase, undefined) instead of
# `Pipeline.create`, and described a "question-answering" pipeline while the code
# shown is a text-classification pipeline.
markdownn = '''
# Text Classification Pipeline with DeepSparse
DeepSparse is sparsity-aware inference runtime offering GPU-class performance on CPUs and APIs to integrate ML into your application. DeepSparse provides sparsified pipelines for computer vision and NLP.
The pipelines are similar to Hugging Face pipelines but are faster because they have been pruned and quantized. Here is a sample code for a text classification pipeline:
```
from deepsparse import Pipeline
pipeline = Pipeline.create(task="text-classification", model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/pruned90_quant-none")
inference = pipeline(text)
print(inference)
```
'''
task = "text-classification"

# Dense (unoptimized) baseline model, used for the latency comparison.
# FIX: SparseZoo stub had a typo ("base-nonee"); corrected to "base-none"
# so the model path actually resolves at startup.
dense_classification_pipeline = Pipeline.create(
    task=task,
    model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/base-none",
)

# Pruned (90%) + quantized model served by DeepSparse.
sparse_classification_pipeline = Pipeline.create(
    task=task,
    model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/pruned90_quant-none",
)
def run_pipeline(text):
    """Classify *text* with both pipelines and time each inference.

    Args:
        text: the raw input string typed into the Gradio textbox.

    Returns:
        Tuple of (sparse_result, sparse_duration_ms, dense_result,
        dense_duration_ms) matching the `outputs=` wiring of the Submit button.
    """
    # FIX: the original called the undefined names dense_qa_pipeline /
    # sparse_qa_pipeline, raising NameError on every click (the app's
    # "Runtime error"). The module defines *_classification_pipeline.
    dense_start = time.perf_counter()
    dense_output = dense_classification_pipeline([text])
    dense_result = dict(dense_output)
    dense_end = time.perf_counter()
    dense_duration = (dense_end - dense_start) * 1000.0  # seconds -> ms

    sparse_start = time.perf_counter()
    sparse_output = sparse_classification_pipeline([text])
    sparse_result = dict(sparse_output)
    sparse_end = time.perf_counter()
    sparse_duration = (sparse_end - sparse_start) * 1000.0  # seconds -> ms

    return sparse_result, sparse_duration, dense_result, dense_duration
42 |
+
|
43 |
+
|
# Comparison UI: one text input, side-by-side dense vs sparse latency/answers.
with gr.Blocks() as demo:
    gr.Markdown(markdownn)

    with gr.Row():
        text = gr.Text(label="Text")

    with gr.Row():
        # Left column: unoptimized baseline.
        with gr.Column():
            dense_duration = gr.Number(label="Dense Latency (ms):")
            dense_answers = gr.Textbox(label="Dense model answer")

        # Right column: pruned + quantized model.
        with gr.Column():
            sparse_duration = gr.Number(label="Sparse Latency (ms):")
            sparse_answers = gr.Textbox(label="Sparse model answers")

    btn = gr.Button("Submit")
    # Output order must mirror run_pipeline's return tuple.
    btn.click(
        run_pipeline,
        inputs=[text],
        outputs=[sparse_answers, sparse_duration, dense_answers, dense_duration],
    )

    # Clickable example inputs shown under the form.
    gr.Examples(
        [
            ["DeepSparse is sparsity-aware inference runtime offering GPU-class performance on CPUs and APIs to integrate ML into your application"],
            ["SparseML is a Library for applying sparsification recipes to neural networks with a few lines of code, enabling faster and smaller models"],
            ["Gradio is an open-source Python package that allows you to quickly create easy-to-use, customizable UI components for your ML model, any API, or even an arbitrary Python function using a few lines of code. You can integrate the Gradio GUI directly into your Jupyter notebook or share it as a link with anyone."],
        ],
        inputs=[text],
    )
75 |
+
|
76 |
+
|
# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()