mwitiderrick committed on
Commit
4be4880
1 Parent(s): 34da136

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from deepsparse import Pipeline
2
+ import time
3
+ import gradio as gr
4
+
5
+ markdownn = '''
6
+ # Text Classification Pipeline with DeepSparse
7
+ DeepSparse is sparsity-aware inference runtime offering GPU-class performance on CPUs and APIs to integrate ML into your application. DeepSparse provides sparsified pipelines for computer vision and NLP.
8
+ The pipelines are similar to Hugging Face pipelines but are faster because they have been pruned and quantized. Here is a sample code for a question-answering pipeline:
9
+ ```
10
+ from deepsparse import Pipeline
11
+ pipeline = pipeline.create(task="text-classification", model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/pruned90_quant-none")
12
+ inference = pipeline(text)
13
+ print(inference)
14
+ ```
15
+ '''
16
+ task = "text-classification"
17
+ dense_classification_pipeline = Pipeline.create(
18
+ task=task,
19
+ model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/base-nonee",
20
+ )
21
+
22
+ sparse_classification_pipeline = Pipeline.create(
23
+ task=task,
24
+ model_path="zoo:nlp/text_classification/bert-base_cased/pytorch/huggingface/mnli/pruned90_quant-none",
25
+ )
26
+ def run_pipeline(text):
27
+ dense_start = time.perf_counter()
28
+
29
+ dense_output = dense_qa_pipeline([text])
30
+ dense_result = dict(dense_output)
31
+ dense_end = time.perf_counter()
32
+ dense_duration = (dense_end - dense_start) * 1000.0
33
+
34
+ sparse_start = time.perf_counter()
35
+
36
+ sparse_output = sparse_qa_pipeline([text])
37
+ sparse_result = dict(sparse_output)
38
+ sparse_end = time.perf_counter()
39
+ sparse_duration = (sparse_end - sparse_start) * 1000.0
40
+
41
+ return sparse_result, sparse_duration, dense_result, dense_duration
# Gradio UI: one text input, side-by-side latency/answer outputs for the
# dense and sparse models, and clickable example prompts.
# (Reconstructed from a diff-mangled paste; `+` prefixes and display line
# numbers removed, indentation restored — logic unchanged.)
with gr.Blocks() as demo:
    gr.Markdown(markdownn)

    with gr.Row():
        text = gr.Text(label="Text")

    with gr.Row():
        with gr.Column():
            dense_duration = gr.Number(label="Dense Latency (ms):")
            dense_answers = gr.Textbox(label="Dense model answer")

        with gr.Column():
            sparse_duration = gr.Number(label="Sparse Latency (ms):")
            sparse_answers = gr.Textbox(label="Sparse model answers")

    btn = gr.Button("Submit")
    # Output order must match run_pipeline's return tuple:
    # (sparse_result, sparse_duration, dense_result, dense_duration).
    btn.click(
        run_pipeline,
        inputs=[text],
        outputs=[sparse_answers, sparse_duration, dense_answers, dense_duration],
    )

    gr.Examples(
        [
            ["DeepSparse is sparsity-aware inference runtime offering GPU-class performance on CPUs and APIs to integrate ML into your application"],
            ["SparseML is a Library for applying sparsification recipes to neural networks with a few lines of code, enabling faster and smaller models"],
            ["Gradio is an open-source Python package that allows you to quickly create easy-to-use, customizable UI components for your ML model, any API, or even an arbitrary Python function using a few lines of code. You can integrate the Gradio GUI directly into your Jupyter notebook or share it as a link with anyone."],
        ],
        inputs=[text],
    )


if __name__ == "__main__":
    demo.launch()