Spaces:
Sleeping
Sleeping
Initial commit
Browse files- .gitattributes +0 -35
- .gitignore +6 -0
- README.md +0 -12
- app.py +76 -0
- requirements.txt +13 -0
- utils.py +97 -0
.gitattributes
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
**/*
|
2 |
+
|
3 |
+
!app.py
|
4 |
+
!utils.py
|
5 |
+
!.gitignore
|
6 |
+
!requirements.txt
|
README.md
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: QNLPDemoApp
|
3 |
-
emoji: 📊
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: pink
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.39.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit front end for the QNLP demo."""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st

from utils import QNLP

# Page title plus the two widgets the rest of the script reads.
st.write("### QNLP demo")
lang = st.selectbox("Select Language", ("EN", "ZH"))
# NOTE(review): shadows the builtin `input`; name kept because the
# main script body below references it.
input = st.text_input("Text Input")
def plot_data(array: np.ndarray, xlim: tuple = (-5, 260)):
    """Plot a histogram of value frequencies, as a percentage of the total.

    Parameters
    ----------
    array : np.ndarray
        Measurement values to histogram (flattened by ``np.unique``).
    xlim : tuple, optional
        X-axis limits.  Default ``(-5, 260)`` keeps the previous
        hard-coded range, which covers 8-bit values with a margin.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing a single bar chart (Value vs % Total).
    """
    fig, ax = plt.subplots()

    ax.set_xlabel("Value")
    ax.set_ylabel("% Total")
    value, count = np.unique(array, return_counts=True)
    # Convert raw occurrence counts to percentages of the sample size.
    count = count * 100 / len(array)
    ax.set_xlim(list(xlim))
    ax.bar(value, count, edgecolor="blue", align="edge")

    return fig
# Main script body: run the pipeline on the user's text and render an
# "Overall" tab plus one tab per sentence (when there is more than one).
if input.strip():
    results = QNLP(lang.lower()).process_sentence(input)
    subtabs = []
    if len(results) > 1:
        subtabs = [f"Sentence {n+1}" for n in range(len(results))]

    tabs = st.tabs(["Overall"] + subtabs)

    overall_tab = tabs[0]
    detail_tabs = tabs[1:]

    with overall_tab:
        st.header("Overall")
        # Pool the per-shot summed bit values of every finished job.
        whole_array = np.concatenate([np.sum(res.array, axis=-1) for res in results if res.job.done()])

        total = len(whole_array)
        value, count = np.unique(whole_array, return_counts=True)
        count = count / total * 100

        df = pd.DataFrame({
            "Bit Value": value,
            "Percentage": count,
        })

        col1, col2 = st.columns([1, 2])
        with col1:
            st.dataframe(df, hide_index=True)
        with col2:
            st.pyplot(plot_data(whole_array))

    for idx, (tab, result) in enumerate(zip(detail_tabs, results)):
        with tab:
            # BUG FIX: tab labels are 1-based ("Sentence 1", ...); the
            # header previously used the 0-based index.
            st.header(f"Sentence {idx + 1}")
            st.write(' '.join(result.tokens))

            if result.valid:
                value, count = np.unique(result.array, return_counts=True)
                # BUG FIX: percentages were normalised by the *overall*
                # sample size (`total`); use this sentence's own size.
                count = count / len(result.array) * 100

                df = pd.DataFrame({
                    "Bit Value": value,
                    "Percentage": count,
                })

                col1, col2 = st.columns([1, 2])
                with col1:
                    st.dataframe(df, hide_index=True)
                with col2:
                    st.pyplot(plot_data(result.array))
            else:
                st.write("Sentence Discarded due to lack of qubits to process")
else:
    # BUG FIX: typo "langauge" -> "language" in the user-facing prompt.
    st.write("Choose a language and input some sentences to start !")
|
requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ipython
|
2 |
+
joblib
|
3 |
+
lambeq
|
4 |
+
matplotlib
|
5 |
+
numpy
|
6 |
+
pandas
|
7 |
+
pytket
|
8 |
+
pytket_qiskit
|
9 |
+
qiskit
|
10 |
+
qiskit_aer
|
11 |
+
spacy
|
12 |
+
streamlit
|
13 |
+
tqdm
|
utils.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Shared helpers for the QNLP demo: language models, ansatz and logging."""
import logging
import re

import numpy as np
import spacy
import spacy.cli
import spacy.lang.en
from dataclasses import dataclass
from joblib import Parallel, delayed
from lambeq import AtomicType, BobcatParser, IQPAnsatz, Rewriter
from pytket.extensions.qiskit import tk_to_qiskit
from qiskit import QuantumCircuit
from qiskit.primitives.primitive_job import PrimitiveJob
from qiskit_aer.primitives import SamplerV2
from tqdm import tqdm

# Language code -> spaCy pipeline used for sentence splitting/tokenising.
LANGUAGE_2_SPACY = {
    'en': 'en_core_web_sm',
    'zh': 'zh_core_web_sm',
}

# Qubits allocated per lambeq atomic type: a single qubit carries the
# sentence wire; every other type gets none.
MAPPING = {
    AtomicType.PREPOSITIONAL_PHRASE: 0,
    AtomicType.NOUN: 0,
    AtomicType.SENTENCE: 1,
    AtomicType.CONJUNCTION: 0,
    AtomicType.PUNCTUATION: 0,
}

# Shared single-layer IQP ansatz used to turn diagrams into circuits.
ANSATZ = IQPAnsatz(MAPPING, n_layers=1, discard=True)
LOGGER = logging.getLogger(__name__)
@dataclass
class QNLP_OUTPUT():
    """Result of processing one sentence.

    Bundles the tokenised sentence, the compiled qiskit circuit and the
    (already submitted) sampler job that measures it.
    """
    # BUG FIX: was annotated ``str`` but actually holds the sentence's
    # token list (built per sentence in QNLP.process_sentence and
    # consumed via ' '.join(result.tokens) in the app).
    tokens: list[str]
    circuit: QuantumCircuit   # measured qiskit circuit sent to the sampler
    job: PrimitiveJob         # sampler job producing the measurement data

    @property
    def array(self) -> np.ndarray:
        """Measurement bit-array of the finished job.

        Returns an empty array while the job has not completed yet, so
        callers can always treat the value as an ndarray.
        """
        if not self.job.done():
            return np.array([])
        return self.job.result()[0].data.meas.array

    @property
    def valid(self) -> bool:
        """True once the sampling job has completed."""
        return self.job.done()
class QNLP():
    """End-to-end QNLP pipeline.

    Splits raw text into sentences with spaCy, parses each sentence to a
    DisCoCat diagram with lambeq's BobcatParser, compiles it to a quantum
    circuit via the shared IQP ansatz and submits it to the Aer sampler.
    """

    def __init__(self, langauge="en") -> None:
        """Load the spaCy model for *langauge* ('en' or 'zh') and set up
        the lambeq rewriter/parser.

        Note: the parameter name keeps its original misspelling for
        backward compatibility with existing keyword callers.
        """
        model = LANGUAGE_2_SPACY.get(langauge)

        try:
            self.nlp = spacy.load(model)
        except OSError:
            # Model is not installed yet: fetch it once, then retry.
            LOGGER.warning('Downloading SpaCy tokeniser. '
                           'This action only has to happen once.')
            spacy.cli.download(model)
            self.nlp = spacy.load(model)

        # Rewrite rules that simplify diagrams before circuit generation.
        self.rewriter = Rewriter([
            'auxiliary',
            'connector',
            'determiner',
            'postadverb',
            'preadverb',
            'prepositional_phrase',
        ])
        self.parser = BobcatParser()

    def process_sentence(self,
                         input_sentence: str,
                         shots = 1024
                         ) -> list[QNLP_OUTPUT]:
        """Process raw text into one sampled circuit per sentence.

        Parameters
        ----------
        input_sentence : str
            Raw user text; may span several sentences and lines.
        shots : int
            Measurement shots per circuit (default 1024).

        Returns
        -------
        list[QNLP_OUTPUT]
            One entry (tokens, circuit, job) per detected sentence.
        """
        # BUG FIX: newlines were collapsed to '' which glued the last
        # word of one line to the first word of the next; replace runs
        # of newlines with a single space instead.
        input_sentence = re.sub(r'\n+', ' ', input_sentence)
        docs = self.nlp(input_sentence)
        # One token-string list per spaCy-detected sentence.
        sentences = [[str(s).strip() for s in chunks] for chunks in docs.sents]

        def sentence2diagrams(sent: list[str], pb: tqdm = None):
            # Parse one tokenised sentence and normalise after rewriting.
            diagram = self.parser.sentence2diagram(sent, tokenised=True)
            diagram = self.rewriter(diagram).normal_form()

            if pb: pb.update(1)
            return diagram

        pb = tqdm(total=len(sentences), desc="Splitting sentences")
        diagrams = Parallel(4, require='sharedmem')(delayed(sentence2diagrams)(s, pb) for s in sentences)

        # Convert the tket circuits to qiskit and add terminal measurements.
        qiskit_circuits = list(tk_to_qiskit(ANSATZ(diagram).to_tk()) for diagram in diagrams)
        for qc in qiskit_circuits:
            qc.measure_all()

        sampler = SamplerV2()
        # All ansatz parameters are bound to 1.0 (untrained demo model).
        jobs = [sampler.run([(qc, [1] * qc.num_parameters)], shots=shots)
                for qc in qiskit_circuits]

        return [QNLP_OUTPUT(*params) for params in zip(sentences, qiskit_circuits, jobs)]