Yapp99 committed on
Commit
258f0c2
·
1 Parent(s): 50c456e

Initial commit

Browse files
Files changed (6) hide show
  1. .gitattributes +0 -35
  2. .gitignore +6 -0
  3. README.md +0 -12
  4. app.py +76 -0
  5. requirements.txt +13 -0
  6. utils.py +97 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ **/*
2
+
3
+ !app.py
4
+ !utils.py
5
+ !.gitignore
6
+ !requirements.txt
README.md DELETED
@@ -1,12 +0,0 @@
1
- ---
2
- title: QNLPDemoApp
3
- emoji: 📊
4
- colorFrom: red
5
- colorTo: pink
6
- sdk: streamlit
7
- sdk_version: 1.39.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from utils import QNLP
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

st.write("### QNLP demo")
lang = st.selectbox("Select Language", ("EN", "ZH"))
# Renamed from `input` to avoid shadowing the builtin.
user_input = st.text_input("Text Input")


def plot_data(array: np.ndarray):
    """Return a matplotlib figure: bar chart of measured values vs. % frequency."""
    fig, ax = plt.subplots()

    ax.set_xlabel("Value")
    ax.set_ylabel("% Total")
    value, count = np.unique(array, return_counts=True)
    count = count * 100 / len(array)
    # Measured byte sums fall in 0-255; small margin on each side.
    ax.set_xlim([-5, 260])
    ax.bar(value, count, edgecolor="blue", align="edge")

    return fig


if user_input.strip():
    results = QNLP(lang.lower()).process_sentence(user_input)
    subtabs = []
    if len(results) > 1:
        subtabs = [f"Sentence {n+1}" for n in range(len(results))]

    tabs = st.tabs(["Overall"] + subtabs)

    overall_tab = tabs[0]
    detail_tabs = tabs[1:]

    with overall_tab:
        st.header("Overall")
        # Only finished jobs contribute; guard against the case where
        # nothing has completed yet (np.concatenate([]) would raise).
        done_arrays = [np.sum(res.array, axis=-1) for res in results if res.job.done()]
        if done_arrays:
            whole_array = np.concatenate(done_arrays)

            total = len(whole_array)
            value, count = np.unique(whole_array, return_counts=True)
            count = count / total * 100

            df = pd.DataFrame({
                "Bit Value": value,
                "Percentage": count,
            })

            col1, col2 = st.columns([1, 2])
            with col1:
                st.dataframe(df, hide_index=True)
            with col2:
                st.pyplot(plot_data(whole_array))
        else:
            st.write("No finished sampling jobs yet.")

    for idx, (tab, result) in enumerate(zip(detail_tabs, results)):
        with tab:
            # BUG FIX: header was 0-based while tab labels are 1-based.
            st.header(f"Sentence {idx + 1}")
            st.write(' '.join(result.tokens))

            if result.valid:
                value, count = np.unique(result.array, return_counts=True)
                # BUG FIX: percentage was computed against the overall
                # total from the "Overall" tab; use this sentence's own
                # sample count, consistent with plot_data().
                count = count / len(result.array) * 100

                df = pd.DataFrame({
                    "Bit Value": value,
                    "Percentage": count,
                })

                col1, col2 = st.columns([1, 2])
                with col1:
                    st.dataframe(df, hide_index=True)
                with col2:
                    st.pyplot(plot_data(result.array))
            else:
                st.write("Sentence Discarded due to lack of qubits to process")
else:
    st.write("Choose a language and input some sentences to start !")
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ipython
2
+ joblib
3
+ lambeq
4
+ matplotlib
5
+ numpy
6
+ pandas
7
+ pytket
8
+ pytket_qiskit
9
+ qiskit
10
+ qiskit_aer
11
+ spacy
12
+ streamlit
13
+ tqdm
utils.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from qiskit.primitives.primitive_job import PrimitiveJob
3
+ from qiskit import QuantumCircuit
4
+ import re
5
+ import spacy
6
+ import spacy.lang.en
7
+ import spacy.cli
8
+ import logging
9
+ from lambeq import Rewriter, BobcatParser
10
+ from tqdm import tqdm
11
+ from lambeq import AtomicType, IQPAnsatz
12
+ from pytket.extensions.qiskit import tk_to_qiskit
13
+ from qiskit_aer.primitives import SamplerV2
14
+ from dataclasses import dataclass
15
+ from joblib import Parallel, delayed
16
+
17
# Maps supported language codes (lower-case) to the spaCy pipeline used
# for sentence segmentation and tokenisation.
LANGUAGE_2_SPACY = dict(
    en = 'en_core_web_sm',
    zh = 'zh_core_web_sm',
)

# Qubits assigned to each lambeq atomic type when building circuits:
# only the sentence wire carries a qubit; every other type gets 0.
MAPPING = {
    AtomicType.PREPOSITIONAL_PHRASE: 0,
    AtomicType.NOUN: 0,
    AtomicType.SENTENCE: 1,
    AtomicType.CONJUNCTION: 0,
    AtomicType.PUNCTUATION: 0,
}

# Single-layer IQP ansatz shared by all circuit constructions below.
ANSATZ = IQPAnsatz(MAPPING, n_layers=1, discard=True)
LOGGER = logging.getLogger(__name__)
32
+
33
@dataclass
class QNLP_OUTPUT():
    """Result bundle for one processed sentence.

    Holds the sentence tokens, the measured qiskit circuit, and the
    (possibly still running) sampler job that produced the counts.
    """

    # FIX: was annotated `str`, but callers store a token list and do
    # `' '.join(result.tokens)`. Qiskit annotations are quoted so the
    # class can be created without importing qiskit eagerly.
    tokens: "list[str]"
    circuit: "QuantumCircuit"
    job: "PrimitiveJob"

    @property
    def array(self):
        """Measurement bit array, or an empty array while the job is pending."""
        if not self.job.done():
            return np.array([])
        return self.job.result()[0].data.meas.array

    @property
    def valid(self):
        """True once the sampler job has finished."""
        return self.job.done()
48
+
49
class QNLP():
    """End-to-end pipeline: text -> sentences -> lambeq diagrams ->
    qiskit circuits -> sampler jobs.
    """

    # NOTE(review): parameter name "langauge" is a typo for "language";
    # kept as-is because existing callers may pass it by keyword.
    def __init__(self, langauge = "en") -> None:
        model = LANGUAGE_2_SPACY.get(langauge)
        # FIX: previously an unknown code produced spacy.load(None) and a
        # cryptic failure; fail early with an explicit message instead.
        if model is None:
            raise ValueError(
                f"Unsupported language {langauge!r}; "
                f"expected one of {sorted(LANGUAGE_2_SPACY)}")

        try:
            self.nlp = spacy.load(model)
        except OSError:
            LOGGER.warning('Downloading SpaCy tokeniser. '
                           'This action only has to happen once.')
            spacy.cli.download(model)
            self.nlp = spacy.load(model)

        # Rewrite rules used to simplify the pregroup diagrams before
        # they are turned into circuits.
        self.rewriter = Rewriter([
            'auxiliary',
            'connector',
            'determiner',
            'postadverb',
            'preadverb',
            'prepositional_phrase',
        ])
        self.parser = BobcatParser()

    def process_sentence(self,
                         input_sentence: str,
                         shots: int = 1024
                         ) -> list[QNLP_OUTPUT]:
        """Split *input_sentence* into sentences, build one circuit per
        sentence, and submit each to a local SamplerV2.

        Returns one QNLP_OUTPUT per sentence; the contained jobs may
        still be running when this returns.
        """
        # FIX: newlines were previously deleted outright, which glued
        # together words on adjacent lines; replace them with a space.
        input_sentence = re.sub(r'\n+', ' ', input_sentence)
        docs = self.nlp(input_sentence)
        sentences = [[str(tok).strip() for tok in sent] for sent in docs.sents]

        def sentence2diagram(tokens: list[str], pb: tqdm = None):
            # Parse the pre-tokenised sentence into a pregroup diagram,
            # then simplify it with the rewriter.
            diagram = self.parser.sentence2diagram(tokens, tokenised=True)
            diagram = self.rewriter(diagram).normal_form()

            if pb:
                pb.update(1)
            return diagram

        pb = tqdm(total=len(sentences), desc="Splitting sentences")
        try:
            # sharedmem so the worker threads can update the shared bar.
            diagrams = Parallel(4, require='sharedmem')(
                delayed(sentence2diagram)(s, pb) for s in sentences)
        finally:
            # FIX: bar was never closed, leaking tqdm state between runs.
            pb.close()

        qiskit_circuits = [tk_to_qiskit(ANSATZ(diagram).to_tk())
                           for diagram in diagrams]
        for qc in qiskit_circuits:
            qc.measure_all()

        sampler = SamplerV2()
        # Every ansatz parameter is bound to 1 (untrained model) before
        # sampling `shots` times.
        jobs = [sampler.run([(qc, [1] * qc.num_parameters)], shots=shots)
                for qc in qiskit_circuits]

        return [QNLP_OUTPUT(*params)
                for params in zip(sentences, qiskit_circuits, jobs)]
97
+