freemt committed
Commit a4a35d8
1 Parent(s): 25a8a17

Update altair

.gitignore CHANGED
@@ -1,2 +1,6 @@
 .venv
 **/__pycache__
+app.build
+app.dist
+build
+app.exe
app.cmd ADDED
@@ -0,0 +1,6 @@
+
+@echo off
+rem This script was created by Nuitka to execute 'app.exe' with Python DLL being found.
+set PATH=c:\python\python37;%PATH%
+set PYTHONHOME=c:\python\python37
+"%~dp0.\app.exe"
app.exe ADDED
Binary file (702 kB).
app.py CHANGED
@@ -1,15 +1,25 @@
 """Talk to spaces VM via subprocess.check_output."""
+# pylint: disable=wrong-import-position
+import sys
+from pathlib import Path
+if "." not in sys.path:
+    sys.path.insert(0, ".")
+
 # import httpx
 import subprocess as sp
 from shlex import split
+import pandas as pd
 
 # from textwrap import dedent
 from inspect import cleandoc
 import gradio as gr
+import logzero
 from logzero import logger
 
 from gradiobee.seg_text import seg_text
 
+logzero.loglevel()  # default to 10
+
 
 # def greet(command):
 def process(command):
@@ -42,17 +52,54 @@ def process(command):
         ).strip()
         if not out:
             out = "No output, that's all we know."
-        return out
+        return out, None
+
+    # quick test altair altair-save tooltip
+    # from PIL import Image
+    import altair as alt
+    from altair_saver import save
+    df_ = pd.DataFrame(data={'x': [1, 2], 'y': [3, 4], "cos": [0.1, 0.5]})
+    chart_df = alt.Chart(df_).mark_circle(size=60).encode(
+        x='x',
+        y='y',
+        color='cos',
+        # tooltip=['x', 'y', 'cos', ]
+    )
+    # .interactive()
+
+    # save(chart_df, "chart_df.html")
+    # chart_df_html = Path("chart_df.html").read_text("utf")
+    # save(chart_df, "chart_df.png")
+    # chart_df_png = Path("chart_df.png").read_bytes()
+
+    # chart_df_png = Image.open("chart_df.png")
+    # chart_df_png = "chart_df.png"
+
+    # scatter_plot.save('simple_scatter_plot_with_altairchart.html')
+    # chart_df.save("chart_df.html")  # does not work, contains js
+    # chart_df_html = Path("chart_df.html").read_text("utf")
+    chart_df.save("chart_df.png")
+    chart_df_png = "chart_df.png"
 
     # not is_command or not flag: text, do seg_text
     _ = "\n\n".join(seg_text(command.strip()))
+
+    logger.debug(_)
+    # logger.debug(chart_df_html)
+    # print(_)
+    # print(chart_df_html)
+
     # _ = seg_text(command.strip())
-    return cleandoc(
+    _ = cleandoc(
         f"""seg_text output (segmented sents):
         {_}
         """
     ).strip()
 
+    # return _, chart_df_html
+
+    return _, chart_df_png
+
 
 iface = gr.Interface(
     # fn=greet,
@@ -65,7 +112,13 @@ iface = gr.Interface(
         default="python -m site",
         label="command or multiline text",
     ),
-    outputs="text",
+    # outputs="text",
+    # outputs=["text",],
+    # outputs=["text", "html"],
+    outputs=[
+        "text",
+        gr.outputs.Image("auto"),
+    ],
    examples=[
         "cat /proc/version",
         "free # show free memory",
@@ -76,7 +129,8 @@
     ],
     title="probe the system",
     description="Talk to the system via subprocess.check_output ",
+    layout="vertical",
 )
 
-# iface.launch(share=True, debug=True)
-iface.launch(debug=True)
+iface.launch(share=True, debug=True)
+# iface.launch()
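
For context: process() now returns a (text, image path) pair, and the Interface registers one output slot per returned value. A minimal standalone sketch of that pattern (not the committed file; it assumes altair 4.x with altair_saver plus one of its PNG backends such as selenium or node, and uses the gradio 2.x-era gr.outputs API seen in the diff):

    import altair as alt
    import gradio as gr
    import pandas as pd

    def process(command):
        # toy chart persisted to disk; gradio's Image output accepts a file path
        df_ = pd.DataFrame({"x": [1, 2], "y": [3, 4], "cos": [0.1, 0.5]})
        chart = alt.Chart(df_).mark_circle(size=60).encode(x="x", y="y", color="cos")
        chart.save("chart_df.png")  # PNG export goes through altair_saver
        return command.strip(), "chart_df.png"

    iface = gr.Interface(
        fn=process,
        inputs="text",
        outputs=["text", gr.outputs.Image("auto")],  # one slot per returned value
    )
    # iface.launch()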
app.spec ADDED
@@ -0,0 +1,65 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+
+block_cipher = None
+
+
+a = Analysis(['app.py'],
+             pathex=[],
+             binaries=[],
+             datas=[],
+             hiddenimports=[
+                 "gradio",
+                 "requests",
+                 "Flask-Login",
+                 "markdown2",
+                 "analytics-python",
+                 "Flask",
+                 "Flask-Cors",
+                 "flask-cachebuster",
+                 "paramiko",
+                 "tornado",
+                 "matplotlib",
+                 "pycryptodome",
+                 "pandas",
+                 "pillow",
+                 "pydub",
+                 "ffmpy",
+             ],
+             hookspath=[],
+             hooksconfig={},
+             runtime_hooks=[],
+             excludes=[
+                 "Ipython",
+                 "wx",
+                 "wx",
+             ],
+             win_no_prefer_redirects=False,
+             win_private_assemblies=False,
+             cipher=block_cipher,
+             noarchive=False)
+pyz = PYZ(a.pure, a.zipped_data,
+          cipher=block_cipher)
+
+exe = EXE(pyz,
+          a.scripts,
+          [],
+          exclude_binaries=True,
+          name='app',
+          debug=False,
+          bootloader_ignore_signals=False,
+          strip=False,
+          upx=True,
+          console=True,
+          disable_windowed_traceback=False,
+          target_arch=None,
+          codesign_identity=None,
+          entitlements_file=None )
+coll = COLLECT(exe,
+               a.binaries,
+               a.zipfiles,
+               a.datas,
+               strip=False,
+               upx=True,
+               upx_exclude=[],
+               name='app')
chart_df.html ADDED
@@ -0,0 +1,35 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <style>
+    .error {
+        color: red;
+    }
+  </style>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega@5"></script>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-lite@4.17.0"></script>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-embed@6"></script>
+</head>
+<body>
+  <div id="vis"></div>
+  <script>
+    (function(vegaEmbed) {
+      var spec = {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-e58adc7548bdded6fd09c49a28ff71ba"}, "mark": {"type": "circle", "size": 60}, "encoding": {"color": {"field": "cos", "type": "quantitative"}, "x": {"field": "x", "type": "quantitative"}, "y": {"field": "y", "type": "quantitative"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.17.0.json", "datasets": {"data-e58adc7548bdded6fd09c49a28ff71ba": [{"x": 1, "y": 3, "cos": 0.1}, {"x": 2, "y": 4, "cos": 0.5}]}};
+      var embedOpt = {"mode": "vega-lite"};
+
+      function showError(el, error){
+          el.innerHTML = ('<div class="error" style="color:red;">'
+                          + '<p>JavaScript Error: ' + error.message + '</p>'
+                          + "<p>This usually means there's a typo in your chart specification. "
+                          + "See the javascript console for the full traceback.</p>"
+                          + '</div>');
+          throw error;
+      }
+      const el = document.getElementById('vis');
+      vegaEmbed("#vis", spec, embedOpt)
+        .catch(error => showError(el, error));
+    })(vegaEmbed);
+
+  </script>
+</body>
+</html>
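
This file is the stock HTML scaffold altair emits for .html targets (vega, vega-lite and vega-embed loaded from jsdelivr, plus the inlined chart spec). A page like it can be regenerated from the commit's toy data (sketch):

    import altair as alt
    import pandas as pd

    df_ = pd.DataFrame({"x": [1, 2], "y": [3, 4], "cos": [0.1, 0.5]})
    chart = alt.Chart(df_).mark_circle(size=60).encode(x="x", y="y", color="cos")
    chart.save("chart_df.html")  # writes a vega-embed page like the one above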
chart_df.png ADDED
flagged/Output 2/0.png ADDED
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
+command or multiline text,Output 1,Output 2,timestamp
+pyth1on -m site,"seg_text output (segmented sents):
+pyth1on -m site",Output 2/0.png,2022-01-08 14:04:38.035704
gradiobee/seg_text.py CHANGED
@@ -7,8 +7,11 @@ else use polyglot.text.Text
 !install pyicu pycld2 Morfessor
 !pip install polyglot sentence_splitter
 """
-from typing import List, Optional
+# pylint: disable=
 
+from typing import List, Optional, Union
+
+import re
 from tqdm.auto import tqdm
 from polyglot.detect.base import logger as polyglot_logger
 from polyglot.text import Detector, Text
@@ -27,34 +30,39 @@ LANG_S = ["ca", "cs", "da", "nl", "en", "fi", "fr", "de",
           "pt", "ro", "ru", "sk", "sl", "es", "sv", "tr"]
 
 
-def seg_text(
+def _seg_text(
     text: str,
     lang: Optional[str] = None,
-    qmode: bool = False,
+    # qmode: bool = False,
     maxlines: int = 1000
 ) -> List[str]:
     # fmt: on
-    """
-    Split text to sentences.
+    """Split text to sentences.
 
     Use sentence_splitter if supported,
     else use polyglot.text.Text.sentences
+    Blank lines will be removed.
 
-    qmode: skip split_text_into_sentences if True, default False
+    qmode: quick mode, skip split_text_into_sentences if True, default False
     vectors for all books are based on qmode=False.
     qmode=True is for quick test purpose only
 
-    maxlines (default 1000), threhold for turn on tqdm progressbar
+    maxlines (default 1000), threshold for turning on the tqdm progressbar
     set to <1 or a large number to turn it off
     """
     if lang is None:
         try:
             lang = Detector(text).language.code
         except Exception as exc:
-            logger.warning("polyglot.text.Detector exc: %s, setting to 'en'", exc)
+            logger.info("text[:30]: %s", text[:30])
+            logger.warning(
+                "polyglot.text.Detector exc: %s, setting to 'en'",
+                exc
+            )
             lang = "en"
 
-    if not qmode and lang in LANG_S:
+    # if not qmode and lang in LANG_S:
+    if lang in LANG_S:
         _ = []
         lines = text.splitlines()
         # if maxlines > 1 and len(lines) > maxlines:
@@ -70,4 +78,42 @@ def seg_text(
 
     # return split_text_into_sentences(text, lang)
 
+    # empty "" or blank text would raise in polyglot; return early
+    if not text.strip():
+        return []
+
     return [elm.string for elm in Text(text, lang).sentences]
+
+
+# fmt: off
+def seg_text(
+    lst: Union[str, List[str]],
+    lang: Optional[str] = None,
+    maxlines: int = 1000,
+    extra: Optional[str] = None,
+) -> List[str]:
+    # fmt: on
+    """Split a list of texts.
+
+    Arguments:
+        lst: text or list of texts
+        extra: if given, first insert "\n" after each regex match of extra
+    Returns:
+        list of split texts.
+    """
+    if isinstance(lst, str):
+        lst = [lst]
+
+    if extra:
+        # insert \n after each match so _seg_text splits there too
+        lst = [re.sub(rf"({extra})", r"\1\n", elm) for elm in lst]
+
+    res = []
+    for elm in lst:
+        res.extend(_seg_text(
+            elm,
+            lang=lang,
+            maxlines=maxlines,
+        ))
+
+    return res
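
The new extra hook only pre-inserts newlines before segmentation. Isolated, with the same data the new tests use, the effect looks like this (sketch, stdlib only):

    import re

    extra = "[;;]"  # ASCII or full-width semicolon
    text = "該詞出自1992年;的科幻小說《雪崩》。"
    # seg_text appends "\n" to every match of `extra` ...
    pre_split = re.sub(rf"({extra})", r"\1\n", text)
    # ... so the line-based splitting in _seg_text yields one extra segment per match
    print(pre_split.splitlines())  # ['該詞出自1992年;', '的科幻小說《雪崩》。']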
requirements.in CHANGED
@@ -10,3 +10,5 @@ pycld2
 tqdm
 polyglot
 sentence_splitter
+altair
+altair_saver
requirements.txt CHANGED
@@ -4,6 +4,10 @@
 #
 # pip-compile requirements.in
 #
+altair==4.2.0
+    # via -r requirements.in
+attrs==21.4.0
+    # via jsonschema
 blis==0.7.5
     # via
     #   spacy
@@ -34,18 +38,26 @@ cymem==2.0.6
     #   thinc
 cytoolz==0.11.2
     # via textacy
+entrypoints==0.3
+    # via altair
 fonttools==4.28.5
     # via matplotlib
 idna==3.3
     # via requests
+importlib-resources==5.4.0
+    # via jsonschema
 jellyfish==0.8.9
     # via textacy
 jinja2==3.0.3
-    # via spacy
+    # via
+    #   altair
+    #   spacy
 joblib==1.1.0
     # via
     #   scikit-learn
     #   textacy
+jsonschema==4.3.3
+    # via altair
 kiwisolver==1.3.2
     # via matplotlib
 langcodes==3.3.0
@@ -69,6 +81,7 @@ networkx==2.6.3
     # via textacy
 numpy==1.21.5
     # via
+    #   altair
     #   blis
     #   matplotlib
     #   pandas
@@ -83,7 +96,9 @@ packaging==21.3
     #   matplotlib
     #   spacy
 pandas==1.3.5
-    # via seaborn
+    # via
+    #   altair
+    #   seaborn
 pathy==0.6.1
     # via spacy
 pillow==8.4.0
@@ -108,6 +123,8 @@ pyparsing==3.0.6
     #   packaging
 pyphen==0.12.0
     # via textacy
+pyrsistent==0.18.0
+    # via jsonschema
 python-dateutil==2.8.2
     # via
     #   matplotlib
@@ -156,7 +173,9 @@ thinc==8.0.13
 threadpoolctl==3.0.0
     # via scikit-learn
 toolz==0.11.2
-    # via cytoolz
+    # via
+    #   altair
+    #   cytoolz
 tqdm==4.62.3
     # via
     #   -r requirements.in
@@ -175,7 +194,9 @@ wasabi==0.9.0
     #   spacy
     #   spacy-loggers
     #   thinc
+zipp==3.7.0
+    # via importlib-resources
 
 # The following packages are considered to be unsafe in a requirements file:
 # setuptools
-pyicu
+altair_saver
run-nuitka.bat ADDED
@@ -0,0 +1,2 @@
+REM python -m nuitka app.py
+python -m nuitka --nofollow-imports app.py
run-pyinstaller-spec.bat ADDED
@@ -0,0 +1 @@
+pyinstaller -y app.spec
run-python-app_py.bat CHANGED
@@ -1,3 +1,3 @@
 REM nodemon -w app.py -x .venv\Scripts\python app.py
 REM nodemon -w app.py -x py -3.7 app.py
-nodemon -w app.py -x py -3.8 app.py
+nodemon -w app.py -x "pyright app.py && py -3.8 app.py"
tests/__init__.py ADDED
File without changes
tests/test_seg_text.py ADDED
@@ -0,0 +1,47 @@
+"""Test seg_text."""
+import pytest
+from gradiobee.seg_text import seg_text
+
+
+def test_seg_text1():
+    """Test seg_text 1."""
+    text = " text 1\n\n test 2. test 3"
+    _ = seg_text(text)
+    assert len(_) == 2
+
+    text = " text 1\n\n test 2. Test 3"
+    _ = seg_text(text)
+    assert len(_) == 3
+
+
+@pytest.mark.parametrize(
+    "test_input,expected", [
+        ("", []),
+        (" ", []),
+        (" \n ", []),
+    ]
+)
+def test_seg_text_blanks(test_input, expected):
+    """Test blanks."""
+    assert seg_text(test_input) == expected
+
+
+def test_seg_text_semicolon():
+    """Test semicolon."""
+    text = """ “元宇宙”,英文為“Metaverse”。該詞出自1992年;的科幻小說《雪崩》。 """
+    assert len(seg_text(text)) == 2
+    assert len(seg_text(text, 'zh')) == 2
+    assert len(seg_text(text, 'ja')) == 2
+    assert len(seg_text(text, 'ko')) == 2
+    assert len(seg_text(text, 'en')) == 1
+
+
+def test_seg_text_semicolon_extra():
+    """Test semicolon with extra."""
+    extra = "[;;]"
+    text = """ “元宇宙”,英文為“Metaverse”。該詞出自1992年;的科幻小說《雪崩》。 """
+    assert len(seg_text(text, extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'zh', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'ja', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'ko', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'en', extra=extra)) == 1 + 1