freemt committed on
Commit
d7cdc67
1 Parent(s): 5821b23

Switch to blocks, attempt

Browse files
Files changed (10) hide show
  1. .stignore +101 -0
  2. install-sw.sh +23 -0
  3. install-sw1.sh +25 -0
  4. okteto.yml +44 -0
  5. poetry.toml +3 -0
  6. requirements.txt +8 -9
  7. ubee/__main__.py +7 -9
  8. ubee/seg_text.py +3 -4
  9. ubee/ubee.py +4 -3
  10. ubee/uclas.py +7 -7
.stignore ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build
13
+ develop-eggs
14
+ dist
15
+ downloads
16
+ eggs
17
+ .eggs
18
+ lib
19
+ lib64
20
+ parts
21
+ sdist
22
+ var
23
+ wheels
24
+ pip-wheel-metadata
25
+ share/python-wheels
26
+ *.egg-info
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Translations
42
+ *.mo
43
+ *.pot
44
+
45
+ # Django stuff:
46
+ *.log
47
+ local_settings.py
48
+ db.sqlite3
49
+
50
+ # Flask stuff:
51
+ instance
52
+ .webassets-cache
53
+
54
+ # Scrapy stuff:
55
+ .scrapy
56
+
57
+ # Sphinx documentation
58
+ docs/_build
59
+
60
+ # PyBuilder
61
+ target
62
+
63
+ # Jupyter Notebook
64
+ .ipynb_checkpoints
65
+
66
+ # IPython
67
+ profile_default
68
+ ipython_config.py
69
+
70
+ # pyenv
71
+ .python-version
72
+
73
+ # celery beat schedule file
74
+ celerybeat-schedule
75
+
76
+ # SageMath parsed files
77
+ *.sage.py
78
+
79
+ # Environments
80
+ .env
81
+ .venv
82
+ env
83
+ venv
84
+ ENV
85
+ env.bak
86
+ venv.bak
87
+
88
+ # Spyder project settings
89
+ .spyderproject
90
+ .spyproject
91
+
92
+ # Rope project settings
93
+ .ropeproject
94
+
95
+ # mypy
96
+ .mypy_cache
97
+ .dmypy.json
98
+ dmypy.json
99
+
100
+ # Pyre type checker
101
+ .pyre
install-sw.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install pipx
2
+ # pipx install poetry
3
+ # pipx ensurepath
4
+ # source ~/.bashrc
5
+
6
+ # curl -sSL https://install.python-poetry.org | python3 -
7
+ # -C- continue -S show error -o output
8
+ curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
9
+ python install-poetry.py
10
+ rm install-poetry.py
11
+ echo export PATH=~/.local/bin:$PATH > ~/.bashrc
12
+ source ~/.bashrc
13
+ # ~/.local/bin/poetry install
14
+
15
+ wget -c https://deb.nodesource.com/setup_12.x
16
+ bash setup_12.x
17
+ apt-get install -y nodejs
18
+ npm install -g npm@latest
19
+ npm install -g nodemon
20
+ rm setup_12.x
21
+
22
+ # apt update # already done in apt-get install -y nodejs
23
+ apt install byobu -y > /dev/null 2>&1
install-sw1.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install pipx
2
+ # pipx install poetry
3
+ # pipx ensurepath
4
+ # source ~/.bashrc
5
+
6
+ # curl -sSL https://install.python-poetry.org | python3 -
7
+ # -C- continue -S show error -o output
8
+ curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
9
+ python install-poetry.py
10
+ rm install-poetry.py
11
+ echo export PATH=~/.local/bin:$PATH > ~/.bashrc
12
+ source ~/.bashrc
13
+ # ~/.local/bin/poetry install
14
+
15
+ wget -c https://deb.nodesource.com/setup_12.x
16
+ bash setup_12.x
17
+ apt-get install -y nodejs
18
+ npm install -g npm@latest
19
+ npm install -g nodemon
20
+ rm setup_12.x
21
+
22
+ # apt update # already done in apt-get install -y nodejs
23
+ apt install byobu -y > /dev/null 2>&1
24
+ byobu-enable
25
+ byobu
okteto.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: gradio-cmat
2
+
3
+ # The build section defines how to build the images of
4
+ # your development environment
5
+ # More info: https://www.okteto.com/docs/reference/manifest/#build
6
+ # build:
7
+ # my-service:
8
+ # context: .
9
+
10
+ # The deploy section defines how to deploy your development environment
11
+ # More info: https://www.okteto.com/docs/reference/manifest/#deploy
12
+ # deploy:
13
+ # commands:
14
+ # - name: Deploy
15
+ # command: echo 'Replace this line with the proper 'helm'
16
+
17
+ # or 'kubectl' commands to deploy your development environment'
18
+
19
+ # The dependencies section defines other git repositories to be
20
+ # deployed as part of your development environment
21
+ # More info: https://www.okteto.com/docs/reference/manifest/#dependencies
22
+ # dependencies:
23
+ # - https://github.com/okteto/sample
24
+ # The dev section defines how to activate a development container
25
+ # More info: https://www.okteto.com/docs/reference/manifest/#dev
26
+ dev:
27
+ gradio-cmat:
28
+ # image: okteto/dev:latest
29
+ # image: python:3.8.13-bullseye
30
+ # image: simbachain/poetry-3.8
31
+ image: python:3.8
32
+ command: bash
33
+ workdir: /usr/src/app
34
+ sync:
35
+ - .:/usr/src/app
36
+ environment:
37
+ - name=$USER
38
+ forward:
39
+ - 7861:7861
40
+ - 7860:7860
41
+ - 8501:8501
42
+ reverse:
43
+ - 9000:9000
44
+ autocreate: true
poetry.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [virtualenvs]
2
+ create = true
3
+ in-project = true
requirements.txt CHANGED
@@ -3,10 +3,8 @@ install
3
  transformers
4
  sentencepiece
5
  sklearn
6
- git+https://github.com/ffreemt/fast-langid
7
  git+https://github.com/ffreemt/align-model-pool
8
  sentence-transformers
9
- sentence_splitter
10
  logzero
11
  icecream
12
  alive-progress
@@ -14,10 +12,11 @@ more_itertools
14
  #
15
  openpyxl
16
  # --- seg_text
17
- Morfessor
18
- pyicu
19
- pycld2
20
- tqdm
21
- polyglot
22
- sentence_splitter
23
- pyfunctional
 
 
3
  transformers
4
  sentencepiece
5
  sklearn
 
6
  git+https://github.com/ffreemt/align-model-pool
7
  sentence-transformers
 
8
  logzero
9
  icecream
10
  alive-progress
 
12
  #
13
  openpyxl
14
  # --- seg_text
15
+ # Morfessor
16
+ # pyicu
17
+ # pycld2
18
+ # tqdm
19
+ # polyglot
20
+ # sentence_splitter
21
+ # pyfunctional
22
+ # git+https://github.com/ffreemt/fast-langid
ubee/__main__.py CHANGED
@@ -1,20 +1,18 @@
1
  """Gen ubee main."""
2
- # pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except
3
 
4
- from typing import Tuple, Optional
5
-
6
- from pathlib import Path
7
  import sys
8
- from random import shuffle
9
-
10
  from itertools import zip_longest
 
 
11
  from textwrap import dedent
 
12
 
13
  import gradio as gr
14
-
15
- import pandas as pd
16
- from icecream import install as ic_install, ic
17
  import logzero
 
 
 
18
  from logzero import logger
19
 
20
  # for embeddable python
 
1
  """Gen ubee main."""
2
+ # pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
3
 
 
 
 
4
  import sys
 
 
5
  from itertools import zip_longest
6
+ from pathlib import Path
7
+ from random import shuffle
8
  from textwrap import dedent
9
+ from typing import Optional, Tuple
10
 
11
  import gradio as gr
 
 
 
12
  import logzero
13
+ import pandas as pd
14
+ from icecream import ic
15
+ from icecream import install as ic_install
16
  from logzero import logger
17
 
18
  # for embeddable python
ubee/seg_text.py CHANGED
@@ -9,15 +9,14 @@ else use polyglot.text.Text
9
  """
10
  # pylint: disable=
11
 
 
12
  from typing import List, Optional, Union
13
 
14
- import re
15
- from tqdm.auto import tqdm
16
  from polyglot.detect.base import logger as polyglot_logger
17
  from polyglot.text import Detector, Text
18
  from sentence_splitter import split_text_into_sentences
19
-
20
- from logzero import logger
21
 
22
  # turn off polyglot.text.Detector warning
23
  polyglot_logger.setLevel("ERROR")
 
9
  """
10
  # pylint: disable=
11
 
12
+ import re
13
  from typing import List, Optional, Union
14
 
15
+ from logzero import logger
 
16
  from polyglot.detect.base import logger as polyglot_logger
17
  from polyglot.text import Detector, Text
18
  from sentence_splitter import split_text_into_sentences
19
+ from tqdm.auto import tqdm
 
20
 
21
  # turn off polyglot.text.Detector warning
22
  polyglot_logger.setLevel("ERROR")
ubee/ubee.py CHANGED
@@ -1,11 +1,12 @@
1
  """Align via ubee,"""
2
  # pylint: disable=
3
- from typing import Iterable, List, Tuple
4
  from itertools import zip_longest
 
5
 
 
6
  from logzero import logger
 
7
  from ubee.uclas import uclas
8
- from icecream import ic
9
 
10
 
11
  def ubee(
@@ -17,7 +18,7 @@ def ubee(
17
 
18
  Args:
19
  sents_zh: list of text, can be any language supported by clas-l-user
20
- sents_zh: ditto
21
  Returns:
22
  three tuples of aligned blocked
23
  leftovers (unaligned)
 
1
  """Align via ubee,"""
2
  # pylint: disable=
 
3
  from itertools import zip_longest
4
+ from typing import Iterable, List, Tuple
5
 
6
+ from icecream import ic
7
  from logzero import logger
8
+
9
  from ubee.uclas import uclas
 
10
 
11
 
12
  def ubee(
 
18
 
19
  Args:
20
  sents_zh: list of text, can be any language supported by clas-l-user
21
+ sents_en: ditto
22
  Returns:
23
  three tuples of aligned blocked
24
  leftovers (unaligned)
ubee/uclas.py CHANGED
@@ -2,16 +2,16 @@
2
  # pylint: disable=invalid-name
3
 
4
  from typing import List, Tuple, Union
5
- import numpy as np
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- from joblib import Memory
8
-
9
- from model_pool import fetch_check_aux # pylint: disable=import-error
10
- from model_pool.model_s import load_model_s # pylint: disable=import-error
11
- from model_pool.load_model import load_model # pylint: disable=import-error
12
 
13
  import logzero
 
 
14
  from logzero import logger
 
 
 
 
 
15
 
16
  logzero.loglevel(20)
17
 
 
2
  # pylint: disable=invalid-name
3
 
4
  from typing import List, Tuple, Union
 
 
 
 
 
 
 
5
 
6
  import logzero
7
+ import numpy as np
8
+ from joblib import Memory
9
  from logzero import logger
10
+ # set PYTHONPATH=..\align-model-pool # in win10
11
+ from model_pool.fetch_check_aux import fetch_check_aux
12
+ from model_pool.load_model import load_model
13
+ from model_pool.model_s import load_model_s
14
+ from sklearn.metrics.pairwise import cosine_similarity
15
 
16
  logzero.loglevel(20)
17