Spaces:
Sleeping
Sleeping
Yarik
committed on
Commit
•
acee69e
1
Parent(s):
61d74e2
Add application file
Browse files- accentor_lib/-setup.py +48 -0
- accentor_lib/LICENSE.md +21 -0
- accentor_lib/README.md +27 -0
- accentor_lib/requirements/requirements.txt +4 -0
- accentor_lib/requirements/test_requirements.txt +2 -0
- accentor_lib/tests/sentences.txt +0 -0
- accentor_lib/tests/test_accentor.py +47 -0
- accentor_lib/ukrainian_accentor_transformer/__init__.py +195 -0
- accentor_lib/ukrainian_accentor_transformer/sequence_utils.py +34 -0
- accentor_lib/version.py +1 -0
- accentor_lib/version_bump.py +26 -0
accentor_lib/-setup.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from os import path, getenv
|
2 |
+
|
3 |
+
from setuptools import setup, find_packages
|
4 |
+
|
5 |
+
|
6 |
+
def get_requirements(requirements_filename: str):
    """Read a requirements file and return its entries as a list of strings.

    Looks up *requirements_filename* inside the ``requirements`` directory
    next to this file, drops blank lines and ``#`` comments, lower-cases the
    URL part of ``git+http`` specifiers, and injects a ``GITHUB_TOKEN``
    (when present in the environment) into ``github.com`` URLs so private
    repositories can be installed in CI.

    Args:
        requirements_filename: file name inside the ``requirements`` directory.

    Returns:
        List of cleaned requirement specifier strings.
    """
    requirements_file = path.join(path.abspath(path.dirname(__file__)), "requirements", requirements_filename)
    with open(requirements_file, 'r', encoding='utf-8') as r:
        requirements = r.readlines()
    requirements = [r.strip() for r in requirements if r.strip() and not r.strip().startswith("#")]

    for i, r in enumerate(requirements):
        if "@" in r:
            # Lower-case only the VCS-URL part of "name @ git+http..." specifiers.
            parts = [p.lower() if p.strip().startswith("git+http") else p for p in r.split('@')]
            r = "@".join(parts)
        if getenv("GITHUB_TOKEN"):
            if "github.com" in r:
                # Turn https://github.com/... into https://<token>@github.com/...
                r = r.replace("github.com", f"{getenv('GITHUB_TOKEN')}@github.com")
        requirements[i] = r
    return requirements
|
22 |
+
|
23 |
+
|
24 |
+
# Long description shown on PyPI is the project README.
# NOTE: encoding made explicit for consistency with the version.py read below
# (relying on the platform default encoding can break on Windows).
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

# Parse __version__ out of version.py without importing the package
# (importing could pull in heavy runtime dependencies at build time).
with open("./version.py", "r", encoding="utf-8") as v:
    for line in v.readlines():
        if line.startswith("__version__"):
            # The value may use either quote style: __version__ = "x" / 'x'
            if '"' in line:
                version = line.split('"')[1]
            else:
                version = line.split("'")[1]

setup(
    name='ukrainian-accentor-transformer',
    version=version,
    description='Adds word stress for texts in Ukrainian',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/Theodotus1243/ukrainian-accentor-transformer',
    author='Theodotus1243',
    license='MIT',
    packages=find_packages(),
    install_requires=get_requirements("requirements.txt"),
    zip_safe=True,
    keywords='ukrainian accent stress nlp transformer linguistics',
)
|
accentor_lib/LICENSE.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Bohdan Mykhailenko
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
accentor_lib/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ukrainian Accentor Transformer
|
2 |
+
|
3 |
+
This repository contains a model to make accents in Ukrainian words.
|
4 |
+
|
5 |
+
## Installation
|
6 |
+
|
7 |
+
```bash
|
8 |
+
pip install git+https://github.com/Theodotus1243/ukrainian-accentor-transformer.git
|
9 |
+
```
|
10 |
+
|
11 |
+
## Example
|
12 |
+
|
13 |
+
```python
|
14 |
+
>>> from ukrainian_accentor_transformer import Accentor
|
15 |
+
>>> text = "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району."
|
16 |
+
>>> accentor = Accentor()
|
17 |
+
>>> accentor(text)
|
18 |
+
|
19 |
+
"Кам'яне́ць-Поді́льський - мі́сто в Хмельни́цькій о́бласті Украї́ни, центр Кам'яне́ць-Поді́льської місько́ї об'є́днаної територіа́льної грома́ди і Кам'яне́ць-Поді́льського райо́ну."
|
20 |
+
```
|
21 |
+
|
22 |
+
## Attribution
|
23 |
+
|
24 |
+
Trained on dataset - [News corpus](https://lang.org.ua/en/corpora/#anchor5)
|
25 |
+
by [Dmytro Chaplynskyi](https://github.com/dchaplinsky) from [lang-uk](https://github.com/lang-uk)\
|
26 |
+
Stressed using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress)
|
27 |
+
by [Oleksiy Syvokon](https://github.com/asivokon)
|
accentor_lib/requirements/requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ctranslate2
|
2 |
+
sentencepiece
|
3 |
+
# huggingface
|
4 |
+
huggingface-hub
|
accentor_lib/requirements/test_requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
pytest
|
2 |
+
pytest-timeout
|
accentor_lib/tests/sentences.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
accentor_lib/tests/test_accentor.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import unittest
|
4 |
+
|
5 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
6 |
+
from ukrainian_accentor_transformer import Accentor
|
7 |
+
|
8 |
+
|
9 |
+
class TestAccentor(unittest.TestCase):
    """Integration tests for the transformer-based Ukrainian accentor.

    Each test checks that stripping the combining acute accent (U+0301)
    from the accented output reproduces the original input text, i.e. the
    model only ADDS accents and never alters the text itself.
    """

    @classmethod
    def setUpClass(cls):
        # Model download/load is expensive, so one instance is shared by all tests.
        cls.accentor = Accentor()

    def test_simple_accent(self):
        text = "Привіт хлопче, як справи."
        accented = self.accentor(text)
        self.assertEqual(text, accented.replace("\u0301", ""))

    def test_batch_accent(self):
        text1 = "Привіт хлопче, як справи."
        text2 = "в мене все добре, дякую."
        accented1, accented2 = self.accentor([text1, text2])
        self.assertEqual(text1, accented1.replace("\u0301", ""))
        self.assertEqual(text2, accented2.replace("\u0301", ""))

    def test_long_sentence(self):
        text = "Адже як би не оцінював галичан один страшно інтелігентний виходець з радянсько єврейських середовищ київського Подолу самі галичани вважають свою культуру і традицію політичну і релігійну побутову й господарську на голову вищою від усього що за Збручем"
        accented = self.accentor(text)
        self.assertEqual(text, accented.replace("\u0301", ""))

    # BUGFIX: this method was also named test_long_sentence, silently
    # shadowing the one above so it never ran; renamed so both execute.
    def test_very_long_sentence(self):
        text = "Веселка також райдуга атмосферне оптичне явище що являє собою одну дві чи декілька спектральних дуг або кіл якщо дивитися з повітря що спостерігаються на тлі хмари якщо вона розташована проти Сонця Червоний колір спектру ми бачимо з зовнішнього боку первинної веселки а фіолетовий із внутрішнього"
        accented = self.accentor(text)
        self.assertEqual(text, accented.replace("\u0301", ""))

    def test_corpus(self):
        # Relies on being run from the package root so that the relative
        # path "tests/sentences.txt" resolves.
        with open("tests/sentences.txt") as sentences_file:
            sentences = sentences_file.readlines()
        accented = self.accentor(sentences)
        clean_sentences = self.accentor._clean_accents(accented)
        for sentence, clean_sentence in zip(sentences, clean_sentences):
            self.assertEqual(sentence, clean_sentence)


if __name__ == '__main__':
    unittest.main()
|
accentor_lib/ukrainian_accentor_transformer/__init__.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Union, Tuple
|
2 |
+
|
3 |
+
import ctranslate2
|
4 |
+
import sentencepiece as spm
|
5 |
+
from huggingface_hub import snapshot_download
|
6 |
+
|
7 |
+
from .sequence_utils import diff_fix
|
8 |
+
|
9 |
+
|
10 |
+
class Accentor:
    """Adds word stress marks (combining acute, U+0301) to Ukrainian text.

    Wraps a CTranslate2 sequence-to-sequence model and a SentencePiece
    tokenizer, both fetched from the Hugging Face Hub at construction time.
    """

    # Hub repository (optionally suffixed with "@revision") hosting the model.
    _hf_repo = "theodotus/ukrainian-accentor-transformer@v0.1"

    # Chunks with this many tokens or more are halved before translation.
    max_len = 30
    # Punctuation tokens that delimit chunks before translation.
    split_tokens = set([".", ",", "!", "?"])

    # ctranslate2.Translator constructor options.
    _init_config = {
        "inter_threads": 2,
        "intra_threads": 4
    }

    # Options forwarded to translate_batch().
    _run_config = {
        "repetition_penalty": 1.2,
        "max_batch_size": 8
    }

    def __init__(self, device: str = "cpu"):
        self._init_model(device=device)

    def __call__(self, sentence: Union[List[str], str],
                 symbol: str = "stress", mode: str = "reduced") -> Union[List[str], str]:
        """
        Add word stress to texts in Ukrainian
        Args:
            sentence: sentence to accent (a single string or a list of strings)
            symbol: accepted for interface stability; currently unused
            mode: accepted for interface stability; currently unused

        Returns:
            accented_sentence: same shape as the input (str in -> str out,
            list in -> list out)

        Raises:
            TypeError: if sentence is neither a str nor a list

        Examples:
            Simple usage.

            >>> from ukrainian_accentor_transformer import Accentor
            >>> accentor = Accentor()
            >>> accented_sentence = accentor("Привіт хлопче")
        """
        if isinstance(sentence, str):
            sentences = [sentence]
        elif isinstance(sentence, list):
            sentences = sentence
        else:
            # BUGFIX: previously fell through with `sentences` unbound,
            # raising a confusing UnboundLocalError; fail clearly instead.
            raise TypeError(f"sentence must be str or list, got {type(sentence).__name__}")

        accented_sentences = self._accent(sentences=sentences, symbol=symbol, mode=mode)

        # Mirror the input shape: unwrap the singleton list for str input.
        if isinstance(sentence, str):
            return accented_sentences[0]
        return accented_sentences

    def _accent(self, sentences: List[str], symbol: str, mode: str) -> List[str]:
        """
        Internal accent pipeline
        Args:
            sentences: list of sentences to accent
            symbol: accepted for interface symmetry; not used here
            mode: accepted for interface symmetry; not used here

        Returns:
            accented_sentences: accented copies of the inputs, with any
            non-accent model edits reverted by _diff_fix
        """
        # Strip pre-existing accents so the model sees clean input.
        clean_sentences = self._clean_accents(sentences)

        # Tokenize, then cut into punctuation-delimited, length-bounded chunks.
        tokenized_sentences = self.sp.encode(clean_sentences, out_type=str)
        splitted_sentences = self._split_punctuation(tokenized_sentences)
        short_sentences = self._split_long(splitted_sentences)

        # Flatten the chunks into one batch, remembering how to regroup them.
        translation_batch, join_list = self._to_translation_batch(short_sentences)
        results = self.model.translate_batch(translation_batch, **self._run_config)
        accented_tokens = [result.hypotheses[0] for result in results]

        # Regroup chunks per sentence and detokenize.
        join_sentences = self._join_long(accented_tokens, join_list)
        accented_sentences = self.sp.decode(join_sentences)

        # Keep only the added accents; revert any other model edits.
        fixed_sentences = self._diff_fix(clean_sentences, accented_sentences)

        return fixed_sentences

    def _clean_accents(self, sentences: List[str]) -> List[str]:
        """Remove every combining acute accent (U+0301) from each sentence."""
        return [sentence.replace("\u0301", "") for sentence in sentences]

    def _split_punctuation(self, tokenized_sentences: List[List[str]]) -> List[List[List[str]]]:
        """Split each tokenized sentence into punctuation-delimited chunks."""
        return [self._split_punctuation_sentence(tokenized)
                for tokenized in tokenized_sentences]

    def _split_punctuation_sentence(self, tokenized: List[str]) -> List[List[str]]:
        """Split one token list after each token listed in split_tokens.

        The punctuation token stays at the end of its chunk; a trailing
        chunk with no final punctuation is kept as well.
        """
        splitted = []
        start_idx = 0
        for idx, token in enumerate(tokenized, start=1):
            if token in self.split_tokens:
                splitted.append(tokenized[start_idx:idx])
                start_idx = idx
        if (start_idx < len(tokenized)):
            splitted.append(tokenized[start_idx:])
        return splitted

    def _split_long(self, splitted_sentences: List[List[List[str]]]) -> List[List[List[str]]]:
        """Repeatedly halve over-long chunks until all are under max_len."""
        while True:
            short_sentences = [self._split_long_sentence(tokenized)
                               for tokenized in splitted_sentences]
            if splitted_sentences == short_sentences:
                # Fixed point reached: no chunk was split in this pass.
                return short_sentences
            splitted_sentences = short_sentences

    def _split_long_sentence(self, splitted: List[List[str]]) -> List[List[str]]:
        """Halve (once) every chunk that has max_len tokens or more."""
        short = []
        for sentence in splitted:
            if (len(sentence) < self.max_len):
                short.append(sentence)
            else:
                # Split at a word boundary near the middle.
                middle_idx = self._find_middle_space(sentence)
                short.append(sentence[:middle_idx])
                short.append(sentence[middle_idx:])
        return short

    @staticmethod
    def _find_middle_space(sentence: List[str]) -> int:
        """Find a token index near the middle that starts a new word.

        SentencePiece marks word starts with "▁"; search up to ±10% of the
        chunk length around the midpoint, falling back to the midpoint.
        """
        middle_idx = len(sentence) // 2
        max_shift = len(sentence) // 10
        for i in range(max_shift):
            left_idx = middle_idx - i
            right_idx = middle_idx + i
            if (sentence[left_idx][0] == "▁"):
                return left_idx
            if (sentence[right_idx][0] == "▁"):
                return right_idx
        # No word boundary nearby: split mid-word at the exact middle.
        return middle_idx

    def _to_translation_batch(self, splitted_sentences: List[List[List[str]]]) -> Tuple[List[List[str]], List[int]]:
        """Flatten per-sentence chunk lists into one translation batch.

        Returns:
            translation_batch: flat list of token chunks
            join_list: per-sentence chunk counts, consumed by _join_long
        """
        join_list = [len(sentence) for sentence in splitted_sentences]
        translation_batch = sum(splitted_sentences, [])
        return translation_batch, join_list

    def _join_long(self, splitted_sentences: List[List[str]], join_list: List[int]) -> List[List[str]]:
        """Inverse of _to_translation_batch: regroup and concatenate chunks."""
        join_sentences = []
        sentence_idx = 0
        for join_len in join_list:
            sentence = sum(splitted_sentences[sentence_idx:sentence_idx + join_len], [])
            join_sentences.append(sentence)
            sentence_idx += join_len
        return join_sentences

    def _diff_fix(self, sentences: List[str], accented_sentences: List[str]):
        """Revert any model edits other than accent insertion, per sentence."""
        return [diff_fix(input=sentence, output=accented_sentence)
                for sentence, accented_sentence in zip(sentences, accented_sentences)]

    def _init_model(self, device: str) -> None:
        """
        Initialize a model and tokenizer
        Args:
            device: device where to run model: "cpu" or "cuda"
        """
        repo_path = self._download_huggingface(self._hf_repo)

        self.model = ctranslate2.Translator(f"{repo_path}/ctranslate2/", device=device, **self._init_config)
        self.sp = spm.SentencePieceProcessor(model_file=f"{repo_path}/tokenizer.model")

    @staticmethod
    def _download_huggingface(repo_name: str) -> str:
        """
        Download a model repository snapshot from the Hugging Face Hub
        Args:
            repo_name: repository name, optionally suffixed "name@revision"

        Returns:
            repo_path: local snapshot directory
        """
        # Split off an optional "@revision" suffix.
        repo_name, *suffix = repo_name.split("@")
        revision = suffix[0] if suffix else None

        repo_path = snapshot_download(repo_name, revision=revision)

        return repo_path
|
accentor_lib/ukrainian_accentor_transformer/sequence_utils.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from difflib import SequenceMatcher
|
2 |
+
|
3 |
+
|
4 |
+
def accent_flag(code: list, output: str):
    """Return True if *code* deletes exactly one combining acute accent.

    Args:
        code: a SequenceMatcher opcode tuple (tag, i1, i2, j1, j2), where
              the i-indices refer to *output*.
        output: the model-produced (accented) string.

    Returns:
        True when the opcode is a 'delete' of the single character U+0301.
    """
    return (
        (code[2] - code[1] == 1) and
        (output[code[1]:code[2]] == "\u0301") and
        (code[0] == 'delete')
    )


def get_opcodes(input: str, output: str):
    """Diff *output* against *input*, preserving accent deletions.

    Opcodes are computed with *output* as sequence "a" and *input* as
    sequence "b"; any deletion of a lone U+0301 is re-tagged 'equal' so
    the accents survive the fix-up performed by diff_fix().
    """
    opcodes = SequenceMatcher(a=output, b=input, autojunk=False).get_opcodes()
    # Keep accent
    for idx, code in enumerate(opcodes):
        if accent_flag(code, output):
            opcodes[idx] = ("equal", *code[1:])
    return opcodes


def diff_fix(input: str, output: str):
    """Project the accents of *output* back onto the exact text of *input*.

    Any model-introduced change other than adding U+0301 accents is
    reverted: inserted/replaced spans are taken from *input*, other
    deletions are dropped, and 'equal' spans (which include the kept
    accents) come from *output*.
    """
    opcodes = get_opcodes(input=input, output=output)
    fixed = ""
    for code in opcodes:
        operation, idxs = code[0], code[1:]
        if operation == "equal":
            fixed += output[idxs[0]:idxs[1]]
        elif operation == "delete":
            pass  # model hallucinated extra text; drop it
        elif (operation == "insert") or (operation == "replace"):
            fixed += input[idxs[2]:idxs[3]]
    return fixed
|
accentor_lib/version.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__version__ = "0.1.0"
|
accentor_lib/version_bump.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fileinput
|
2 |
+
from os.path import join, dirname
|
3 |
+
|
4 |
+
with open(join(dirname(__file__), "version.py"), "r", encoding="utf-8") as v:
|
5 |
+
for line in v.readlines():
|
6 |
+
if line.startswith("__version__"):
|
7 |
+
if '"' in line:
|
8 |
+
version = line.split('"')[1]
|
9 |
+
else:
|
10 |
+
version = line.split("'")[1]
|
11 |
+
|
12 |
+
if "a" not in version:
|
13 |
+
parts = version.split('.')
|
14 |
+
parts[-1] = str(int(parts[-1]) + 1)
|
15 |
+
version = '.'.join(parts)
|
16 |
+
version = f"{version}a0"
|
17 |
+
else:
|
18 |
+
post = version.split("a")[1]
|
19 |
+
new_post = int(post) + 1
|
20 |
+
version = version.replace(f"a{post}", f"a{new_post}")
|
21 |
+
|
22 |
+
for line in fileinput.input(join(dirname(__file__), "version.py"), inplace=True):
|
23 |
+
if line.startswith("__version__"):
|
24 |
+
print(f"__version__ = \"{version}\"")
|
25 |
+
else:
|
26 |
+
print(line.rstrip('\n'))
|