cclauss committed on
Commit
2c047e3
1 Parent(s): e7d4bd4

Fix flake8 issues

Browse files
scripts/calculate_coverages.py CHANGED
@@ -11,6 +11,11 @@ sys.path.insert(0, dirname(dirname(abspath(__file__))))
11
 
12
  from torchmoji.sentence_tokenizer import SentenceTokenizer, coverage
13
 
 
 
 
 
 
14
  IS_PYTHON2 = int(sys.version[0]) == 2
15
 
16
  OUTPUT_PATH = 'coverage.csv'
 
11
 
12
  from torchmoji.sentence_tokenizer import SentenceTokenizer, coverage
13
 
14
+ try:
15
+ unicode # Python 2
16
+ except NameError:
17
+ unicode = str # Python 3
18
+
19
  IS_PYTHON2 = int(sys.version[0]) == 2
20
 
21
  OUTPUT_PATH = 'coverage.csv'
scripts/convert_all_datasets.py CHANGED
@@ -14,6 +14,11 @@ from torchmoji.create_vocab import VocabBuilder
14
  from torchmoji.sentence_tokenizer import SentenceTokenizer, extend_vocab, coverage
15
  from torchmoji.tokenizer import tokenize
16
 
 
 
 
 
 
17
  IS_PYTHON2 = int(sys.version[0]) == 2
18
 
19
  DATASETS = [
 
14
  from torchmoji.sentence_tokenizer import SentenceTokenizer, extend_vocab, coverage
15
  from torchmoji.tokenizer import tokenize
16
 
17
+ try:
18
+ unicode # Python 2
19
+ except NameError:
20
+ unicode = str # Python 3
21
+
22
  IS_PYTHON2 = int(sys.version[0]) == 2
23
 
24
  DATASETS = [
torchmoji/filter_utils.py CHANGED
@@ -11,8 +11,11 @@ import numpy as np
11
  from torchmoji.tokenizer import RE_MENTION, RE_URL
12
  from torchmoji.global_variables import SPECIAL_TOKENS
13
 
14
- IS_PYTHON2 = int(sys.version[0]) == 2
15
- chr_ = unichr if IS_PYTHON2 else chr
 
 
 
16
 
17
  AtMentionRegex = re.compile(RE_MENTION)
18
  urlRegex = re.compile(RE_URL)
@@ -36,8 +39,8 @@ VARIATION_SELECTORS = [ '\ufe00',
36
  '\ufe0f']
37
 
38
  # from https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
39
- ALL_CHARS = (chr_(i) for i in range(sys.maxunicode))
40
- CONTROL_CHARS = ''.join(map(chr_, list(range(0,32)) + list(range(127,160))))
41
  CONTROL_CHAR_REGEX = re.compile('[%s]' % re.escape(CONTROL_CHARS))
42
 
43
  def is_special_token(word):
 
11
  from torchmoji.tokenizer import RE_MENTION, RE_URL
12
  from torchmoji.global_variables import SPECIAL_TOKENS
13
 
14
+ try:
15
+ unichr # Python 2
16
+ except NameError:
17
+ unichr = chr # Python 3
18
+
19
 
20
  AtMentionRegex = re.compile(RE_MENTION)
21
  urlRegex = re.compile(RE_URL)
 
39
  '\ufe0f']
40
 
41
  # from https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
42
+ ALL_CHARS = (unichr(i) for i in range(sys.maxunicode))
43
+ CONTROL_CHARS = ''.join(map(unichr, list(range(0,32)) + list(range(127,160))))
44
  CONTROL_CHAR_REGEX = re.compile('[%s]' % re.escape(CONTROL_CHARS))
45
 
46
  def is_special_token(word):
torchmoji/finetuning.py CHANGED
@@ -3,7 +3,6 @@
3
  """
4
  from __future__ import print_function
5
 
6
- import sys
7
  import uuid
8
  from time import sleep
9
  from io import open
@@ -28,8 +27,13 @@ from torchmoji.global_variables import (FINETUNING_METHODS,
28
  from torchmoji.tokenizer import tokenize
29
  from torchmoji.sentence_tokenizer import SentenceTokenizer
30
 
31
- IS_PYTHON2 = int(sys.version[0]) == 2
32
- unicode_ = unicode if IS_PYTHON2 else str
 
 
 
 
 
33
 
34
  def load_benchmark(path, vocab, extend_with=0):
35
  """ Loads the given benchmark dataset.
@@ -66,7 +70,7 @@ def load_benchmark(path, vocab, extend_with=0):
66
 
67
  # Decode data
68
  try:
69
- texts = [unicode_(x) for x in data['texts']]
70
  except UnicodeDecodeError:
71
  texts = [x.decode('utf-8') for x in data['texts']]
72
 
 
3
  """
4
  from __future__ import print_function
5
 
 
6
  import uuid
7
  from time import sleep
8
  from io import open
 
27
  from torchmoji.tokenizer import tokenize
28
  from torchmoji.sentence_tokenizer import SentenceTokenizer
29
 
30
+ try:
31
+ unicode
32
+ IS_PYTHON2 = True
33
+ except NameError:
34
+ unicode = str
35
+ IS_PYTHON2 = False
36
+
37
 
38
  def load_benchmark(path, vocab, extend_with=0):
39
  """ Loads the given benchmark dataset.
 
70
 
71
  # Decode data
72
  try:
73
+ texts = [unicode(x) for x in data['texts']]
74
  except UnicodeDecodeError:
75
  texts = [x.decode('utf-8') for x in data['texts']]
76
 
torchmoji/word_generator.py CHANGED
@@ -7,7 +7,6 @@
7
 
8
  from __future__ import division, print_function, unicode_literals
9
 
10
- import sys
11
  import re
12
  import unicodedata
13
  import numpy as np
@@ -26,8 +25,10 @@ from torchmoji.filter_utils import (convert_linebreaks,
26
  remove_variation_selectors,
27
  separate_emojis_and_text)
28
 
29
- IS_PYTHON2 = int(sys.version[0]) == 2
30
- unicode_ = unicode if IS_PYTHON2 else str
 
 
31
 
32
  # Only catch retweets in the beginning of the tweet as those are the
33
  # automatically added ones.
@@ -68,7 +69,7 @@ class WordGenerator():
68
  that is not allowed.
69
  """
70
 
71
- if not isinstance(sentence, unicode_):
72
  raise ValueError("All sentences should be Unicode-encoded!")
73
  sentence = sentence.strip().lower()
74
 
 
7
 
8
  from __future__ import division, print_function, unicode_literals
9
 
 
10
  import re
11
  import unicodedata
12
  import numpy as np
 
25
  remove_variation_selectors,
26
  separate_emojis_and_text)
27
 
28
+ try:
29
+ unicode # Python 2
30
+ except NameError:
31
+ unicode = str # Python 3
32
 
33
  # Only catch retweets in the beginning of the tweet as those are the
34
  # automatically added ones.
 
69
  that is not allowed.
70
  """
71
 
72
+ if not isinstance(sentence, unicode):
73
  raise ValueError("All sentences should be Unicode-encoded!")
74
  sentence = sentence.strip().lower()
75