Joshua Lochner committed on
Commit
67d0193
1 Parent(s): c0313f5

Add safe_print method

Browse files
Files changed (1) hide show
  1. src/utils.py +119 -1
src/utils.py CHANGED
@@ -1,5 +1,7 @@
1
  import re
2
-
 
 
3
 
4
def re_findall(pattern, string):
    """Return the named-group dict of every match of ``pattern`` in ``string``.

    Matches are reported in the order ``re.finditer`` yields them; a pattern
    without named groups produces one empty dict per match.
    """
    found = []
    for hit in re.finditer(pattern, string):
        found.append(hit.groupdict())
    return found
@@ -15,3 +17,119 @@ def jaccard(x1, x2, y1, y2):
15
def regex_search(text, pattern, group=1, default=None):
    """Search ``text`` for ``pattern`` and return one capture group.

    Returns ``default`` when the pattern does not match anywhere in ``text``;
    otherwise returns the requested ``group`` of the first match.
    """
    found = re.search(pattern, text)
    if found is None:
        return default
    return found.group(group)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
+ import sys
3
+ import locale
4
+ import io
5
 
6
def re_findall(pattern, string):
    """Return the named-group dict of every match of ``pattern`` in ``string``.

    Matches are reported in the order ``re.finditer`` yields them; a pattern
    without named groups produces one empty dict per match.
    """
    found = []
    for hit in re.finditer(pattern, string):
        found.append(hit.groupdict())
    return found
 
17
def regex_search(text, pattern, group=1, default=None):
    """Search ``text`` for ``pattern`` and return one capture group.

    Returns ``default`` when the pattern does not match anywhere in ``text``;
    otherwise returns the requested ``group`` of the first match.
    """
    found = re.search(pattern, text)
    if found is None:
        return default
    return found.group(group)
20
+
21
+
22
+ def _windows_write_string(s, out, skip_errors=True):
23
+ """ Returns True if the string was written using special methods,
24
+ False if it has yet to be written out."""
25
+ # Adapted from http://stackoverflow.com/a/3259271/35070
26
+
27
+ import ctypes
28
+ import ctypes.wintypes
29
+
30
+ WIN_OUTPUT_IDS = {
31
+ 1: -11,
32
+ 2: -12,
33
+ }
34
+
35
+ try:
36
+ fileno = out.fileno()
37
+ except AttributeError:
38
+ # If the output stream doesn't have a fileno, it's virtual
39
+ return False
40
+ except io.UnsupportedOperation:
41
+ # Some strange Windows pseudo files?
42
+ return False
43
+ if fileno not in WIN_OUTPUT_IDS:
44
+ return False
45
+
46
+ GetStdHandle = ctypes.WINFUNCTYPE(
47
+ ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
48
+ ('GetStdHandle', ctypes.windll.kernel32))
49
+ h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
50
+
51
+ WriteConsoleW = ctypes.WINFUNCTYPE(
52
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
53
+ ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
54
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
55
+ written = ctypes.wintypes.DWORD(0)
56
+
57
+ GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(
58
+ ('GetFileType', ctypes.windll.kernel32))
59
+ FILE_TYPE_CHAR = 0x0002
60
+ FILE_TYPE_REMOTE = 0x8000
61
+ GetConsoleMode = ctypes.WINFUNCTYPE(
62
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
63
+ ctypes.POINTER(ctypes.wintypes.DWORD))(
64
+ ('GetConsoleMode', ctypes.windll.kernel32))
65
+ INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
66
+
67
+ def not_a_console(handle):
68
+ if handle == INVALID_HANDLE_VALUE or handle is None:
69
+ return True
70
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
71
+
72
+ if not_a_console(h):
73
+ return False
74
+
75
+ def next_nonbmp_pos(s):
76
+ try:
77
+ return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
78
+ except StopIteration:
79
+ return len(s)
80
+
81
+ while s:
82
+ count = min(next_nonbmp_pos(s), 1024)
83
+
84
+ ret = WriteConsoleW(
85
+ h, s, count if count else 2, ctypes.byref(written), None)
86
+ if ret == 0:
87
+ if skip_errors:
88
+ continue
89
+ else:
90
+ raise OSError('Failed to write string')
91
+ if not count: # We just wrote a non-BMP character
92
+ assert written.value == 2
93
+ s = s[1:]
94
+ else:
95
+ assert written.value > 0
96
+ s = s[written.value:]
97
+ return True
98
+
99
def preferredencoding():
    """Get preferred encoding.

    Asks ``locale.getpreferredencoding()`` for the system's encoding and
    checks that Python can actually encode with it. Falls back to 'utf-8'
    if the lookup or the trial encode fails for any reason.
    """
    fallback = 'utf-8'
    try:
        candidate = locale.getpreferredencoding()
        # Trial encode: rejects names Python has no codec for.
        'TEST'.encode(candidate)
    except Exception:
        return fallback
    return candidate
111
+
112
def safe_print(*objects, sep=' ', end='\n', out=None, encoding=None, flush=False):
    """
    Ensure printing to standard output can be done safely (especially on Windows).
    There are usually issues with printing emojis and non utf-8 characters.

    Each object is converted with ``str()``, joined with ``sep`` and followed
    by ``end``, like the built-in ``print``. Characters the target encoding
    cannot represent are dropped instead of raising ``UnicodeEncodeError``.

    ``out`` defaults to ``sys.stdout``. ``encoding`` overrides the stream's
    own encoding. ``flush`` forces a flush after writing.
    """

    output_string = sep.join(map(str, objects)) + end

    if out is None:
        out = sys.stdout

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(output_string, out):
            return

    # `mode` is not always a string (some file objects expose an int).
    mode = getattr(out, 'mode', '')
    is_binary = (
        isinstance(out, (io.RawIOBase, io.BufferedIOBase))
        or (isinstance(mode, str) and 'b' in mode)
    )

    if is_binary:
        # Binary streams only accept bytes, so encode before writing.
        enc = encoding or preferredencoding()
        out.write(output_string.encode(enc, 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = output_string.encode(enc, 'ignore')
        # Push out text already queued in the text layer first; otherwise
        # these bytes would land in front of it.
        if hasattr(out, 'flush'):
            out.flush()
        out.buffer.write(byt)
    else:
        # Purely virtual text stream (e.g. io.StringIO): nothing to encode.
        out.write(output_string)

    if flush and hasattr(out, 'flush'):
        out.flush()