Improve Unicode/UTF stdout handling

This commit is contained in:
allegroai 2020-03-09 12:34:48 +02:00
parent 618c2ac5c4
commit e3d0680d39
2 changed files with 19 additions and 4 deletions

View File

@ -65,7 +65,7 @@ from trains_agent.helper.base import (
is_linux_platform,
rm_file
)
from trains_agent.helper.console import ensure_text
from trains_agent.helper.console import ensure_text, print_text, decode_binary_lines
from trains_agent.helper.package.base import PackageManager
from trains_agent.helper.package.conda_api import CondaAPI
from trains_agent.helper.package.horovod_req import HorovodRequirement
@ -758,9 +758,11 @@ class Worker(ServiceCommandSection):
):
# type: (...) -> Tuple[Optional[int], TaskStopReason]
def _print_file(file_path, prev_line_count):
with open(file_path, "rt") as f:
with open(file_path, "rb") as f:
binary_text = f.read()
# skip the previously printed lines,
return f.readlines()[prev_line_count:]
blines = binary_text.split(b'\n')[prev_line_count:]
return decode_binary_lines(blines)
stdout = open(stdout_path, "wt")
stderr = open(stderr_path, "wt") if stderr_path else stdout
@ -853,7 +855,8 @@ class Worker(ServiceCommandSection):
"""
if not lines:
return 0
print("".join(lines), end="")
print_text("".join(lines))
# remove backspaces from the text log, they look bad.
for i, l in enumerate(lines):
lines[i] = l.replace('\x08', '')

View File

@ -22,6 +22,18 @@ def print_text(text, newline=True):
sys.stdout.write(data)
def decode_binary_lines(binary_lines, encoding='utf-8'):
    """
    Decode byte strings into newline-terminated text lines, one item at a time.

    Every item is decoded independently, so a single bad item cannot fail the
    whole batch. Bytes that are invalid for the codec are substituted
    (errors='replace') instead of raising, and every carriage return in the
    decoded text is converted to a line feed.

    :param binary_lines: iterable of byte strings; a trailing line feed on an
        item is optional, one is appended when missing
    :param encoding: name of the codec used for decoding (default 'utf-8')
    :return: list holding exactly one text entry per input item, in order.
        Non-empty entries always end with a line feed. Items that are empty,
        or that could not be decoded at all, are returned as an empty string
        (they are not dropped from the list).
    """
    lines = []
    for raw in binary_lines:
        try:
            text = raw.decode(encoding=encoding, errors='replace').replace('\r', '\n')
        except Exception:
            # Best effort: the item is not bytes-like or the codec is unknown.
            # Only Exception is caught, so KeyboardInterrupt / SystemExit
            # still propagate instead of being silently swallowed.
            text = ''
        # Empty entries are left empty (no line feed is added); they are still
        # appended so the output length always equals the input length.
        if text and not text.endswith('\n'):
            text += '\n'
        lines.append(text)
    return lines
def ensure_text(s, encoding='utf-8', errors='strict'):
"""Coerce *s* to six.text_type.
For Python 2: