From f946b22f964a85882a056687f62d3e85230e4d49 Mon Sep 17 00:00:00 2001 From: "maruel@chromium.org" Date: Tue, 22 Mar 2011 18:52:57 +0000 Subject: [PATCH] Revert r79006: "Add code to 'fix' python encoding and it's unit test." It is causing exception for some users on ubuntu 10.4 with: "category LC_ALL is not supported" TBR=dpranke Review URL: http://codereview.chromium.org/6717025 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@79012 0039d316-1c4b-4281-b951-d872f2087c98 --- PRESUBMIT.py | 1 - fix_encoding.py | 353 ------------------------------------ gcl.py | 2 - gclient.py | 2 - presubmit_support.py | 24 ++- tests/fix_encoding_test.py | 60 ------ tests/gcl_unittest.py | 3 +- tests/presubmit_unittest.py | 3 +- tests/trychange_unittest.py | 3 +- trychange.py | 2 - 10 files changed, 14 insertions(+), 439 deletions(-) delete mode 100644 fix_encoding.py delete mode 100755 tests/fix_encoding_test.py diff --git a/PRESUBMIT.py b/PRESUBMIT.py index 6232c191b..50aa3850c 100644 --- a/PRESUBMIT.py +++ b/PRESUBMIT.py @@ -9,7 +9,6 @@ details on the presubmit API built into gcl. """ UNIT_TESTS = [ - 'tests.fix_encoding_test', 'tests.gcl_unittest', 'tests.gclient_scm_test', 'tests.gclient_smoketest', diff --git a/fix_encoding.py b/fix_encoding.py deleted file mode 100644 index 1a6fe7644..000000000 --- a/fix_encoding.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright (c) 2011 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -"""Collection of functions and classes to fix various encoding problems on -multiple platforms with python. -""" - -import codecs -import locale -import os -import sys - - -# Prevents initializing multiple times. -_SYS_ARGV_PROCESSED = False - - -def complain(message): - """If any exception occurs in this file, we'll probably try to print it - on stderr, which makes for frustrating debugging if stderr is directed - to our wrapper. So be paranoid about catching errors and reporting them - to sys.__stderr__, so that the user has a higher chance to see them. - """ - print >> sys.__stderr__, ( - isinstance(message, str) and message or repr(message)) - - -def fix_default_encoding(): - """Forces utf8 solidly on all platforms. - - By default python execution environment is lazy and defaults to ascii - encoding. - - http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/ - """ - if sys.getdefaultencoding() == 'utf-8': - return False - - # Regenerate setdefaultencoding. - reload(sys) - # Module 'sys' has no 'setdefaultencoding' member - # pylint: disable=E1101 - sys.setdefaultencoding('utf-8') - for attr in dir(locale): - if attr[0:3] != 'LC_': - continue - aref = getattr(locale, attr) - locale.setlocale(aref, '') - lang, _ = locale.getlocale(aref) - if lang != None: - try: - locale.setlocale(aref, (lang, 'UTF-8')) - except locale.Error: - os.environ[attr] = lang + '.UTF-8' - locale.setlocale(locale.LC_ALL, '') - return True - - -############################### -# Windows specific - - -def fix_win_sys_argv(encoding): - """Converts sys.argv to 'encoding' encoded string. - - utf-8 is recommended. - - Works around . - """ - global _SYS_ARGV_PROCESSED - if _SYS_ARGV_PROCESSED: - return False - - from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE - from ctypes.wintypes import LPCWSTR, LPWSTR - - # - GetCommandLineW = WINFUNCTYPE(LPWSTR)(('GetCommandLineW', windll.kernel32)) - # - CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))( - ('CommandLineToArgvW', windll.shell32)) - - argc = c_int(0) - argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) - argv = [ - argv_unicode[i].encode(encoding, 'replace') - for i in xrange(0, argc.value)] - - if not hasattr(sys, 'frozen'): - # If this is an executable produced by py2exe or bbfreeze, then it - # will have been invoked directly. Otherwise, unicode_argv[0] is the - # Python interpreter, so skip that. - argv = argv[1:] - - # Also skip option arguments to the Python interpreter. - while len(argv) > 0: - arg = argv[0] - if not arg.startswith(u'-') or arg == u'-': - break - argv = argv[1:] - if arg == u'-m': - # sys.argv[0] should really be the absolute path of the - # module source, but never mind. - break - if arg == u'-c': - argv[0] = u'-c' - break - sys.argv = argv - _SYS_ARGV_PROCESSED = True - return True - - -def fix_win_codec(): - """Works around .""" - # - try: - codecs.lookup('cp65001') - return False - except LookupError: - codecs.register( - lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) - return True - - -class WinUnicodeOutputBase(object): - """Base class to adapt sys.stdout or sys.stderr to behave correctly on - Windows. - - Setting encoding to utf-8 is recommended. - """ - def __init__(self, fileno, name, encoding): - # Corresponding file handle. - self._fileno = fileno - self.encoding = encoding - self.name = name - - self.closed = False - self.softspace = False - self.mode = 'w' - - @staticmethod - def isatty(): - return False - - def close(self): - # Don't really close the handle, that would only cause problems. - self.closed = True - - def fileno(self): - return self._fileno - - def flush(self): - raise NotImplementedError() - - def write(self, text): - raise NotImplementedError() - - def writelines(self, lines): - try: - for line in lines: - self.write(line) - except Exception, e: - complain('%s.writelines: %r' % (self.name, e)) - raise - - -class WinUnicodeConsoleOutput(WinUnicodeOutputBase): - """Output adapter to a Windows Console. - - Understands how to use the win32 console API. - """ - def __init__(self, console_handle, fileno, stream_name, encoding): - super(WinUnicodeConsoleOutput, self).__init__( - fileno, '' % stream_name, encoding) - # Handle to use for WriteConsoleW - self._console_handle = console_handle - - # Loads the necessary function. - from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE - from ctypes.wintypes import BOOL, DWORD, HANDLE, LPVOID, LPWSTR - - self._DWORD = DWORD - self._byref = byref - - # - self._WriteConsoleW = WINFUNCTYPE( - BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)( - ('WriteConsoleW', windll.kernel32)) - self._GetLastError = GetLastError - - def flush(self): - # No need to flush the console since it's immediate. - pass - - def write(self, text): - try: - if not isinstance(text, unicode): - # Convert to unicode. - text = str(text).decode(self.encoding, 'replace') - remaining = len(text) - while remaining > 0: - n = self._DWORD(0) - # There is a shorter-than-documented limitation on the length of the - # string passed to WriteConsoleW. See - # . - retval = self._WriteConsoleW( - self._console_handle, text, - min(remaining, 10000), - self._byref(n), None) - if retval == 0 or n.value == 0: - raise IOError( - 'WriteConsoleW returned %r, n.value = %r, last error = %r' % ( - retval, n.value, self._GetLastError())) - remaining -= n.value - if not remaining: - break - text = text[n.value:] - except Exception, e: - complain('%s.write: %r' % (self.name, e)) - raise - - -class WinUnicodeOutput(WinUnicodeOutputBase): - """Output adaptor to a file output on Windows. - - If the standard FileWrite function is used, it will be encoded in the current - code page. WriteConsoleW() permits writting any character. - """ - def __init__(self, stream, fileno, encoding): - super(WinUnicodeOutput, self).__init__( - fileno, '' % stream.name, encoding) - # Output stream - self._stream = stream - - # Flush right now. - self.flush() - - def flush(self): - try: - self._stream.flush() - except Exception, e: - complain('%s.flush: %r from %r' % (self.name, e, self._stream)) - raise - - def write(self, text): - try: - if isinstance(text, unicode): - # Replace characters that cannot be printed instead of failing. - text = text.encode(self.encoding, 'replace') - self._stream.write(text) - except Exception, e: - complain('%s.write: %r' % (self.name, e)) - raise - - -def win_handle_is_a_console(handle): - """Returns True if a Windows file handle is a handle to a console.""" - from ctypes import byref, POINTER, windll, WINFUNCTYPE - from ctypes.wintypes import BOOL, DWORD, HANDLE - - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - INVALID_HANDLE_VALUE = DWORD(-1).value - - # - GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))( - ('GetConsoleMode', windll.kernel32)) - # - GetFileType = WINFUNCTYPE(DWORD, DWORD)(('GetFileType', windll.kernel32)) - - # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle. - if handle == INVALID_HANDLE_VALUE or handle is None: - return False - return ( - (GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR and - GetConsoleMode(handle, byref(DWORD()))) - - -def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding): - """Returns a unicode-compatible stream. - - This function will return a direct-Console writing object only if: - - the file number is the expected console file number - - the handle the expected file handle - - the 'real' handle is in fact a handle to a console. - """ - old_fileno = getattr(stream, 'fileno', lambda: None)() - if old_fileno == excepted_fileno: - from ctypes import windll, WINFUNCTYPE - from ctypes.wintypes import DWORD, HANDLE - - # - GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(('GetStdHandle', windll.kernel32)) - - real_output_handle = GetStdHandle(DWORD(output_handle)) - if win_handle_is_a_console(real_output_handle): - # It's a console. - return WinUnicodeConsoleOutput( - real_output_handle, old_fileno, stream.name, encoding) - - # It's something else. Create an auto-encoding stream. - return WinUnicodeOutput(stream, old_fileno, encoding) - - -def fix_win_console(encoding): - """Makes Unicode console output work independently of the current code page. - - This also fixes . - Credit to Michael Kaplan - and - TZOmegaTZIOY - . - """ - if (isinstance(sys.stdout, WinUnicodeOutputBase) or - isinstance(sys.stderr, WinUnicodeOutputBase)): - return False - - try: - # SetConsoleCP and SetConsoleOutputCP could be used to change the code page - # but it's not really useful since the code here is using WriteConsoleW(). - # Also, changing the code page is 'permanent' to the console and needs to be - # reverted manually. - # In practice one needs to set the console font to a TTF font to be able to - # see all the characters but it failed for me in practice. In any case, it - # won't throw any exception when printing, which is the important part. - # -11 and -12 are defined in stdio.h - sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding) - sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding) - # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation is - # "It doesn't appear to be possible to read Unicode characters in UTF-8 - # mode" and this appears to be a limitation of cmd.exe. - except Exception, e: - complain('exception %r while fixing up sys.stdout and sys.stderr' % e) - return True - - -def fix_encoding(): - """Fixes various encoding problems on all platforms. - - Should be called at the very begining of the process. - """ - ret = True - if sys.platform == 'win32': - ret &= fix_win_codec() - - ret &= fix_default_encoding() - - if sys.platform == 'win32': - encoding = sys.getdefaultencoding() - ret &= fix_win_sys_argv(encoding) - ret &= fix_win_console(encoding) - return ret diff --git a/gcl.py b/gcl.py index ace9f6f3c..20762d1be 100755 --- a/gcl.py +++ b/gcl.py @@ -40,7 +40,6 @@ import breakpad # pylint: disable=W0611 # gcl now depends on gclient. from scm import SVN -import fix_encoding import gclient_utils import owners import presubmit_support @@ -1474,5 +1473,4 @@ def main(argv): if __name__ == "__main__": - fix_encoding.fix_encoding() sys.exit(main(sys.argv[1:])) diff --git a/gclient.py b/gclient.py index c33973dfd..4b9d94cac 100644 --- a/gclient.py +++ b/gclient.py @@ -64,7 +64,6 @@ import urllib import breakpad # pylint: disable=W0611 -import fix_encoding import gclient_scm import gclient_utils from third_party.repo.progress import Progress @@ -1267,7 +1266,6 @@ def Main(argv): if '__main__' == __name__: - fix_encoding.fix_encoding() sys.exit(Main(sys.argv[1:])) # vim: ts=2:sw=2:tw=80:et: diff --git a/presubmit_support.py b/presubmit_support.py index e9a2dbd49..f1f70d9c4 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -50,7 +50,6 @@ except ImportError: import simplejson as json # pylint: disable=F0401 # Local imports. -import fix_encoding import gclient_utils import owners import presubmit_canned_checks @@ -143,18 +142,18 @@ class OutputApi(object): def handle(self, output): output.write(self._message) output.write('\n') - for index, item in enumerate(self._items): - output.write(' ') - # Write separately in case it's unicode. - output.write(item) - if index < len(self._items) - 1: - output.write(' \\') - output.write('\n') + if len(self._items) > 0: + output.write(' ' + ' \\\n '.join(map(str, self._items)) + '\n') if self._long_text: - output.write('\n***************\n') - # Write separately in case it's unicode. - output.write(self._long_text) - output.write('\n***************\n') + # Sometimes self._long_text is a ascii string, a codepage string + # (on windows), or a unicode object. + try: + long_text = self._long_text.decode() + except UnicodeDecodeError: + long_text = self._long_text.decode('ascii', 'replace') + + output.write('\n***************\n%s\n***************\n' % + long_text) if self.fatal: output.fail() @@ -1193,5 +1192,4 @@ def Main(argv): if __name__ == '__main__': - fix_encoding.fix_encoding() sys.exit(Main(None)) diff --git a/tests/fix_encoding_test.py b/tests/fix_encoding_test.py deleted file mode 100755 index a6ee18627..000000000 --- a/tests/fix_encoding_test.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/python -# coding=utf8 -# Copyright (c) 2011 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -"""Unit tests for fix_encoding.py.""" - -import os -import sys -import unittest - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, ROOT_DIR) - -import fix_encoding - - -class FixEncodingTest(unittest.TestCase): - # Nice mix of latin, hebrew, arabic and chinese. Doesn't mean anything. - text = u'Héllô 偉大 سيد' - - def test_code_page(self): - # Make sure printing garbage won't throw. - print self.text.encode() + '\xff' - print >> sys.stderr, self.text.encode() + '\xff' - - def test_utf8(self): - # Make sure printing utf-8 works. - print self.text.encode('utf-8') - print >> sys.stderr, self.text.encode('utf-8') - - def test_unicode(self): - # Make sure printing unicode works. - print self.text - print >> sys.stderr, self.text - - def test_default_encoding(self): - self.assertEquals('utf-8', sys.getdefaultencoding()) - - def test_win_console(self): - if sys.platform != 'win32': - return - # This should fail if redirected. Can be checked with: - # python fix_encoding_test.py > a - self.assertEquals( - sys.stdout.__class__, fix_encoding.WinUnicodeConsoleOutput) - self.assertEquals( - sys.stderr.__class__, fix_encoding.WinUnicodeConsoleOutput) - self.assertEquals(sys.stdout.encoding, sys.getdefaultencoding()) - self.assertEquals(sys.stderr.encoding, sys.getdefaultencoding()) - - def test_multiple_calls(self): - # Shouldn't do anything. - self.assertEquals(False, fix_encoding.fix_encoding()) - - -if __name__ == '__main__': - assert fix_encoding.fix_encoding() - unittest.main() diff --git a/tests/gcl_unittest.py b/tests/gcl_unittest.py index 322586987..4ab2b6fc1 100755 --- a/tests/gcl_unittest.py +++ b/tests/gcl_unittest.py @@ -90,8 +90,7 @@ class GclUnittest(GclTestsBase): 'OptionallyDoPresubmitChecks', 'REPOSITORY_ROOT', 'REVIEWERS_REGEX', 'RunShell', 'RunShellWithReturnCode', 'SVN', 'TryChange', 'UnknownFiles', 'Warn', - 'attrs', 'breakpad', 'defer_attributes', 'fix_encoding', - 'gclient_utils', 'getpass', + 'attrs', 'breakpad', 'defer_attributes', 'gclient_utils', 'getpass', 'json', 'main', 'need_change', 'need_change_and_args', 'no_args', 'optparse', 'os', 'owners', 'presubmit_support', 'random', 're', 'string', 'subprocess', 'suggest_reviewers', 'sys', 'tempfile', diff --git a/tests/presubmit_unittest.py b/tests/presubmit_unittest.py index c7ad8191d..b2cb19083 100755 --- a/tests/presubmit_unittest.py +++ b/tests/presubmit_unittest.py @@ -141,8 +141,7 @@ class PresubmitUnittest(PresubmitTestsBase): 'NotImplementedException', 'OutputApi', 'ParseFiles', 'PresubmitExecuter', 'PresubmitOutput', 'ScanSubDirs', 'SvnAffectedFile', 'SvnChange', 'cPickle', 'cStringIO', - 'exceptions', 'fix_encoding', 'fnmatch', 'gclient_utils', 'glob', 'json', - 'load_files', + 'exceptions', 'fnmatch', 'gclient_utils', 'glob', 'json', 'load_files', 'logging', 'marshal', 'normpath', 'optparse', 'os', 'owners', 'pickle', 'presubmit_canned_checks', 'random', 're', 'scm', 'subprocess', 'sys', 'tempfile', 'time', 'traceback', 'types', 'unittest', 'urllib2', diff --git a/tests/trychange_unittest.py b/tests/trychange_unittest.py index f2b52633e..c57cceea6 100755 --- a/tests/trychange_unittest.py +++ b/tests/trychange_unittest.py @@ -45,8 +45,7 @@ class TryChangeUnittest(TryChangeTestsBase): 'EPILOG', 'Escape', 'GIT', 'GuessVCS', 'GetMungedDiff', 'HELP_STRING', 'InvalidScript', 'NoTryServerAccess', 'PrintSuccess', 'SCM', 'SVN', 'TryChange', 'USAGE', - 'breakpad', 'datetime', 'errno', 'fix_encoding', 'gcl', 'gclient_utils', - 'getpass', + 'breakpad', 'datetime', 'errno', 'gcl', 'gclient_utils', 'getpass', 'json', 'logging', 'optparse', 'os', 'posixpath', 're', 'scm', 'shutil', 'sys', 'tempfile', 'urllib', ] diff --git a/trychange.py b/trychange.py index a848c5b24..fb5e935c3 100755 --- a/trychange.py +++ b/trychange.py @@ -39,7 +39,6 @@ try: except ImportError: gcl = None -import fix_encoding import gclient_utils import scm @@ -770,5 +769,4 @@ def TryChange(argv, if __name__ == "__main__": - fix_encoding.fix_encoding() sys.exit(TryChange(None, [], False))