Fix locale.getlocale() exception.
Reapply r79006 with fixes for Ubuntu 10.04. The only change is in fix_default_encoding(), which now traps an exception that locale.getlocale() may throw.

R=dpranke@chromium.org
BUG=none
TEST=unit test

Review URL: http://codereview.chromium.org/6721029

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@79144 0039d316-1c4b-4281-b951-d872f2087c98

experimental/szager/collated-output
							parent
							
								
									529e97b799
								
							
						
					
					
						commit
						35625c7e56
					
				| @ -0,0 +1,356 @@ | ||||
| # Copyright (c) 2011 The Chromium Authors. All rights reserved. | ||||
| # Use of this source code is governed by a BSD-style license that can be | ||||
| # found in the LICENSE file. | ||||
| 
 | ||||
| """Collection of functions and classes to fix various encoding problems on | ||||
| multiple platforms with python. | ||||
| """ | ||||
| 
 | ||||
| import codecs | ||||
| import locale | ||||
| import os | ||||
| import sys | ||||
| 
 | ||||
| 
 | ||||
| # Prevents initializing multiple times. | ||||
| _SYS_ARGV_PROCESSED = False | ||||
| 
 | ||||
| 
 | ||||
def complain(message):
  """Reports a problem on the interpreter's original stderr.

  Any exception raised in this file would normally end up printed on
  sys.stderr, which makes for frustrating debugging when sys.stderr has been
  replaced by one of the wrappers defined in this file. Printing to
  sys.__stderr__ instead gives the user a higher chance to see the message.

  Args:
    message: what to report. A str is printed as is; any other object is
        printed through repr().
  """
  # A conditional expression is used instead of the 'and/or' idiom so that an
  # empty str is printed as an empty line rather than as its repr ('').
  print >> sys.__stderr__, (
      message if isinstance(message, str) else repr(message))
| 
 | ||||
| 
 | ||||
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Every step that touches the locale is best effort: a locale the platform
  refuses is skipped instead of aborting the whole fix.

  Returns:
    False if the default encoding was already utf-8, in which case nothing is
    done; True otherwise.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')
  for attr in dir(locale):
    if attr[0:3] != 'LC_':
      continue
    aref = getattr(locale, attr)
    try:
      locale.setlocale(aref, '')
    except locale.Error:
      # The environment asks for a locale the platform doesn't support;
      # there is nothing to upgrade to UTF-8 for this category.
      continue
    try:
      lang = locale.getlocale(aref)[0]
    except (TypeError, ValueError):
      # getlocale() raises TypeError for categories it can't report on and
      # ValueError when it can't parse the current locale name.
      lang = None
    if lang:
      try:
        locale.setlocale(aref, (lang, 'UTF-8'))
      except locale.Error:
        os.environ[attr] = lang + '.UTF-8'
  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    # Best effort: keep whatever locale is currently active.
    pass
  return True
| 
 | ||||
| 
 | ||||
| ############################### | ||||
| # Windows specific | ||||
| 
 | ||||
| 
 | ||||
def fix_win_sys_argv(encoding):
  """Rebuilds sys.argv from the Windows wide-character command line.

  Each argument is encoded with 'encoding' (utf-8 is recommended), replacing
  characters that can't be encoded.

  Works around <http://bugs.python.org/issue2128>.

  Returns:
    False if sys.argv was already processed by an earlier call, True once
    sys.argv has been replaced.
  """
  global _SYS_ARGV_PROCESSED
  if _SYS_ARGV_PROCESSED:
    return False

  from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE
  from ctypes.wintypes import LPCWSTR, LPWSTR

  # <http://msdn.microsoft.com/en-us/library/ms683156.aspx>
  get_command_line = WINFUNCTYPE(LPWSTR)(('GetCommandLineW', windll.kernel32))
  # <http://msdn.microsoft.com/en-us/library/bb776391.aspx>
  prototype = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))
  command_line_to_argv = prototype(('CommandLineToArgvW', windll.shell32))

  count = c_int(0)
  wide_args = command_line_to_argv(get_command_line(), byref(count))
  args = []
  for index in xrange(0, count.value):
    args.append(wide_args[index].encode(encoding, 'replace'))

  if not hasattr(sys, 'frozen'):
    # An executable produced by py2exe or bbfreeze is invoked directly.
    # Otherwise the first argument is the Python interpreter; drop it.
    del args[:1]

    # Drop the option arguments meant for the Python interpreter too.
    while args and args[0].startswith(u'-') and args[0] != u'-':
      option = args.pop(0)
      if option == u'-c':
        # The command text is replaced by '-c'.
        args[0] = u'-c'
      if option in (u'-m', u'-c'):
        # For -m, sys.argv[0] should really be the absolute path of the
        # module source, but never mind.
        break

  sys.argv = args
  _SYS_ARGV_PROCESSED = True
  return True
| 
 | ||||
| 
 | ||||
def fix_win_codec():
  """Works around <http://bugs.python.org/issue6058>.

  Makes the codec name 'cp65001' resolve to the utf-8 codec when the
  interpreter can't look it up by itself.

  Returns:
    True if a codec search function was registered, False if 'cp65001' could
    already be looked up.
  """
  def search_cp65001(name):
    # Codec search function: only answers for 'cp65001'.
    if name == 'cp65001':
      return codecs.lookup('utf-8')
    return None

  # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
  try:
    codecs.lookup('cp65001')
  except LookupError:
    codecs.register(search_cp65001)
    return True
  return False
| 
 | ||||
| 
 | ||||
| class WinUnicodeOutputBase(object): | ||||
|   """Base class to adapt sys.stdout or sys.stderr to behave correctly on | ||||
|   Windows. | ||||
| 
 | ||||
|   Setting encoding to utf-8 is recommended. | ||||
|   """ | ||||
|   def __init__(self, fileno, name, encoding): | ||||
|     # Corresponding file handle. | ||||
|     self._fileno = fileno | ||||
|     self.encoding = encoding | ||||
|     self.name = name | ||||
| 
 | ||||
|     self.closed = False | ||||
|     self.softspace = False | ||||
|     self.mode = 'w' | ||||
| 
 | ||||
|   @staticmethod | ||||
|   def isatty(): | ||||
|     return False | ||||
| 
 | ||||
|   def close(self): | ||||
|     # Don't really close the handle, that would only cause problems. | ||||
|     self.closed = True | ||||
| 
 | ||||
|   def fileno(self): | ||||
|     return self._fileno | ||||
| 
 | ||||
|   def flush(self): | ||||
|     raise NotImplementedError() | ||||
| 
 | ||||
|   def write(self, text): | ||||
|     raise NotImplementedError() | ||||
| 
 | ||||
|   def writelines(self, lines): | ||||
|     try: | ||||
|       for line in lines: | ||||
|         self.write(line) | ||||
|     except Exception, e: | ||||
|       complain('%s.writelines: %r' % (self.name, e)) | ||||
|       raise | ||||
| 
 | ||||
| 
 | ||||
| class WinUnicodeConsoleOutput(WinUnicodeOutputBase): | ||||
|   """Output adapter to a Windows Console. | ||||
| 
 | ||||
|   Understands how to use the win32 console API. | ||||
|   """ | ||||
|   def __init__(self, console_handle, fileno, stream_name, encoding): | ||||
|     super(WinUnicodeConsoleOutput, self).__init__( | ||||
|         fileno, '<Unicode console %s>' % stream_name, encoding) | ||||
|     # Handle to use for WriteConsoleW | ||||
|     self._console_handle = console_handle | ||||
| 
 | ||||
|     # Loads the necessary function. | ||||
|     from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE | ||||
|     from ctypes.wintypes import BOOL, DWORD, HANDLE, LPVOID, LPWSTR | ||||
| 
 | ||||
|     self._DWORD = DWORD | ||||
|     self._byref = byref | ||||
| 
 | ||||
|     # <http://msdn.microsoft.com/en-us/library/ms687401.aspx> | ||||
|     self._WriteConsoleW = WINFUNCTYPE( | ||||
|         BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)( | ||||
|             ('WriteConsoleW', windll.kernel32)) | ||||
|     self._GetLastError = GetLastError | ||||
| 
 | ||||
|   def flush(self): | ||||
|     # No need to flush the console since it's immediate. | ||||
|     pass | ||||
| 
 | ||||
|   def write(self, text): | ||||
|     try: | ||||
|       if not isinstance(text, unicode): | ||||
|         # Convert to unicode. | ||||
|         text = str(text).decode(self.encoding, 'replace') | ||||
|       remaining = len(text) | ||||
|       while remaining > 0: | ||||
|         n = self._DWORD(0) | ||||
|         # There is a shorter-than-documented limitation on the length of the | ||||
|         # string passed to WriteConsoleW. See | ||||
|         # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>. | ||||
|         retval = self._WriteConsoleW( | ||||
|             self._console_handle, text, | ||||
|             min(remaining, 10000), | ||||
|             self._byref(n), None) | ||||
|         if retval == 0 or n.value == 0: | ||||
|           raise IOError( | ||||
|               'WriteConsoleW returned %r, n.value = %r, last error = %r' % ( | ||||
|                 retval, n.value, self._GetLastError())) | ||||
|         remaining -= n.value | ||||
|         if not remaining: | ||||
|           break | ||||
|         text = text[n.value:] | ||||
|     except Exception, e: | ||||
|       complain('%s.write: %r' % (self.name, e)) | ||||
|       raise | ||||
| 
 | ||||
| 
 | ||||
| class WinUnicodeOutput(WinUnicodeOutputBase): | ||||
|   """Output adaptor to a file output on Windows. | ||||
| 
 | ||||
|   If the standard FileWrite function is used, it will be encoded in the current | ||||
|   code page. WriteConsoleW() permits writting any character. | ||||
|   """ | ||||
|   def __init__(self, stream, fileno, encoding): | ||||
|     super(WinUnicodeOutput, self).__init__( | ||||
|         fileno, '<Unicode redirected %s>' % stream.name, encoding) | ||||
|     # Output stream | ||||
|     self._stream = stream | ||||
| 
 | ||||
|     # Flush right now. | ||||
|     self.flush() | ||||
| 
 | ||||
|   def flush(self): | ||||
|     try: | ||||
|       self._stream.flush() | ||||
|     except Exception, e: | ||||
|       complain('%s.flush: %r from %r' % (self.name, e, self._stream)) | ||||
|       raise | ||||
| 
 | ||||
|   def write(self, text): | ||||
|     try: | ||||
|       if isinstance(text, unicode): | ||||
|         # Replace characters that cannot be printed instead of failing. | ||||
|         text = text.encode(self.encoding, 'replace') | ||||
|       self._stream.write(text) | ||||
|     except Exception, e: | ||||
|       complain('%s.write: %r' % (self.name, e)) | ||||
|       raise | ||||
| 
 | ||||
| 
 | ||||
def win_handle_is_a_console(handle):
  """Tells whether a Windows file handle is a handle to a console.

  Returns:
    False for a None or invalid handle, or when GetFileType() doesn't report
    a character device; otherwise the result of GetConsoleMode(), which is
    true for a console.
  """
  from ctypes import byref, POINTER, windll, WINFUNCTYPE
  from ctypes.wintypes import BOOL, DWORD, HANDLE

  FILE_TYPE_CHAR = 0x0002
  FILE_TYPE_REMOTE = 0x8000
  INVALID_HANDLE_VALUE = DWORD(-1).value

  # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
  console_mode_prototype = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))
  get_console_mode = console_mode_prototype(
      ('GetConsoleMode', windll.kernel32))
  # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
  file_type_prototype = WINFUNCTYPE(DWORD, DWORD)
  get_file_type = file_type_prototype(('GetFileType', windll.kernel32))

  # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
  if handle is None or handle == INVALID_HANDLE_VALUE:
    return False

  # The 'remote' flag is ignored when looking at the file type.
  file_type = get_file_type(handle) & ~FILE_TYPE_REMOTE
  if file_type != FILE_TYPE_CHAR:
    return False
  return get_console_mode(handle, byref(DWORD()))
| 
 | ||||
| 
 | ||||
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
  """Wraps 'stream' into a unicode-compatible stream.

  A WinUnicodeConsoleOutput, which writes directly to the console, is returned
  only if:
  - the file number of the stream is the expected console file number, and
  - the standard handle looked up with 'output_handle' is in fact a handle to
    a console.

  In every other case a WinUnicodeOutput, which encodes what it is given
  before forwarding it to 'stream', is returned.
  """
  def no_fileno():
    # Stand-in for streams that don't have a fileno() method.
    return None

  current_fileno = getattr(stream, 'fileno', no_fileno)()
  if current_fileno == excepted_fileno:
    from ctypes import windll, WINFUNCTYPE
    from ctypes.wintypes import DWORD, HANDLE

    # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
    prototype = WINFUNCTYPE(HANDLE, DWORD)
    get_std_handle = prototype(('GetStdHandle', windll.kernel32))

    std_handle = get_std_handle(DWORD(output_handle))
    if win_handle_is_a_console(std_handle):
      # It's a console.
      return WinUnicodeConsoleOutput(
          std_handle, current_fileno, stream.name, encoding)

  # It's something else. Create an auto-encoding stream.
  return WinUnicodeOutput(stream, current_fileno, encoding)
| 
 | ||||
| 
 | ||||
| def fix_win_console(encoding): | ||||
|   """Makes Unicode console output work independently of the current code page. | ||||
| 
 | ||||
|   This also fixes <http://bugs.python.org/issue1602>. | ||||
|   Credit to Michael Kaplan | ||||
|   <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and | ||||
|   TZOmegaTZIOY | ||||
|   <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>. | ||||
|   """ | ||||
|   if (isinstance(sys.stdout, WinUnicodeOutputBase) or | ||||
|       isinstance(sys.stderr, WinUnicodeOutputBase)): | ||||
|     return False | ||||
| 
 | ||||
|   try: | ||||
|     # SetConsoleCP and SetConsoleOutputCP could be used to change the code page | ||||
|     # but it's not really useful since the code here is using WriteConsoleW(). | ||||
|     # Also, changing the code page is 'permanent' to the console and needs to be | ||||
|     # reverted manually. | ||||
|     # In practice one needs to set the console font to a TTF font to be able to | ||||
|     # see all the characters but it failed for me in practice. In any case, it | ||||
|     # won't throw any exception when printing, which is the important part. | ||||
|     # -11 and -12 are defined in stdio.h | ||||
|     sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding) | ||||
|     sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding) | ||||
|     # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation is | ||||
|     # "It doesn't appear to be possible to read Unicode characters in UTF-8 | ||||
|     # mode" and this appears to be a limitation of cmd.exe. | ||||
|   except Exception, e: | ||||
|     complain('exception %r while fixing up sys.stdout and sys.stderr' % e) | ||||
|   return True | ||||
| 
 | ||||
| 
 | ||||
def fix_encoding():
  """Fixes various encoding problems on all platforms.

  Should be called at the very beginning of the process.

  Returns:
    True only if every fix that was run reported True; in particular a second
    call is expected to return False.
  """
  on_windows = sys.platform == 'win32'

  # Every fix is run, whatever the earlier ones returned.
  outcomes = []
  if on_windows:
    outcomes.append(fix_win_codec())

  outcomes.append(fix_default_encoding())

  if on_windows:
    encoding = sys.getdefaultencoding()
    outcomes.append(fix_win_sys_argv(encoding))
    outcomes.append(fix_win_console(encoding))
  return all(outcomes)
| @ -0,0 +1,60 @@ | ||||
| #!/usr/bin/python | ||||
| # coding=utf8 | ||||
| # Copyright (c) 2011 The Chromium Authors. All rights reserved. | ||||
| # Use of this source code is governed by a BSD-style license that can be | ||||
| # found in the LICENSE file. | ||||
| 
 | ||||
| """Unit tests for fix_encoding.py.""" | ||||
| 
 | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| 
 | ||||
| ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||
| sys.path.insert(0, ROOT_DIR) | ||||
| 
 | ||||
| import fix_encoding | ||||
| 
 | ||||
| 
 | ||||
| class FixEncodingTest(unittest.TestCase): | ||||
|   # Nice mix of latin, hebrew, arabic and chinese. Doesn't mean anything. | ||||
|   text = u'Héllô 偉大 سيد' | ||||
| 
 | ||||
|   def test_code_page(self): | ||||
|     # Make sure printing garbage won't throw. | ||||
|     print self.text.encode() + '\xff' | ||||
|     print >> sys.stderr, self.text.encode() + '\xff' | ||||
| 
 | ||||
|   def test_utf8(self): | ||||
|     # Make sure printing utf-8 works. | ||||
|     print self.text.encode('utf-8') | ||||
|     print >> sys.stderr, self.text.encode('utf-8') | ||||
| 
 | ||||
|   def test_unicode(self): | ||||
|     # Make sure printing unicode works. | ||||
|     print self.text | ||||
|     print >> sys.stderr, self.text | ||||
| 
 | ||||
|   def test_default_encoding(self): | ||||
|     self.assertEquals('utf-8', sys.getdefaultencoding()) | ||||
| 
 | ||||
|   def test_win_console(self): | ||||
|     if sys.platform != 'win32': | ||||
|       return | ||||
|     # This should fail if redirected. Can be checked with: | ||||
|     # python fix_encoding_test.py > a | ||||
|     self.assertEquals( | ||||
|         sys.stdout.__class__, fix_encoding.WinUnicodeConsoleOutput) | ||||
|     self.assertEquals( | ||||
|         sys.stderr.__class__, fix_encoding.WinUnicodeConsoleOutput) | ||||
|     self.assertEquals(sys.stdout.encoding, sys.getdefaultencoding()) | ||||
|     self.assertEquals(sys.stderr.encoding, sys.getdefaultencoding()) | ||||
| 
 | ||||
|   def test_multiple_calls(self): | ||||
|     # Shouldn't do anything. | ||||
|     self.assertEquals(False, fix_encoding.fix_encoding()) | ||||
| 
 | ||||
| 
 | ||||
if __name__ == '__main__':
  # fix_encoding() is called outside of an assert statement: asserts are
  # stripped when python runs with -O, which would silently skip the very fix
  # these tests exercise.
  if not fix_encoding.fix_encoding():
    raise AssertionError('fix_encoding.fix_encoding() returned False')
  unittest.main()
					Loading…
					
					
				
		Reference in New Issue