python3: deal with astroid's module.file_stream returning bytes

Use tokenize.tokenize(), which wants a byte stream.  Everywhere else,
decode as necessary.
Julien Cristau 2013-06-19 13:31:16 +02:00
parent 41f3f44897
commit fdd8a05368
4 changed files with 22 additions and 13 deletions
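
The core of the change is the tokenize API split between the two Python
lines. A minimal standalone sketch of the Python 3 side the commit message
refers to (illustration only, not part of the diff):

    import io
    import tokenize

    # Python 3's tokenize.tokenize() takes a readline that returns bytes;
    # it reads the PEP 263 coding cookie itself and yields decoded tokens.
    source = io.BytesIO(b"# -*- coding: utf-8 -*-\nx = 1\n")
    for tok in tokenize.tokenize(source.readline):
        print(tok)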

View File

@@ -127,7 +127,10 @@ class BaseRawChecker(BaseChecker):
                           DeprecationWarning)
         stream = node.file_stream
         stream.seek(0) # XXX may be removed with astroid > 0.23
-        self.process_tokens(tokenize.generate_tokens(stream.readline))
+        if sys.version_info <= (3, 0):
+            self.process_tokens(tokenize.generate_tokens(stream.readline))
+        else:
+            self.process_tokens(tokenize.tokenize(stream.readline))

     def process_tokens(self, tokens):
         """should be overridden by subclasses"""

View File

@@ -60,13 +60,13 @@ separated by a comma.'
     def _check_encoding(self, lineno, line, file_encoding):
         try:
-            unicode(line, file_encoding)
+            return unicode(line, file_encoding)
         except UnicodeDecodeError, e:
             self.add_message('W0512', line=lineno,
                              args=(file_encoding, e.args[2]))

     def process_module(self, module):
-        """inspect the source file to found encoding problem or fixmes like
+        """inspect the source file to find encoding problem or fixmes like
         notes
         """
         stream = module.file_stream
stream = module.file_stream
@@ -77,8 +77,9 @@ separated by a comma.'
         else:
             encoding = 'ascii'
         for lineno, line in enumerate(stream):
-            self._check_note(notes, lineno+1, line)
-            self._check_encoding(lineno+1, line, encoding)
+            line = self._check_encoding(lineno+1, line, encoding)
+            if line is not None:
+                self._check_note(notes, lineno+1, line)

 def register(linter):
     """required method to auto register this checker"""

View File

@@ -36,11 +36,15 @@ class Similar:
         self.ignore_imports = ignore_imports
         self.linesets = []

-    def append_stream(self, streamid, stream):
+    def append_stream(self, streamid, stream, encoding=None):
         """append a file to search for similarities"""
         stream.seek(0) # XXX may be removed with astroid > 0.23
+        if encoding is None:
+            readlines = stream.readlines
+        else:
+            readlines = lambda: [line.decode(encoding) for line in stream]
         self.linesets.append(LineSet(streamid,
-                                     stream.readlines(),
+                                     readlines(),
                                      self.ignore_comments,
                                      self.ignore_docstrings,
                                      self.ignore_imports))
@@ -288,7 +292,7 @@ class SimilarChecker(BaseChecker, Similar):
         stream must implement the readlines method
         """
-        self.append_stream(self.linter.current_name, node.file_stream)
+        self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)

     def close(self):
         """compute and display similarities on closing (i.e. end of parsing)"""

View File

@@ -112,12 +112,13 @@ def category_id(id):
 def tokenize_module(module):
     stream = module.file_stream
     stream.seek(0)
-    if sys.version_info < (3, 0) and module.file_encoding is not None:
-        readline = lambda: stream.readline().decode(module.file_encoding,
+    readline = stream.readline
+    if sys.version_info < (3, 0):
+        if module.file_encoding is not None:
+            readline = lambda: stream.readline().decode(module.file_encoding,
                                                     'replace')
-    else:
-        readline = stream.readline
-    return list(tokenize.generate_tokens(readline))
+        return list(tokenize.generate_tokens(readline))
+    return list(tokenize.tokenize(readline))

 class Message(object):
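
On the Python 2 branch the per-line decode keeps the 'replace' error
handler, so undecodable bytes degrade to U+FFFD instead of aborting
tokenization. A quick illustration of that handler on its own:

    import io

    stream = io.BytesIO(b"s = '\xff'\n")  # \xff is not valid UTF-8
    readline = lambda: stream.readline().decode('utf-8', 'replace')
    print(readline())  # prints: s = '\ufffd' (the replacement character)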