diff --git a/checkers/__init__.py b/checkers/__init__.py
index 13080a86a..e1254a7a4 100644
--- a/checkers/__init__.py
+++ b/checkers/__init__.py
@@ -127,7 +127,10 @@ class BaseRawChecker(BaseChecker):
                       DeprecationWarning)
         stream = node.file_stream
         stream.seek(0) # XXX may be removed with astroid > 0.23
-        self.process_tokens(tokenize.generate_tokens(stream.readline))
+        if sys.version_info <= (3, 0):
+            self.process_tokens(tokenize.generate_tokens(stream.readline))
+        else:
+            self.process_tokens(tokenize.tokenize(stream.readline))
 
     def process_tokens(self, tokens):
         """should be overridden by subclasses"""
diff --git a/checkers/misc.py b/checkers/misc.py
index 8ecf2b541..9d7572e75 100644
--- a/checkers/misc.py
+++ b/checkers/misc.py
@@ -60,13 +60,13 @@ separated by a comma.'
 
     def _check_encoding(self, lineno, line, file_encoding):
         try:
-            unicode(line, file_encoding)
+            return unicode(line, file_encoding)
         except UnicodeDecodeError, e:
             self.add_message('W0512', line=lineno,
                              args=(file_encoding, e.args[2]))
 
     def process_module(self, module):
-        """inspect the source file to found encoding problem or fixmes like
+        """inspect the source file to find encoding problems or fixmes like
         notes
         """
         stream = module.file_stream
@@ -77,8 +77,9 @@ separated by a comma.'
         else:
             encoding = 'ascii'
         for lineno, line in enumerate(stream):
-            self._check_note(notes, lineno+1, line)
-            self._check_encoding(lineno+1, line, encoding)
+            line = self._check_encoding(lineno+1, line, encoding)
+            if line is not None:
+                self._check_note(notes, lineno+1, line)
 
 def register(linter):
     """required method to auto register this checker"""
diff --git a/checkers/similar.py b/checkers/similar.py
index b7c87a7c8..e7a16c4c3 100644
--- a/checkers/similar.py
+++ b/checkers/similar.py
@@ -36,11 +36,15 @@ class Similar:
         self.ignore_imports = ignore_imports
         self.linesets = []
 
-    def append_stream(self, streamid, stream):
+    def append_stream(self, streamid, stream, encoding=None):
         """append a file to search for similarities"""
         stream.seek(0) # XXX may be removed with astroid > 0.23
+        if encoding is None:
+            readlines = stream.readlines
+        else:
+            readlines = lambda: [line.decode(encoding) for line in stream]
         self.linesets.append(LineSet(streamid,
-                                     stream.readlines(),
+                                     readlines(),
                                      self.ignore_comments,
                                      self.ignore_docstrings,
                                      self.ignore_imports))
@@ -288,7 +292,7 @@ class SimilarChecker(BaseChecker, Similar):
 
         stream must implement the readlines method
         """
-        self.append_stream(self.linter.current_name, node.file_stream)
+        self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)
 
     def close(self):
         """compute and display similarities on closing (i.e. end of parsing)"""
diff --git a/utils.py b/utils.py
index 4b109dbb0..2dac831d3 100644
--- a/utils.py
+++ b/utils.py
@@ -112,12 +112,13 @@ def category_id(id):
 
 def tokenize_module(module):
     stream = module.file_stream
     stream.seek(0)
-    if sys.version_info < (3, 0) and module.file_encoding is not None:
-        readline = lambda: stream.readline().decode(module.file_encoding,
+    readline = stream.readline
+    if sys.version_info < (3, 0):
+        if module.file_encoding is not None:
+            readline = lambda: stream.readline().decode(module.file_encoding,
                                                         'replace')
-    else:
-        readline = stream.readline
-    return list(tokenize.generate_tokens(readline))
+        return list(tokenize.generate_tokens(readline))
+    return list(tokenize.tokenize(readline))
 
 class Message(object):
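A note on the version split in BaseRawChecker.process_module and tokenize_module: under Python 3, tokenize.generate_tokens() expects a text-mode readline, while node.file_stream is a binary stream; tokenize.tokenize() instead takes a bytes readline and detects the source encoding itself (PEP 263 coding cookie or BOM). A minimal standalone sketch of the same branching, assuming a file opened in binary mode (dump_tokens is a made-up helper, not part of the patch):

    import sys
    import tokenize

    def dump_tokens(path):
        # Binary stream, as with node.file_stream in the patch.
        stream = open(path, 'rb')
        try:
            if sys.version_info < (3, 0):
                # Python 2: generate_tokens() accepts the raw byte lines.
                tokens = tokenize.generate_tokens(stream.readline)
            else:
                # Python 3: tokenize() wants bytes and sniffs the encoding itself.
                tokens = tokenize.tokenize(stream.readline)
            for token in tokens:
                print(token)
        finally:
            stream.close()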
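The misc.py change makes _check_encoding do double duty: it still emits W0512 on a decode failure, but now returns the decoded unicode line (and, implicitly, None on failure), so _check_note only ever scans text that actually decoded. A self-contained sketch of the new control flow in the module's Python 2 idiom (check_line and the printed messages are illustrative, not pylint's API):

    def check_line(lineno, line, encoding='ascii'):
        try:
            decoded = unicode(line, encoding)
        except UnicodeDecodeError, exc:
            # W0512: exc.args[2] is the offset where decoding failed ...
            print 'W0512 on line %d: invalid %s byte at offset %d' \
                  % (lineno, encoding, exc.args[2])
            return  # undecodable line: skip the note check, as the patch does
        # ... and only look for notes on lines that decoded cleanly.
        for note in ('FIXME', 'XXX', 'TODO'):
            if note in decoded:
                print 'W0511 on line %d: %s' % (lineno, note)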
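In similar.py, the optional encoding argument turns the duplicate-line comparison into a comparison of decoded text rather than raw bytes, so the same code stored in two different encodings still hashes equal in LineSet. A hypothetical driver (the file names and min-lines value are invented; Similar.run() is assumed to be the class's standalone entry point):

    sim = Similar(4)  # report runs of 4 or more similar lines
    for name, enc in (('mod_latin1.py', 'latin1'), ('mod_utf8.py', 'utf-8')):
        stream = open(name, 'rb')  # binary, like node.file_stream
        sim.append_stream(name, stream, encoding=enc)
    sim.run()  # duplicates are now matched on unicode text, not bytes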
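One side effect worth noting of tokenize_module() switching to tokenize.tokenize() under Python 3: the token stream gains a leading ENCODING token that generate_tokens() never produced under Python 2, so consumers that index into the list may see an off-by-one. A quick illustration (hypothetical snippet, not part of the patch):

    import io
    import sys
    import tokenize

    if sys.version_info >= (3, 0):
        stream = io.BytesIO(b'x = 1\n')
        tokens = list(tokenize.tokenize(stream.readline))
        # First token is ENCODING ('utf-8' here), then the NAME token for 'x'.
        assert tokens[0].type == tokenize.ENCODING
        assert tokens[1].string == 'x'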