2006-04-26 10:48:09 +00:00
|
|
|
# pylint: disable-msg=W0511
|
|
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
|
|
# Foundation; either version 2 of the License, or (at your option) any later
|
|
|
|
# version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along with
|
|
|
|
# this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
2010-03-23 10:39:48 +00:00
|
|
|
""" Copyright (c) 2000-2010 LOGILAB S.A. (Paris, FRANCE).
|
2006-04-26 10:48:09 +00:00
|
|
|
http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
|
|
|
|
|
|
|
Check that source code is ASCII only or has an encoding declaration (PEP 263),
and report warning notes such as FIXME or XXX found in the source.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
from pylint.interfaces import IRawChecker
|
|
|
|
from pylint.checkers import BaseChecker
|
|
|
|
|
|
|
|
def is_ascii(string):
    """Tell whether `string` contains only ASCII characters.

    Return a ``(is_ascii, lineno)`` tuple:

    * ``(True, 0)`` when every character (or byte) is below 128;
    * ``(False, lineno)`` otherwise, where `lineno` is the 1-based number of
      the first line holding a non-ASCII character.

    `string` may be a text string or a byte string.
    """
    # Line terminators are kept so that a non-ASCII separator (for instance
    # U+0085 in a text string) is examined as well instead of being dropped
    # silently by splitlines().
    for lineno, line in enumerate(string.splitlines(True), 1):
        # Iterating a Python 3 bytes object yields ints; any other string
        # type yields one-character strings that need ord().
        if any((char if isinstance(char, int) else ord(char)) >= 128
               for char in line):
            return False, lineno
    return True, 0
|
2010-04-16 15:52:38 +00:00
|
|
|
|
2006-04-26 10:48:09 +00:00
|
|
|
# Regexp matching both the emacs ("-*- coding: xxx -*-") and the vim
# ("vim: set fileencoding=xxx") encoding declarations; group 1 is the
# declared encoding name.  Raw string so that "\s" reaches the regexp engine
# instead of being an invalid string escape.
ENCODING_RGX = re.compile(r"[^#]*#*.*coding[:=]\s*([^\s]+)")


def guess_encoding(string):
    """Try to guess the encoding of a python file given as a string.

    A leading UTF-8 byte-order mark yields 'UTF-8'.  Otherwise the first two
    lines are searched for an emacs / vim style declaration (PEP 263) and the
    declared name is returned as written in the file.

    `string` is the raw file content: a `str`, or a `bytes` object on
    Python 3.

    Return None if no encoding information is found.
    """
    # Developer sanity check, kept as an assertion like the original one.
    assert isinstance(string, (str, bytes)), type(string)
    if not isinstance(string, str):
        # Only reachable with Python 3 bytes.  latin-1 maps every byte to the
        # code point of the same value, so the BOM test and the regexp below
        # see exactly the same characters as for a byte string.
        string = string.decode('latin-1')
    # check for UTF-8 byte-order mark
    if string.startswith('\xef\xbb\xbf'):
        return 'UTF-8'
    # PEP 263: the declaration must be on the first or second line
    for line in string.split('\n', 2)[:2]:
        # check for emacs / vim encoding declaration
        match = ENCODING_RGX.match(line)
        if match is not None:
            return match.group(1)
    return None
|
|
|
|
|
2010-04-16 15:52:38 +00:00
|
|
|
|
2006-04-26 10:48:09 +00:00
|
|
|
# Messages emitted by the checker: message id -> (message template, help text).
# Help texts are built with implicit string concatenation rather than a
# backslash continuation inside the literal, which would embed the
# indentation of the following source line into the text.
MSGS = {
    'E0501': ('Non ascii characters found but no encoding specified (PEP 263)',
              'Used when some non ascii characters are detected but no '
              'encoding is specified, as explicited in the PEP 263.'),
    'E0502': ('Wrong encoding specified (%s)',
              'Used when a known encoding is specified but the file doesn\'t '
              'seem to be actually in this encoding.'),
    'E0503': ('Unknown encoding specified (%s)',
              'Used when an encoding is specified, but it\'s unknown to Python.'
              ),

    'W0511': ('%s',
              'Used when a warning note as FIXME or XXX is detected.'),
    }
|
|
|
|
|
|
|
|
class EncodingChecker(BaseChecker):
    """checks for:
    * warning notes in the code like FIXME, XXX
    * PEP 263: source code with non ascii character but no encoding declaration
    """
    __implements__ = IRawChecker

    # configuration section name
    name = 'miscellaneous'
    msgs = MSGS

    options = (('notes',
                {'type' : 'csv', 'metavar' : '<comma separated values>',
                 'default' : ('FIXME', 'XXX', 'TODO'),
                 'help' : 'List of note tags to take in consideration, '
                          'separated by a comma.'
                 }),
               )

    def __init__(self, linter=None):
        BaseChecker.__init__(self, linter)

    def process_module(self, stream):
        """Inspect the source file to find encoding problems or FIXME-like
        notes.

        `stream` is the module source as a file-like object; it must support
        read(), seek(0) and readlines().  Problems are reported through
        self.add_message().
        """
        self._check_encoding(stream.read())
        # the whole stream has been consumed above: rewind before the
        # line-by-line scan
        stream.seek(0)
        self._check_notes(stream)

    def _check_encoding(self, data):
        """Report E0501 / E0502 / E0503 for the raw module content `data`."""
        only_ascii, lineno = is_ascii(data)
        if only_ascii:
            return
        encoding = guess_encoding(data)
        if encoding is None:
            # non ascii content without any declaration
            self.add_message('E0501', line=lineno)
            return
        if not isinstance(data, bytes):
            # Text that is not a byte string has already been decoded by
            # whoever produced it, so there is nothing left to verify here.
            return
        try:
            # decode() exists on byte strings in both Python 2 and Python 3,
            # unlike the Python 2-only unicode() builtin
            data.decode(encoding)
        except UnicodeError:
            self.add_message('E0502', args=encoding, line=1)
        except LookupError:
            self.add_message('E0503', args=encoding, line=1)

    def _check_notes(self, stream):
        """Report W0511 for each line of `stream` containing a note tag.

        At most one message is emitted per line, for the first configured tag
        (in configuration order) found in that line.
        """
        # The option is documented as a list of tags, not of regular
        # expressions: escape them so that a tag such as "???" is matched
        # literally instead of failing to compile.
        patterns = [re.compile(re.escape(tag)) for tag in self.config.notes]
        for linenum, line in enumerate(stream.readlines(), 1):
            for pattern in patterns:
                match = pattern.search(line)
                if match:
                    # Strip only the line terminator: slicing with [:-1]
                    # would drop a real character when the last line of the
                    # file has no trailing newline.
                    text = line[match.start():].rstrip('\r\n')
                    self.add_message('W0511', args=text, line=linenum)
                    break
|
2010-04-16 15:52:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2006-04-26 10:48:09 +00:00
|
|
|
def register(linter):
    """Auto-registration hook: create an EncodingChecker bound to `linter`
    and register it with that linter.
    """
    checker = EncodingChecker(linter)
    linter.register_checker(checker)
|