mirror of
https://github.com/openvswitch/ovs
synced 2025-08-31 22:35:15 +00:00
python: Make invalid UTF-8 sequence messages consistent across Python versions.
Given the invalid input <C0 22>, some versions of Python report <C0> as the invalid sequence and other versions report <C0 22> as the invalid sequence. Similarly, given input <ED 80 7F>, some report <ED 80> and others report <ED 80 7F> as the invalid sequence. This caused spurious test failures for the test "no invalid UTF-8 sequences in strings - Python", so this commit makes the messages consistent by dropping the extra trailing byte from the message: only bytes with value 0x80 or greater are reported, since a byte below 0x80 is plain ASCII and can never be part of a multi-byte UTF-8 sequence. I first noticed the longer sequences <C0 22> and <ED 80 7F> on Ubuntu 10.04 with Python version 2.6.5-0ubuntu1, but the behavior undoubtedly exists elsewhere as well.
This commit is contained in:
@@ -113,7 +113,8 @@ def from_string(s):
|
|||||||
try:
|
try:
|
||||||
s = unicode(s, 'utf-8')
|
s = unicode(s, 'utf-8')
|
||||||
except UnicodeDecodeError, e:
|
except UnicodeDecodeError, e:
|
||||||
seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]])
|
seq = ' '.join(["0x%2x" % ord(c)
|
||||||
|
for c in e.object[e.start:e.end] if ord(c) >= 0x80])
|
||||||
return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
|
return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
|
||||||
p = Parser(check_trailer=True)
|
p = Parser(check_trailer=True)
|
||||||
p.feed(s)
|
p.feed(s)
|
||||||
|
Reference in New Issue
Block a user