From 880fc52d2d4ccdcbf4a7b76f1b4ba6b9e7482dff Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Mon, 14 Jul 2014 10:21:32 +0800 Subject: [PATCH] parsemail: Fallback to common charsets when charset is None or x-unknown We recently encountered a case in our glibc patchwork instance on sourceware, where a patch was dropped because it had x-unknown charset. This change adds a fallback on a set of encodings (instead of just utf-8) when the charset is not mentioned or if it is set as x-unknown. Minor changes and testcase by Jeremy Kerr Signed-off-by: Siddhesh Poyarekar Signed-off-by: Jeremy Kerr --- apps/patchwork/bin/parsemail.py | 40 +++++++-- .../tests/mail/0010-invalid-charset.mbox | 90 +++++++++++++++++++ apps/patchwork/tests/test_patchparser.py | 11 +++ 3 files changed, 135 insertions(+), 6 deletions(-) create mode 100644 apps/patchwork/tests/mail/0010-invalid-charset.mbox diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index b6eb97a..2a4866f 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -24,6 +24,7 @@ import re import datetime import time import operator +import codecs from email import message_from_file try: from email.header import Header, decode_header @@ -147,6 +148,13 @@ def find_pull_request(content): return match.group(1) return None +def try_decode(payload, charset): + try: + payload = unicode(payload, charset) + except UnicodeDecodeError: + return None + return payload + def find_content(project, mail): patchbuf = None commentbuf = '' @@ -157,15 +165,35 @@ def find_content(project, mail): continue payload = part.get_payload(decode=True) - charset = part.get_content_charset() subtype = part.get_content_subtype() - # if we don't have a charset, assume utf-8 - if charset is None: - charset = 'utf-8' - if not isinstance(payload, unicode): - payload = unicode(payload, charset) + charset = part.get_content_charset() + + # Check that we have a charset that we understand. 
Otherwise, + # ignore it and fall back to our standard set. + if charset is not None: + try: + codec = codecs.lookup(charset) + except LookupError: + charset = None + + # If there is no charset or if it is unknown, then try some common + # charsets before we fail. + if charset is None: + try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1'] + else: + try_charsets = [charset] + + for cset in try_charsets: + decoded_payload = try_decode(payload, cset) + if decoded_payload is not None: + break + payload = decoded_payload + + # Could not find a valid decoded payload. Fail. + if payload is None: + return (None, None) if subtype in ['x-patch', 'x-diff']: patchbuf = payload diff --git a/apps/patchwork/tests/mail/0010-invalid-charset.mbox b/apps/patchwork/tests/mail/0010-invalid-charset.mbox new file mode 100644 index 0000000..10b369d --- /dev/null +++ b/apps/patchwork/tests/mail/0010-invalid-charset.mbox @@ -0,0 +1,90 @@ +From libc-alpha-return-50517-siddhesh=redhat.com@sourceware.org Thu Jun 5 10:36:33 2014 +Received: (qmail 11948 invoked by alias); 4 Jun 2014 17:51:01 -0000 +Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm +List-Id: +Sender: libc-alpha-owner@sourceware.org +Date: Wed, 4 Jun 2014 17:50:46 +0000 +From: "Joseph S. Myers" +To: +Subject: Fix pow overflow in non-default rounding modes (bug 16315) +Message-ID: +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="-1152306461-1522705971-1401904246=:3719" +Content-Length: 24171 + +---1152306461-1522705971-1401904246=:3719 +Content-Type: text/plain; charset="none" +Content-Transfer-Encoding: QUOTED-PRINTABLE + +This patch, relative to a tree with + applied, +fixes bug 16315, bad pow handling of overflow/underflow in non-default +rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run +all tests in all rounding modes. + +There are two main issues here.
First, various implementations +compute a negative result by negating a positive result, but this +yields inappropriate overflow / underflow values for directed +rounding, so either overflow / underflow results need recomputing in +the correct sign, or the relevant overflowing / underflowing operation +needs to be made to have a result of the correct sign. Second, the +dbl-64 implementation sets FE_TONEAREST internally; in the overflow / +underflow case, the result needs recomputing in the original rounding +mode. + +Tested x86_64 and x86 and ulps updated accordingly. + +(auto-libm-test-out diffs omitted below.) + +2014-06-04 Joseph Myers + +=09[BZ #16315] +=09* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly +=09overflowing or underflowing operations take place with sign of +=09result. +=09* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise. +=09* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. +=09* sysdeps/ieee754/dbl-64/e_pow.c: Include . +=09(__ieee754_pow): Recompute overflowing and underflowing results in +=09original rounding mode. +=09* sysdeps/x86/fpu/powl_helper.c: Include . +=09(__powl_helper): Allow negative argument X and scale negated value +=09as needed. Avoid passing value outside [-1, 1] to f2xm1. +=09* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly +=09overflowing or underflowing operations take place with sign of +=09result. +=09* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: +=09Include . +=09* math/auto-libm-test-in: Add more tests of pow. +=09* math/auto-libm-test-out: Regenerated. +=09* math/libm-test.inc (pow_test): Use ALL_RM_TEST. +=09(pow_tonearest_test_data): Remove. +=09(pow_test_tonearest): Likewise. +=09(pow_towardzero_test_data): Likewise. +=09(pow_test_towardzero): Likewise. +=09(pow_downward_test_data): Likewise. +=09(pow_test_downward): Likewise. +=09(pow_upward_test_data): Likewise. +=09(pow_test_upward): Likewise. +=09(main): Don't call removed functions. 
+=09* sysdeps/i386/fpu/libm-test-ulps: Update. +=09* sysdeps/x86_64/fpu/libm-test-ulps: Likewise. + +diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/mult= +iarch/e_pow.c +index a740b6c..433cce0 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c +@@ -1,5 +1,6 @@ + #ifdef HAVE_FMA4_SUPPORT + # include ++# include + # include +=20 + extern double __ieee754_pow_sse2 (double, double); + +--=20 +Joseph S. Myers +joseph@codesourcery.com +---1152306461-1522705971-1401904246=:3719-- diff --git a/apps/patchwork/tests/test_patchparser.py b/apps/patchwork/tests/test_patchparser.py index 0496a69..d9a24c1 100644 --- a/apps/patchwork/tests/test_patchparser.py +++ b/apps/patchwork/tests/test_patchparser.py @@ -422,6 +422,17 @@ class CVSFormatPatchTest(MBoxPatchTest): self.assertTrue(comment is not None) self.assertTrue(patch.content.startswith('Index')) +class CharsetFallbackPatchTest(MBoxPatchTest): + """ Test mail with an invalid charset name, and check that we can parse + with one of the fallback encodings""" + + mail_file = '0010-invalid-charset.mbox' + + def testPatch(self): + (patch, comment) = find_content(self.project, self.mail) + self.assertTrue(patch is not None) + self.assertTrue(comment is not None) + class DelegateRequestTest(TestCase): patch_filename = '0001-add-line.patch' msgid = '<1@example.com>' -- 2.39.2