From 205f2a0fdc9c0a0f7486db3783b52834a48dd188 Mon Sep 17 00:00:00 2001 From: Rory Glenn Date: Sun, 15 Feb 2026 23:25:56 +0000 Subject: [PATCH] gh-53891: Include label value in IDNA 'label too long' errors Include the offending label in the reason string of UnicodeEncodeError for 'label too long' errors in the IDNA codec, making error messages more informative and easier to debug. Before: label too long After: label too long: 'aaaa...' --- Lib/encodings/idna.py | 11 +++++---- Lib/test/test_codecs.py | 23 +++++++++++++++++++ ...6-02-15-23-25-41.gh-issue-53891.JlZccA.rst | 2 ++ 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index d31ee07ab45b76..8e245342f23771 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -77,7 +77,8 @@ def ToASCII(label): # type: (str) -> bytes if len(label) == 0: raise UnicodeEncodeError("idna", label, 0, 1, "label empty") else: - raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") + raise UnicodeEncodeError("idna", label, 0, len(label), + f"label too long: {label!r}") # Step 2: nameprep label = nameprep(label) @@ -95,7 +96,8 @@ def ToASCII(label): # type: (str) -> bytes if len(label) == 0: raise UnicodeEncodeError("idna", label, 0, 1, "label empty") else: - raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") + raise UnicodeEncodeError("idna", label, 0, len(label), + f"label too long: {label!r}") # Step 5: Check ACE prefix if label.lower().startswith(sace_prefix): @@ -112,7 +114,8 @@ def ToASCII(label): # type: (str) -> bytes # do not check for empty as we prepend ace_prefix. if len(label_ascii) < 64: return label_ascii - raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") + raise UnicodeEncodeError("idna", label, 0, len(label), + f"label too long: {label!r}") def ToUnicode(label): if len(label) > 1024: @@ -201,7 +204,7 @@ def encode(self, input, errors='strict'): if len(label) >= 64: offset = sum(len(l) for l in labels[:i]) + i raise UnicodeEncodeError("idna", input, offset, offset+len(label), - "label too long") + f"label too long: {label.decode('ascii')!r}") return result, len(input) result = bytearray() diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index c31faec9ee5214..cbfc26675308e5 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1640,6 +1640,29 @@ def test_builtin_decode_length_limit(self): with self.assertRaisesRegex(UnicodeDecodeError, "too long"): (b"xn--016c"+b"a"*70).decode("idna") + def test_error_message_label_too_long(self): + # gh-53891: "label too long" errors should include the offending label + long_label = "a" * 70 + domain = f"{long_label}.com" + with self.assertRaises(UnicodeEncodeError) as cm: + domain.encode("idna") + self.assertIn(long_label, cm.exception.reason) + self.assertIn("label too long", cm.exception.reason) + + def test_error_message_label_too_long_non_ascii(self): + # gh-53891: non-ASCII labels should also be included in the message + long_label = "\xe4" * 70 + domain = f"{long_label}.com" + with self.assertRaises(UnicodeEncodeError) as cm: + domain.encode("idna") + self.assertIn("label too long", cm.exception.reason) + + def test_error_message_label_empty(self): + # gh-53891: empty label errors should have a clear reason + with self.assertRaises(UnicodeEncodeError) as cm: + "a..com".encode("idna") + self.assertEqual(cm.exception.reason, "label empty") + def test_stream(self): r = codecs.getreader("idna")(io.BytesIO(b"abc")) r.read(3) diff --git a/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst b/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst new file mode 100644 index 00000000000000..8bd9a3fdfc0cbc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst @@ -0,0 +1,2 @@ +Include the offending label value in IDNA codec "label too long" error +messages to make them more informative and easier to debug.