diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index d31ee07ab45b76..8e245342f23771 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -77,7 +77,8 @@ def ToASCII(label): # type: (str) -> bytes
         if len(label) == 0:
             raise UnicodeEncodeError("idna", label, 0, 1, "label empty")
         else:
-            raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
+            raise UnicodeEncodeError("idna", label, 0, len(label),
+                                     f"label too long: {label!r}")
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -95,7 +96,8 @@ def ToASCII(label): # type: (str) -> bytes
         if len(label) == 0:
             raise UnicodeEncodeError("idna", label, 0, 1, "label empty")
         else:
-            raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
+            raise UnicodeEncodeError("idna", label, 0, len(label),
+                                     f"label too long: {label!r}")
 
     # Step 5: Check ACE prefix
     if label.lower().startswith(sace_prefix):
@@ -112,7 +114,8 @@ def ToASCII(label): # type: (str) -> bytes
     # do not check for empty as we prepend ace_prefix.
     if len(label_ascii) < 64:
         return label_ascii
-    raise UnicodeEncodeError("idna", label, 0, len(label), "label too long")
+    raise UnicodeEncodeError("idna", label, 0, len(label),
+                             f"label too long: {label!r}")
 
 def ToUnicode(label):
     if len(label) > 1024:
@@ -201,7 +204,7 @@ def encode(self, input, errors='strict'):
                 if len(label) >= 64:
                     offset = sum(len(l) for l in labels[:i]) + i
                     raise UnicodeEncodeError("idna", input, offset, offset+len(label),
-                                             "label too long")
+                                             f"label too long: {label.decode('ascii')!r}")
             return result, len(input)
 
         result = bytearray()
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index c31faec9ee5214..cbfc26675308e5 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1640,6 +1640,30 @@ def test_builtin_decode_length_limit(self):
         with self.assertRaisesRegex(UnicodeDecodeError, "too long"):
             (b"xn--016c"+b"a"*70).decode("idna")
 
+    def test_error_message_label_too_long(self):
+        # gh-53891: "label too long" errors should include the offending label
+        long_label = "a" * 70
+        domain = f"{long_label}.com"
+        with self.assertRaises(UnicodeEncodeError) as cm:
+            domain.encode("idna")
+        self.assertIn(long_label, cm.exception.reason)
+        self.assertIn("label too long", cm.exception.reason)
+
+    def test_error_message_label_too_long_non_ascii(self):
+        # gh-53891: non-ASCII labels should also be included in the message
+        long_label = "\xe4" * 70
+        domain = f"{long_label}.com"
+        with self.assertRaises(UnicodeEncodeError) as cm:
+            domain.encode("idna")
+        self.assertIn(long_label, cm.exception.reason)
+        self.assertIn("label too long", cm.exception.reason)
+
+    def test_error_message_label_empty(self):
+        # gh-53891: empty label errors should have a clear reason
+        with self.assertRaises(UnicodeEncodeError) as cm:
+            "a..com".encode("idna")
+        self.assertEqual(cm.exception.reason, "label empty")
+
     def test_stream(self):
         r = codecs.getreader("idna")(io.BytesIO(b"abc"))
         r.read(3)
diff --git a/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst b/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst
new file mode 100644
index 00000000000000..8bd9a3fdfc0cbc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-02-15-23-25-41.gh-issue-53891.JlZccA.rst
@@ -0,0 +1,2 @@
+Include the offending label value in IDNA codec "label too long" error
+messages to make them more informative and easier to debug.