From 183dcf93a87065e976971d0d5c33565f7b7d2553 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Fri, 28 Apr 2023 12:03:03 +0200 Subject: [PATCH] General: Add support for unicode email addresses in is_email and sanitize_email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the unicode address extensions in RFC 6530-3 and refactors the code so there are fewer long regexes and less duplication between sanitize_email and is_email. A new class, WP_Email_Address, provides the shared parts. Opting out of unicode support is easy: default-filters.php adds unicode support by adding filters, which can be removed. sanitize_email no longer makes major changes like removing an entire subdomain from someone's address; it only cleans up things like soft hyphens and whitespace — changes that happen when copying an email address from text. During testing, it became clear that antispambot() worked only for strings using a single-byte encoding, while this uses UTF8. Fixed. Fixes #31992. Props SirLouen, dmsnell, tusharbharti, mukeshpanchal27, akirk. 
--- src/wp-includes/class-wp-email-address.php | 246 ++++++++++++++ src/wp-includes/default-filters.php | 11 + src/wp-includes/formatting.php | 295 ++++++++--------- src/wp-settings.php | 1 + tests/phpunit/tests/auth.php | 2 +- .../phpunit/tests/formatting/antispambot.php | 3 + tests/phpunit/tests/formatting/isEmail.php | 7 +- .../tests/formatting/sanitizeEmail.php | 26 +- .../tests/privacy/wpCreateUserRequest.php | 5 +- .../rest-api/rest-comments-controller.php | 4 +- .../tests/wp-email-address/wpEmailAddress.php | 304 ++++++++++++++++++ 11 files changed, 726 insertions(+), 178 deletions(-) create mode 100644 src/wp-includes/class-wp-email-address.php create mode 100644 tests/phpunit/tests/wp-email-address/wpEmailAddress.php diff --git a/src/wp-includes/class-wp-email-address.php b/src/wp-includes/class-wp-email-address.php new file mode 100644 index 0000000000000..5c46a53b5f3aa --- /dev/null +++ b/src/wp-includes/class-wp-email-address.php @@ -0,0 +1,246 @@ +localpart = $localpart; + $this->domain = $domain; + } + + /** + * Creates a WP_Email_Address from a string. + * + * This method is intended to accept all strings that are considered valid email + * addresses by the WHATWG HTML specification for the email input type: + * + * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email) + * + * and some additional addresses, while rejecting strings that + * are more likely to be typos, mispastes, or attacks. This class + * may reject a few addresses that are valid according to RFC 5322, + * but it always accepts an address if it's valid according to + * WHATWG. Put differently: If users can type an address into + * the major browsers of 2026, this class accepts it; if + * they can't (in 2026), this class may or may not. (Note that + * "