WordPress · dmsnell · May 25, 2026 · May 26, 2026 · May 26, 2026
diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php
@@ -110,6 +110,147 @@
 	);
 }
 
+if ( ! function_exists( 'mb_chr' ) ) :
+	/**
+	 * Compat function to mimic mb_chr().
+	 *
+	 * Only declared when no mb_chr() function already exists; all of the
+	 * work is delegated to _mb_chr().
+	 *
+	 * @ignore
+	 * @since 7.1.0
+	 *
+	 * @see _mb_chr()
+	 *
+	 * @param int          $codepoint A Unicode codepoint value, e.g. 128024 for U+1F418 ELEPHANT
+	 * @param "UTF-8"|null $encoding  Must be 'UTF-8' or null.
+	 * @return string|false A string containing the requested character, if it can be represented in the specified encoding or false on failure.
+	 */
+	function mb_chr( $codepoint, $encoding = null ) {
+		return _mb_chr( $codepoint, $encoding );
+	}
+endif;
+
+/**
+ * Internal compat function to mimic mb_chr().
+ *
+ * Encodes a single Unicode code point as a UTF-8 byte sequence of one to
+ * four bytes. Surrogate halves (U+D800–U+DFFF) and values outside of the
+ * range U+0000–U+10FFFF cannot be encoded and produce `false`.
+ *
+ * @ignore
+ * @since 7.1.0
+ *
+ * @param int          $codepoint A Unicode codepoint value, e.g. 128024 for U+1F418 ELEPHANT
+ * @param "UTF-8"|null $encoding  Must be 'UTF-8' or null.
+ * @return string|false A string containing the requested character, if it can be represented in the specified encoding or false on failure.
+ */
+function _mb_chr( $codepoint, $encoding = null ) {
+	// Only UTF-8 is supported, whether named explicitly or implied by null.
+	if ( null !== $encoding && 'UTF-8' !== $encoding ) {
+		return false;
+	}
+
+	if ( ! is_int( $codepoint ) ) {
+		return false;
+	}
+
+	// Reject anything which is not a Unicode scalar value.
+	$is_out_of_range = $codepoint < 0 || $codepoint > 0x10FFFF;
+	$is_surrogate    = $codepoint >= 0xD800 && $codepoint <= 0xDFFF;
+	if ( $is_out_of_range || $is_surrogate ) {
+		return false;
+	}
+
+	// Single byte: US-ASCII is encoded as itself.
+	if ( $codepoint < 0x80 ) {
+		return chr( $codepoint );
+	}
+
+	// Every multi-byte form ends with a continuation byte holding the lowest six bits.
+	$last = chr( 0x80 | ( $codepoint & 0x3F ) );
+
+	// Two bytes: 110xxxxx 10xxxxxx.
+	if ( $codepoint < 0x800 ) {
+		return chr( 0xC0 | ( $codepoint >> 6 ) ) . $last;
+	}
+
+	// Three- and four-byte forms share the continuation byte holding bits 6–11.
+	$penultimate = chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) );
+
+	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+	if ( $codepoint < 0x10000 ) {
+		return chr( 0xE0 | ( $codepoint >> 12 ) ) . $penultimate . $last;
+	}
+
+	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Values above U+10FFFF were rejected earlier.
+	$lead   = chr( 0xF0 | ( $codepoint >> 18 ) );
+	$second = chr( 0x80 | ( ( $codepoint >> 12 ) & 0x3F ) );
+
+	return $lead . $second . $penultimate . $last;
+}
+
+if ( ! function_exists( 'mb_ord' ) ) :
+	/**
+	 * Compat function to mimic mb_ord().
+	 *
+	 * Only declared when no mb_ord() function already exists; the internal
+	 * _mb_ord() implementation does all of the work.
+	 *
+	 * @ignore
+	 * @since 7.1.0
+	 *
+	 * @see _mb_ord()
+	 *
+	 * @param string       $string   Return the code point at the start of this string.
+	 * @param "UTF-8"|null $encoding Must be 'UTF-8' or null.
+	 * @return int|false The Unicode code point for the first character of string or false on failure.
+	 */
+	function mb_ord( $string, $encoding = null ) {
+		$code_point = _mb_ord( $string, $encoding );
+
+		return $code_point;
+	}
+endif;
+
+/**
+ * Internal compat function to mimic mb_ord().
+ *
+ * Decodes the UTF-8 byte sequence at the start of the given string into its
+ * Unicode code point. Anything which is not a non-empty string, any encoding
+ * other than UTF-8, and any string for which the scanner does not report
+ * exactly one leading code point all produce `false`.
+ *
+ * @ignore
+ * @since 7.1.0
+ *
+ * @param string       $string   Return the code point at the start of this string.
+ * @param "UTF-8"|null $encoding Must be 'UTF-8' or null.
+ * @return int|false The Unicode code point for the first character of string or false on failure.
+ */
+function _mb_ord( $string, $encoding = null ) {
+	if ( ! is_string( $string ) || '' === $string || ( isset( $encoding ) && 'UTF-8' !== $encoding ) ) {
+		return false;
+	}
+
+	/*
+	 * Ask the scanner for at most one code point from the start of the string.
+	 * The decoding below relies on `$byte_length` holding the byte length of
+	 * that code point afterwards.
+	 *
+	 * NOTE(review): `_wp_scan_utf8()` is defined elsewhere; confirm that its
+	 * second argument is advanced to the end of the scanned code point.
+	 */
+	$byte_length    = 0;
+	$invalid_length = 0;
+	$found_count    = _wp_scan_utf8( $string, $byte_length, $invalid_length, null, 1 );
+
+	if ( 1 !== $found_count ) {
+		return false;
+	}
+
+	// These are valid code points, so no further validation is required.
+	$b0 = ord( $string[0] );
+
+	switch ( $byte_length ) {
+		case 1:
+			return $b0;
+
+		case 2:
+			return (
+				( ( $b0 & 0x1F ) << 6 ) |
+				( ( ord( $string[1] ) & 0x3F ) )
+			);
+
+		case 3:
+			return (
+				( ( $b0 & 0x0F ) << 12 ) |
+				( ( ord( $string[1] ) & 0x3F ) << 6 ) |
+				( ( ord( $string[2] ) & 0x3F ) )
+			);
+
+		case 4:
+			return (
+				( ( $b0 & 0x07 ) << 18 ) |
+				( ( ord( $string[1] ) & 0x3F ) << 12 ) |
+				( ( ord( $string[2] ) & 0x3F ) << 6 ) |
+				( ( ord( $string[3] ) & 0x3F ) )
+			);
+	}
+
+	/*
+	 * A UTF-8 code point spans one to four bytes, so this should be unreachable.
+	 * Fail explicitly instead of falling off the end of the function and
+	 * returning `null`, which would break the documented `int|false` contract.
+	 */
+	return false;
+}
+
 if ( ! function_exists( 'mb_substr' ) ) :
 	/**
 	 * Compat function to mimic mb_substr().

diff --git a/src/wp-includes/html-api/class-wp-html-decoder.php b/src/wp-includes/html-api/class-wp-html-decoder.php
@@ -424,40 +424,8 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
 	 * @return string Converted code point, or `�` if invalid.
 	 */
 	public static function code_point_to_utf8_bytes( $code_point ): string {
-		// Pre-check to ensure a valid code point.
-		if (
-			$code_point <= 0 ||
-			( $code_point >= 0xD800 && $code_point <= 0xDFFF ) ||
-			$code_point > 0x10FFFF
-		) {
-			return '�';
-		}
-
-		if ( $code_point <= 0x7F ) {
-			return chr( $code_point );
-		}
-
-		if ( $code_point <= 0x7FF ) {
-			$byte1 = chr( ( $code_point >> 6 ) | 0xC0 );
-			$byte2 = chr( $code_point & 0x3F | 0x80 );
-
-			return "{$byte1}{$byte2}";
-		}
-
-		if ( $code_point <= 0xFFFF ) {
-			$byte1 = chr( ( $code_point >> 12 ) | 0xE0 );
-			$byte2 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
-			$byte3 = chr( $code_point & 0x3F | 0x80 );
-
-			return "{$byte1}{$byte2}{$byte3}";
-		}
-
-		// Any values above U+10FFFF are eliminated above in the pre-check.
-		$byte1 = chr( ( $code_point >> 18 ) | 0xF0 );
-		$byte2 = chr( ( $code_point >> 12 ) & 0x3F | 0x80 );
-		$byte3 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
-		$byte4 = chr( $code_point & 0x3F | 0x80 );
+		/*
+		 * This method has always returned the replacement character for
+		 * U+0000 and below, whereas mb_chr() encodes U+0000 as a NUL byte.
+		 * Keep rejecting it here so existing callers see no change.
+		 */
+		if ( $code_point <= 0 ) {
+			return '�';
+		}
+
+		/*
+		 * Name the encoding explicitly: when it is omitted, the native
+		 * mb_chr() uses the mbstring internal encoding, which is not
+		 * guaranteed to be UTF-8.
+		 */
+		$string = mb_chr( $code_point, 'UTF-8' );
 
-		return "{$byte1}{$byte2}{$byte3}{$byte4}";
+		// Surrogate halves and values above U+10FFFF cannot be encoded.
+		return false !== $string ? $string : '�';
 	}
 }
diff --git a/tests/phpunit/tests/compat/mbChr.php b/tests/phpunit/tests/compat/mbChr.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * @group compat
+ *
+ * @covers ::mb_chr
+ * @covers ::_mb_chr
+ */
+class Tests_Compat_mbChr extends WP_UnitTestCase {
+	/**
+	 * Ensures that the mb_chr() polyfill matches the behavior of mb_chr()
+	 * for the supported UTF-8 encoding.
+	 *
+	 * Every value from U+0000 through U+10FFFF is compared against mb_chr(),
+	 * followed by spot-checks of out-of-range values and of the encoding
+	 * argument.
+	 *
+	 * @ticket 65342
+	 */
+	public function test_mb_chr_polyfill_matches_spec() {
+		for ( $code_point = 0; $code_point <= 0x10FFFF; $code_point++ ) {
+			$this->assertSame(
+				// Name the encoding so the reference does not depend on the internal encoding.
+				mb_chr( $code_point, 'UTF-8' ),
+				_mb_chr( $code_point ),
+				'Failed to properly encode the code point as a UTF-8 string.'
+			);
+		}
+
+		$this->assertFalse( _mb_chr( -1 ), 'Should have rejected a negative code point.' );
+		$this->assertFalse( _mb_chr( 0x110000 ), 'Should have rejected a code point above U+10FFFF.' );
+		$this->assertFalse( _mb_chr( ord( 'A' ), 'latin1' ), 'Should have rejected non-UTF-8 encoding.' );
+		$this->assertFalse( _mb_chr( ord( 'A' ), 'utf8' ), 'Should have rejected non-UTF-8 encoding.' );
+		$this->assertSame( 'A', _mb_chr( ord( 'A' ), 'UTF-8' ), 'Should have accepted UTF-8 encoding.' );
+	}
+}
diff --git a/tests/phpunit/tests/compat/mbOrd.php b/tests/phpunit/tests/compat/mbOrd.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * @group compat
+ *
+ * @covers ::mb_ord
+ * @covers ::_mb_ord
+ */
+class Tests_Compat_mbOrd extends WP_UnitTestCase {
+	/**
+	 * Ensures that the mb_ord() polyfill matches the behavior of mb_ord()
+	 * for the supported UTF-8 encoding.
+	 *
+	 * Every code point which mb_chr() can encode is round-tripped through
+	 * the polyfill, followed by spot-checks of invalid input and of the
+	 * encoding argument.
+	 *
+	 * @ticket 65342
+	 */
+	public function test_mb_ord_polyfill_matches_spec() {
+		for ( $code_point = 0; $code_point <= 0x10FFFF; $code_point++ ) {
+			/*
+			 * Some code points cannot be constructed in UTF-8 because they
+			 * are invalid; notably the surrogate halves. While they could be
+			 * manually constructed here using the direct UTF-8 encoder without
+			 * its constraints, it’s sufficient to test the positive cases here
+			 * and spot-check an unpaired and incorrectly-converted surrogate
+			 * half below.
+			 *
+			 * The encoding is named so that the encoded character does not
+			 * depend on the internal encoding.
+			 */
+			$character = mb_chr( $code_point, 'UTF-8' );
+			if ( false !== $character ) {
+				$this->assertSame(
+					$code_point,
+					_mb_ord( $character ),
+					'Failed to properly decode the code point from the string.'
+				);
+			}
+		}
+
+		$this->assertFalse( _mb_ord( '' ), 'Should have failed on empty string.' );
+		$this->assertFalse( _mb_ord( 'hi', 'latin1' ), 'Should have rejected non-UTF-8 encoding.' );
+		$this->assertFalse( _mb_ord( 'hi', 'utf8' ), 'Should have rejected non-UTF-8 encoding.' );
+		$this->assertSame( ord( 'A' ), _mb_ord( 'A', 'UTF-8' ), 'Should have accepted UTF-8 encoding.' );
+		$this->assertFalse( _mb_ord( "\xC0" ), 'Should have rejected invalid UTF-8 code point.' );
+
+		// U+D800 as it would appear if a surrogate half were directly converted into three UTF-8 bytes.
+		$this->assertFalse( _mb_ord( "\xED\xA0\x80" ), 'Should have rejected unpaired surrogate half.' );
+		$this->assertFalse( _mb_ord( substr( "\xED\xA0\x80", 0, 2 ) ), 'Should have rejected truncated byte sequence.' );
+	}
+}