|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * Tests demonstrating assertEqualHTML's HTML character reference normalization. |
| 4 | + * |
| 5 | + * Any given character has multiple valid representations in HTML: a literal |
| 6 | + * character, a named reference (&not;), a decimal reference (&#172;), a |
| 7 | + * padded decimal (&#0000172;), a hex reference (&#xAC;), a padded hex |
| 8 | + * (&#x00AC;), or even a named reference without a semicolon (&not). |
| 9 | + * assertEqualHTML treats all of them as equivalent. assertSame does not. |
| 10 | + * |
| 11 | + * @package AssertEqualHTMLExamples |
| 12 | + */ |
| 13 | + |
class CharacterReferenceEquivalenceTest extends WP_UnitTestCase {

	/**
	 * Demonstrates why assertSame is fragile for HTML character references.
	 *
	 * The NOT SIGN (¬, U+00AC) can be encoded as &not; or &#xAC; or &#172;,
	 * among others. To assertSame, those strings look completely different even
	 * though every browser renders them identically.
	 *
	 * This test is marked skipped so the suite stays green.
	 * Comment out markTestSkipped() to see it fail.
	 */
	public function test_assertsame_fails_for_character_references(): void {
		$this->markTestSkipped(
			'Intentionally skipped. Remove markTestSkipped() to see how assertSame ' .
			'fails when the same character is encoded differently.'
		);

		// Two different spellings of the same character: the named reference
		// versus the hexadecimal numeric reference. The strings must differ
		// byte-for-byte, otherwise assertSame would have nothing to reject.
		$expected = '<meta name="&not;">';
		$actual   = '<meta name="&#xAC;">';

		// ❌ This will fail even though both encode the same character (¬).
		$this->assertSame( $expected, $actual );
	}

	/**
	 * assertEqualHTML normalizes all representations of a character to the same
	 * value before comparing, so every encoding of ¬ (U+00AC) is equivalent.
	 *
	 * Representations covered:
	 *  - Literal UTF-8 character
	 *  - Named reference: &not;
	 *  - Decimal reference: &#172;
	 *  - Padded decimal reference: &#0000172;
	 *  - Hex reference: &#xAC;
	 *  - Padded hex reference: &#x00AC;
	 *  - Named reference without semicolon: &not
	 */
	public function test_all_character_reference_forms_are_equivalent(): void {
		// Every attribute holds the literal character. The heredoc closing
		// markers sit at column 0 so the file does not rely on the flexible
		// heredoc syntax introduced in PHP 7.3.
		$expected = <<<HTML
<meta
	not-literal="¬"
	not-named="¬"
	not-decimal="¬"
	not-decimal-padded="¬"
	not-hex="¬"
	not-hex-padded="¬"
	also-not="¬"
>
HTML;

		// Each attribute spells the same character using the encoding that
		// its attribute name describes. In `also-not` the reference has no
		// trailing semicolon and is immediately followed by the closing quote.
		$actual = <<<HTML
<meta
	not-literal="¬"
	not-named="&not;"
	not-decimal="&#172;"
	not-decimal-padded="&#0000172;"
	not-hex="&#xAC;"
	not-hex-padded="&#x00AC;"
	also-not="&not"
>
HTML;

		// ✅ Passes because assertEqualHTML decodes all character references
		// before comparing — literal, named, decimal, hex, with or without
		// a trailing semicolon.
		$this->assertEqualHTML( $expected, $actual );
	}
}
0 commit comments