Html5EntityDecoder.php
1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Util;
/**
 * Decodes a single HTML5 character reference (named, decimal or hexadecimal)
 * into its UTF-8 representation.
 *
 * @psalm-immutable
 */
final class Html5EntityDecoder
{
    /**
     * U+FFFD, substituted for NUL, surrogates, out-of-range code points and
     * malformed numeric references.
     */
    private const REPLACEMENT_CHARACTER = "\u{FFFD}";

    /**
     * Highest code point that is converted (last code point of plane 2).
     */
    private const MAX_CODE_POINT = 0x2FFFF;

    /**
     * Decodes one character reference such as `&amp;`, `&#65;` or `&#x41;`.
     *
     * Input that does not end with a semicolon is returned unchanged. Numeric
     * references that are malformed, NUL, a surrogate or above plane 2 decode
     * to U+FFFD. Everything else is delegated to html_entity_decode().
     *
     * @param string $entity The complete reference, including `&` and `;`
     *
     * @return string The decoded text, or the input itself when it is not terminated by `;`
     *
     * @psalm-pure
     */
    public static function decode(string $entity): string
    {
        if (\substr($entity, -1) !== ';') {
            return $entity;
        }

        if (\substr($entity, 0, 2) === '&#') {
            // The radix marker may be written as either "x" or "X"
            if (\strtolower(\substr($entity, 2, 1)) === 'x') {
                return self::fromHex(\substr($entity, 3, -1));
            }

            return self::fromDecimal(\substr($entity, 2, -1));
        }

        return \html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5, 'UTF-8');
    }

    /**
     * Converts the decimal digits of a numeric reference into a character.
     *
     * @param string $digits The text between `&#` and `;`
     *
     * @psalm-pure
     */
    private static function fromDecimal(string $digits): string
    {
        if ($digits === '' || \strspn($digits, '0123456789') !== \strlen($digits)) {
            return self::REPLACEMENT_CHARACTER;
        }

        // Strip leading zeros so that "00" is recognised as NUL instead of
        // slipping past the zero check as a non-empty string.
        $significant = \ltrim($digits, '0');

        // MAX_CODE_POINT has six decimal digits; anything longer is out of
        // range, and rejecting it here keeps the integer cast from overflowing.
        if (\strlen($significant) > 6) {
            return self::REPLACEMENT_CHARACTER;
        }

        return self::fromCodePoint((int) $significant);
    }

    /**
     * Converts the hexadecimal digits of a numeric reference into a character.
     *
     * @param string $hexChars The text between `&#x` and `;`
     *
     * @psalm-pure
     */
    private static function fromHex(string $hexChars): string
    {
        // Validate up front: hexdec() silently skips invalid characters and
        // raises a deprecation notice for them on PHP 7.4+.
        if ($hexChars === '' || \strspn($hexChars, '0123456789abcdefABCDEF') !== \strlen($hexChars)) {
            return self::REPLACEMENT_CHARACTER;
        }

        $significant = \ltrim($hexChars, '0');

        // MAX_CODE_POINT has five hex digits; longer values are out of range
        // and would eventually make hexdec() return a float.
        if (\strlen($significant) > 5) {
            return self::REPLACEMENT_CHARACTER;
        }

        return self::fromCodePoint((int) \hexdec($significant));
    }

    /**
     * Encodes a code point as UTF-8, or returns U+FFFD when it must not be emitted.
     *
     * @psalm-pure
     */
    private static function fromCodePoint(int $codePoint): string
    {
        // Only convert code points within planes 0-2, excluding NULL
        if ($codePoint <= 0 || $codePoint > self::MAX_CODE_POINT) {
            return self::REPLACEMENT_CHARACTER;
        }

        // Surrogates are not Unicode scalar values and have no well-formed
        // UTF-8 encoding, yet they fall inside the conversion map below.
        if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
            return self::REPLACEMENT_CHARACTER;
        }

        $entity = '&#' . $codePoint . ';';

        $converted = \mb_decode_numericentity($entity, [0x0, self::MAX_CODE_POINT, 0, 0xFFFF], 'UTF-8');

        // An unchanged string means mbstring refused to convert the reference
        return $converted === $entity ? self::REPLACEMENT_CHARACTER : $converted;
    }
}