Html5EntityDecoder.php
1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
declare(strict_types=1);
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
namespace League\CommonMark\Util;
/**
 * Decodes a single HTML5 character reference (named, decimal or hexadecimal)
 * into the UTF-8 string it represents.
 *
 * Numeric references that are malformed, or that name a code point this class
 * refuses to emit, are replaced by U+FFFD REPLACEMENT CHARACTER.
 *
 * @psalm-immutable
 */
final class Html5EntityDecoder
{
    /**
     * U+FFFD REPLACEMENT CHARACTER, returned in place of any numeric
     * reference that cannot or must not be decoded.
     */
    private const REPLACEMENT_CHARACTER = "\u{FFFD}";

    /**
     * Highest code point that is converted (last code point of plane 2).
     */
    private const MAX_CODE_POINT = 0x2FFFF;

    /**
     * Decodes one character reference.
     *
     * - Input that does not end with ';' is returned unchanged.
     * - Input starting with '&#' is treated as a numeric reference: hexadecimal
     *   when the next character is 'x' or 'X', decimal otherwise.
     * - Everything else is delegated to html_entity_decode() using the HTML5
     *   entity table.
     *
     * @param string $entity The reference to decode, e.g. '&amp;', '&#35;' or '&#x22;'
     *
     * @return string The decoded UTF-8 text
     *
     * @psalm-pure
     */
    public static function decode(string $entity): string
    {
        if (\substr($entity, -1) !== ';') {
            return $entity;
        }

        if (\substr($entity, 0, 2) === '&#') {
            // The (string) casts matter on PHP < 8, where substr() returns false
            // instead of '' for an empty payload such as '&#;' or '&#x;'.
            if (\strtolower(\substr($entity, 2, 1)) === 'x') {
                return self::fromHex((string) \substr($entity, 3, -1));
            }

            return self::fromDecimal((string) \substr($entity, 2, -1));
        }

        return \html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5, 'UTF-8');
    }

    /**
     * Converts a decimal code point to its UTF-8 character.
     *
     * Returns U+FFFD when the value is not a plain non-negative number, is
     * zero (NULL), lies above plane 2, or is a UTF-16 surrogate.
     *
     * @param mixed $number Decimal digits as a string, or an int/float code point
     *
     * @psalm-pure
     */
    private static function fromDecimal($number): string
    {
        if (\is_string($number)) {
            // Accept ASCII digits only, so the numeric comparisons below never
            // depend on PHP's string/number comparison rules.
            if ($number === '' || \strspn($number, '0123456789') !== \strlen($number)) {
                return self::REPLACEMENT_CHARACTER;
            }
        } elseif (! \is_int($number) && ! \is_float($number)) {
            return self::REPLACEMENT_CHARACTER;
        }

        // Only convert code points within planes 0-2, excluding NULL.
        // Comparing numerically (rather than with empty()) also rejects
        // zero written with leading zeros, such as "00".
        if ($number < 1 || $number > self::MAX_CODE_POINT) {
            return self::REPLACEMENT_CHARACTER;
        }

        // Safe: the value is known to be within 1..MAX_CODE_POINT here.
        $codePoint = (int) $number;

        // Surrogates are not Unicode scalar values and have no valid UTF-8 form.
        if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
            return self::REPLACEMENT_CHARACTER;
        }

        $entity    = '&#' . $codePoint . ';';
        $converted = \mb_decode_numericentity($entity, [0x0, 0x2FFFF, 0, 0xFFFF], 'UTF-8');

        // An unchanged string means mbstring declined to decode the reference.
        if ($converted === $entity) {
            return self::REPLACEMENT_CHARACTER;
        }

        return $converted;
    }

    /**
     * Converts a hexadecimal code point to its UTF-8 character.
     *
     * Returns U+FFFD when the input is empty or contains anything other than
     * hexadecimal digits; otherwise the same rules as fromDecimal() apply.
     *
     * @param string $hexChars Hexadecimal digits without the leading '&#x' or trailing ';'
     *
     * @psalm-pure
     */
    private static function fromHex(string $hexChars): string
    {
        // hexdec() silently skips invalid characters (and raises a deprecation
        // notice for them), so validate before converting.
        if ($hexChars === '' || \strspn($hexChars, '0123456789abcdefABCDEF') !== \strlen($hexChars)) {
            return self::REPLACEMENT_CHARACTER;
        }

        // hexdec() returns a float for very large inputs; fromDecimal() handles both.
        return self::fromDecimal(\hexdec($hexChars));
    }
}