LinkParserHelper.php
3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Util;
use League\CommonMark\Parser\Cursor;
/**
* @psalm-immutable
*/
final class LinkParserHelper
{
    /**
     * Attempt to parse a link destination at the cursor.
     *
     * The `<...>` form (RegexHelper::REGEX_LINK_DESTINATION_BRACES) is tried first;
     * if it does not match and the current character is not `<`, the destination
     * is scanned character by character instead. In both cases the raw text is
     * passed through RegexHelper::unescape() and UrlEncoder::unescapeAndEncode().
     *
     * @return string|null The processed destination, or null if no match
     */
    public static function parseLinkDestination(Cursor $cursor): ?string
    {
        $bracketed = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES);
        if ($bracketed) {
            // Drop the first and last character (the surrounding <..>)
            $inner = \substr($bracketed, 1, -1);

            return UrlEncoder::unescapeAndEncode(RegexHelper::unescape($inner));
        }

        // A "<" that did not match the bracketed pattern is rejected outright
        if ($cursor->getCurrentCharacter() === '<') {
            return null;
        }

        $raw = self::manuallyParseLinkDestination($cursor);

        return $raw === null
            ? null
            : UrlEncoder::unescapeAndEncode(RegexHelper::unescape($raw));
    }

    /**
     * Attempt to match a bracketed link label (`[...]`) at the cursor.
     *
     * @return int Length of the matched text in UTF-8 characters (brackets included),
     *             or 0 if nothing matched or the match is longer than 1001 characters
     */
    public static function parseLinkLabel(Cursor $cursor): int
    {
        $label = $cursor->match('/^\[(?:[^\\\\\[\]]|\\\\.){0,1000}\]/');
        if ($label === null) {
            return 0;
        }

        $charCount = \mb_strlen($label, 'UTF-8');

        return $charCount > 1001 ? 0 : $charCount;
    }

    /**
     * Match a run of link label content at the cursor.
     *
     * The pattern accepts any characters other than a backslash or square bracket,
     * as well as a backslash optionally followed by one more character.
     *
     * @return string|null The result of the cursor match
     */
    public static function parsePartialLinkLabel(Cursor $cursor): ?string
    {
        $pattern = '/^(?:[^\\\\\[\]]+|\\\\.?)*/';

        return $cursor->match($pattern);
    }

    /**
     * Attempt to parse link title (sans quotes)
     *
     * @return string|null The unescaped title without its first and last character, or null if no match
     */
    public static function parseLinkTitle(Cursor $cursor): ?string
    {
        $quoted = $cursor->match('/' . RegexHelper::PARTIAL_LINK_TITLE . '/');
        if (! $quoted) {
            return null;
        }

        // Strip the delimiter at each end, then unescape what is left
        return RegexHelper::unescape(\substr($quoted, 1, -1));
    }

    /**
     * Match a fragment of a link title at the cursor.
     *
     * The pattern accepts any number of escaped characters
     * (RegexHelper::PARTIAL_ESCAPED_CHAR) or characters that are neither the end
     * delimiter nor NUL, optionally followed by the end delimiter itself.
     *
     * @param string $endDelimiter Literal delimiter text; it is escaped with preg_quote() before use
     *
     * @return string|null The unescaped fragment, or null if the cursor match returned null
     */
    public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimiter): ?string
    {
        $quotedDelimiter = \preg_quote($endDelimiter, '/');
        $pattern         = \sprintf(
            '/(%s|[^%s\x00])*(?:%s)?/',
            RegexHelper::PARTIAL_ESCAPED_CHAR,
            $quotedDelimiter,
            $quotedDelimiter
        );

        $fragment = $cursor->match($pattern);
        if ($fragment === null) {
            return null;
        }

        return RegexHelper::unescape($fragment);
    }

    /**
     * Scan an unbracketed link destination one character at a time.
     *
     * Scanning stops at the end of input, at a whitespace character
     * (RegexHelper::REGEX_WHITESPACE_CHAR), or at a ")" that has no matching "(".
     * A backslash followed by a character accepted by RegexHelper::isEscapable()
     * is consumed as a pair.
     *
     * Note that the cursor is not rewound on the null returns below.
     *
     * @return string|null The scanned text, or null if parentheses are unbalanced, or if
     *                     nothing was consumed and the scan did not stop on ")"
     */
    private static function manuallyParseLinkDestination(Cursor $cursor): ?string
    {
        $startPosition = $cursor->getPosition();
        $startState    = $cursor->saveState();

        $depth = 0;
        while (($char = $cursor->getCurrentCharacter()) !== null) {
            if ($char === '\\') {
                $next = $cursor->peek();
                if ($next !== null && RegexHelper::isEscapable($next)) {
                    $cursor->advanceBy(2);
                    continue;
                }
                // Otherwise the backslash is handled like any other character below
            }

            if ($char === '(') {
                $cursor->advanceBy(1);
                $depth++;
                continue;
            }

            if ($char === ')') {
                if ($depth < 1) {
                    // Unmatched closing parenthesis ends the destination
                    break;
                }

                $cursor->advanceBy(1);
                $depth--;
                continue;
            }

            if (\preg_match(RegexHelper::REGEX_WHITESPACE_CHAR, $char)) {
                break;
            }

            $cursor->advanceBy(1);
        }

        if ($depth !== 0) {
            return null;
        }

        // $char is null here when the input ran out, so it compares unequal to ")"
        if ($char !== ')' && $cursor->getPosition() === $startPosition) {
            return null;
        }

        // Rewind, then cover the same distance in a single advance so that
        // getPreviousText() yields the whole scanned span
        $endPosition = $cursor->getPosition();
        $cursor->restoreState($startState);
        $cursor->advanceBy($endPosition - $cursor->getPosition());

        return $cursor->getPreviousText();
    }
}