MarkdownParser.php 11.9 KB
<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java)
 *  - (c) Atlassian Pty Ltd
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Parser;

use League\CommonMark\Environment\EnvironmentInterface;
use League\CommonMark\Event\DocumentParsedEvent;
use League\CommonMark\Event\DocumentPreParsedEvent;
use League\CommonMark\Exception\CommonMarkException;
use League\CommonMark\Input\MarkdownInput;
use League\CommonMark\Node\Block\Document;
use League\CommonMark\Node\Block\Paragraph;
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface;
use League\CommonMark\Parser\Block\BlockStart;
use League\CommonMark\Parser\Block\BlockStartParserInterface;
use League\CommonMark\Parser\Block\DocumentBlockParser;
use League\CommonMark\Parser\Block\ParagraphParser;
use League\CommonMark\Reference\ReferenceInterface;
use League\CommonMark\Reference\ReferenceMap;

/**
 * Block-level Markdown parser.
 *
 * Every call to parse() resets the internal state, feeds the input line by line through a stack of active block
 * parsers (the document parser always sits at the bottom of that stack), and finally runs inline parsing over every
 * closed block parser that supports inlines.
 */
final class MarkdownParser implements MarkdownParserInterface
{
    /**
     * Supplies the configuration, the block start parsers and the event dispatcher used while parsing.
     *
     * @psalm-readonly
     */
    private EnvironmentInterface $environment;

    /**
     * Value of the 'max_nesting_level' configuration option, re-read at the start of every parse.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $maxNestingLevel;

    /**
     * References gathered from paragraph parsers during the current parse.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ReferenceMap $referenceMap;

    /**
     * Key of the line currently being processed, as yielded by the markdown input's getLines().
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $lineNumber = 0;

    /**
     * Cursor over the line currently being processed; a new one is created for each line.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private Cursor $cursor;

    /**
     * Stack of block parsers that are still open; the last element is the innermost one.
     *
     * @var array<int, BlockContinueParserInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $activeBlockParsers = [];

    /**
     * Closed block parsers whose inline content still has to be parsed, in the order they were closed.
     *
     * @var array<int, BlockContinueParserWithInlinesInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $closedBlockParsers = [];

    /**
     * @param EnvironmentInterface $environment Environment providing configuration, block start parsers and events
     */
    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Resets all per-document state so the same instance can parse another document.
     */
    private function initialize(): void
    {
        $this->referenceMap       = new ReferenceMap();
        $this->lineNumber         = 0;
        $this->activeBlockParsers = [];
        $this->closedBlockParsers = [];

        $configuration         = $this->environment->getConfiguration();
        $this->maxNestingLevel = $configuration->get('max_nesting_level');
    }

    /**
     * Parses the given Markdown into a Document.
     *
     * A DocumentPreParsedEvent is dispatched before any line is read (the lines are taken from that event's
     * markdown input), and a DocumentParsedEvent is dispatched once block and inline parsing have finished.
     *
     * @throws CommonMarkException
     */
    public function parse(string $input): Document
    {
        $this->initialize();

        // The document parser is the first entry on the stack of active parsers.
        $rootParser = new DocumentBlockParser($this->referenceMap);
        $this->activateBlockParser($rootParser);

        $preParsed = new DocumentPreParsedEvent($rootParser->getBlock(), new MarkdownInput($input));
        $this->environment->dispatch($preParsed);

        foreach ($preParsed->getMarkdown()->getLines() as $number => $text) {
            $this->lineNumber = $number;
            $this->parseLine($text);
        }

        // Close whatever is still open (including the document itself), then parse the inlines.
        $stillOpen = \count($this->activeBlockParsers);
        $this->closeBlockParsers($stillOpen, $this->lineNumber);
        $this->processInlines();

        $parsed = new DocumentParsedEvent($rootParser->getBlock());
        $this->environment->dispatch($parsed);

        return $rootParser->getBlock();
    }

    /**
     * Processes a single line of input and updates the block structure accordingly.
     *
     * The line is first matched against the currently open block parsers, then checked for new block starts, and
     * whatever text remains is added to the appropriate block parser.
     */
    private function parseLine(string $line): void
    {
        $this->cursor = new Cursor($line);

        $matchedCount = $this->parseBlockContinuation();
        if ($matchedCount === null) {
            // A block parser finalized itself on this line; the line is fully handled.
            return;
        }

        $unmatchedCount = \count($this->activeBlockParsers) - $matchedCount;
        $currentParser  = $this->activeBlockParsers[$matchedCount - 1];
        $openedNewBlock = false;

        // New block starts are only attempted when the last matched parser holds a paragraph or is a container;
        // any blocks that get started become children of that parser.
        $lookForStarts = $currentParser->getBlock() instanceof Paragraph || $currentParser->isContainer();
        while ($lookForStarts) {
            // Shortcut: nothing can start on a blank remainder
            if ($this->cursor->isBlank()) {
                $this->cursor->advanceToEnd();
                break;
            }

            // Do not nest any deeper than the configured limit
            if ($currentParser->getBlock()->getDepth() >= $this->maxNestingLevel) {
                break;
            }

            $start = $this->findBlockStart($currentParser);
            if ($start === null || $start->isAborting()) {
                $this->cursor->advanceToNextNonSpaceOrTab();
                break;
            }

            $cursorState = $start->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $openedNewBlock = true;

            // A new block begins here, so any open blocks that did not match this line end on the previous line.
            if ($unmatchedCount > 0) {
                $this->closeBlockParsers($unmatchedCount, $this->lineNumber - 1);
                $unmatchedCount = 0;
            }

            if ($start->isReplaceActiveBlockParser()) {
                $this->prepareActiveBlockParserForReplacement();
            }

            // Keep looking for nested starts only while the most recently added parser is a container
            foreach ($start->getBlockParsers() as $addedParser) {
                $currentParser = $this->addChild($addedParser);
                $lookForStarts = $addedParser->isContainer();
            }
        }

        // Whatever is left at the cursor is plain text for one of the block parsers.

        // Lazy continuation: no new block was started and the innermost open parser accepts the text as-is.
        $isLazyContinuation = ! $openedNewBlock
            && ! $this->cursor->isBlank()
            && $this->getActiveBlockParser()->canHaveLazyContinuationLines();

        if ($isLazyContinuation) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        // Close the blocks that did not match this line
        if ($unmatchedCount > 0) {
            $this->closeBlockParsers($unmatchedCount, $this->lineNumber);
        }

        if (! $currentParser->isContainer()) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        // Non-blank text directly inside a container goes into a new paragraph
        if (! $this->cursor->isBlank()) {
            $this->addChild(new ParagraphParser());
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
        }
    }

    /**
     * Asks each open block parser, from the outermost inwards, whether it continues on the current line.
     *
     * @return int|null Number of parsers that matched (the document parser is always counted), or null when a
     *                  parser requested finalization and the line needs no further processing
     */
    private function parseBlockContinuation(): ?int
    {
        // The document parser at index 0 always matches, so counting starts at 1 and index 0 is never asked.
        $matched   = 1;
        $openCount = \count($this->activeBlockParsers);

        while ($matched < $openCount) {
            $candidate    = $this->activeBlockParsers[$matched];
            $continuation = $candidate->tryContinue(clone $this->cursor, $this->getActiveBlockParser());
            if ($continuation === null) {
                break;
            }

            if ($continuation->isFinalize()) {
                // Close this parser and everything nested inside it
                $this->closeBlockParsers($openCount - $matched, $this->lineNumber);

                return null;
            }

            $cursorState = $continuation->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $matched++;
        }

        return $matched;
    }

    /**
     * Returns the first non-null result produced by the environment's block start parsers for the current cursor
     * position, or null when none of them produces one.
     */
    private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
    {
        $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

        foreach ($this->environment->getBlockStartParsers() as $startParser) {
            \assert($startParser instanceof BlockStartParserInterface);

            // Each start parser receives its own copy of the cursor
            $start = $startParser->tryStart(clone $this->cursor, $parserState);
            if ($start !== null) {
                return $start;
            }
        }

        return null;
    }

    /**
     * Pops and finalizes the given number of block parsers from the top of the active stack.
     *
     * @param int $count         How many parsers to close
     * @param int $endLineNumber End line to record on each closed block
     */
    private function closeBlockParsers(int $count, int $endLineNumber): void
    {
        for ($remaining = $count; $remaining > 0; $remaining--) {
            $closedParser = $this->deactivateBlockParser();
            $this->finalize($closedParser, $endLineNumber);

            // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
            if ($closedParser instanceof BlockContinueParserWithInlinesInterface) {
                // Keep it around so processInlines() can parse its inline content later
                $this->closedBlockParsers[] = $closedParser;
            }
        }
    }

    /**
     * Finalizes a single block parser: references from paragraph parsers are added to the reference map, the
     * block's end line is recorded, and the parser is told to close its block.
     */
    private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
    {
        if ($blockParser instanceof ParagraphParser) {
            $paragraphReferences = $blockParser->getReferences();
            $this->updateReferenceMap($paragraphReferences);
        }

        $blockParser->getBlock()->setEndLine($endLineNumber);
        $blockParser->closeBlock();
    }

    /**
     * Runs inline parsing for every closed block parser that was remembered for it, in the order they were closed.
     */
    private function processInlines(): void
    {
        $inlineEngine = new InlineParserEngine($this->environment, $this->referenceMap);

        foreach ($this->closedBlockParsers as $closedParser) {
            $closedParser->parseInlines($inlineEngine);
        }
    }

    /**
     * Attaches the given parser's block to the innermost open block that can contain it and makes the parser the
     * new innermost active parser. Open parsers that cannot contain the block are closed first.
     *
     * @return BlockContinueParserInterface The parser that was passed in
     */
    private function addChild(BlockContinueParserInterface $blockParser): BlockContinueParserInterface
    {
        $blockParser->getBlock()->setStartLine($this->lineNumber);

        // Blocks closed to make room for the new one end on the previous line
        $previousLine = $this->lineNumber - 1;
        while (! $this->getActiveBlockParser()->canContain($blockParser->getBlock())) {
            $this->closeBlockParsers(1, $previousLine);
        }

        $parentBlock = $this->getActiveBlockParser()->getBlock();
        $parentBlock->appendChild($blockParser->getBlock());
        $this->activateBlockParser($blockParser);

        return $blockParser;
    }

    /**
     * Pushes a block parser onto the stack of active parsers.
     */
    private function activateBlockParser(BlockContinueParserInterface $blockParser): void
    {
        $this->activeBlockParsers[] = $blockParser;
    }

    /**
     * Pops the innermost block parser off the stack of active parsers and returns it.
     *
     * @throws ParserLogicException if the stack is already empty
     */
    private function deactivateBlockParser(): BlockContinueParserInterface
    {
        $removed = \array_pop($this->activeBlockParsers);
        if ($removed !== null) {
            return $removed;
        }

        throw new ParserLogicException('The last block parser should not be deactivated');
    }

    /**
     * Removes the innermost active parser because a new block is taking its place.
     *
     * The parser is neither finalized nor remembered for inline parsing; only the references of a paragraph
     * parser are kept before its block is detached.
     */
    private function prepareActiveBlockParserForReplacement(): void
    {
        $replaced = $this->deactivateBlockParser();

        if ($replaced instanceof ParagraphParser) {
            $this->updateReferenceMap($replaced->getReferences());
        }

        $replaced->getBlock()->detach();
    }

    /**
     * Adds each reference whose label is not already present in the reference map.
     *
     * @param ReferenceInterface[] $references
     */
    private function updateReferenceMap(iterable $references): void
    {
        foreach ($references as $candidate) {
            // The reference already stored under a label is kept
            if ($this->referenceMap->contains($candidate->getLabel())) {
                continue;
            }

            $this->referenceMap->add($candidate);
        }
    }

    /**
     * Returns the innermost block parser that is currently open.
     *
     * @throws ParserLogicException if no block parser is active
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        $lastKey = \array_key_last($this->activeBlockParsers);
        if ($lastKey === null) {
            throw new ParserLogicException('No active block parsers are available');
        }

        return $this->activeBlockParsers[$lastKey];
    }
}