DeCode.php 5.0 KB
<?php

namespace Lib\Imap\Parse;



/**
 * Decodes MIME encoded-word mail header values (the `=?charset?B|Q?payload?=`
 * form) into plain text.
 *
 * @author:dc
 * @time 2024/9/10 16:54
 * Class DeCode
 */
class DeCode{

    /**
     * Encoding used whenever a charset cannot be determined or resolved.
     *
     * @var string
     */
    public $fallback_encoding = 'UTF-8';


    /**
     * Decode a raw header value.
     *
     * The value is trimmed first. Values starting with `=?utf-8?` (any case) are
     * decoded directly with mb_decode_mimeheader(). If the result still looks like
     * an encoded word (see notDecoded()), the charset-aware path is used instead.
     *
     * @author:dc
     * @time 2024/9/10 16:56
     *
     * @param string $value Raw header value, possibly MIME encoded
     *
     * @return string The decoded value, or the trimmed input when nothing had to be decoded
     */
    public static function decode(string $value){
        $obj = new self();
        $value = trim($value);
        $original_value = $value;

        if ($obj->isUtf8($value)) {
            $value = mb_decode_mimeheader($value);
        }

        // Nothing left that looks encoded: we are done.
        if (!$obj->notDecoded($original_value, $value)) {
            return $value;
        }

        $decoded_value = $obj->mime_header_decode($value);
        if (count($decoded_value) > 0 && property_exists($decoded_value[0], "text")) {
            return $decoded_value[0]->text;
        }

        return $value;
    }

    /**
     * Decode MIME header elements
     * @link https://php.net/manual/en/function.imap-mime-header-decode.php
     * @param string $text The MIME text
     *
     * @return array A single-element array holding an object with two properties,
     * charset (the detected charset) and text (the converted text).
     */
    private function mime_header_decode(string $text): array {
        $charset = $this->getEncoding($text);
        return [(object)[
            "charset" => $charset,
            "text"    => $this->convertEncoding($text, $charset)
        ]];
    }

    /**
     * MIME-decode a string and convert it from one encoding to another.
     *
     * Both encoding names are resolved through EncodingAliases::get() with
     * $fallback_encoding as the default. The string is returned without charset
     * conversion when source and target are identical, when the source is
     * us-ascii and the target UTF-8, or when the MIME-decoded string is already
     * detected as UTF-8.
     *
     * If the conversion fails, it is retried once with the hyphens stripped from
     * the source encoding name; if that is not possible the MIME-decoded string
     * is returned unchanged.
     *
     * @param $str
     * @param string $from
     * @param string $to
     *
     * @return mixed|string
     */
    public function convertEncoding($str, $from = "ISO-8859-2", $to = "UTF-8") {

        $str = mb_decode_mimeheader($str);

        $from = EncodingAliases::get($from, $this->fallback_encoding);
        $to = EncodingAliases::get($to, $this->fallback_encoding);

        if ($from === $to) {
            return $str;
        }

        // ASCII is a subset of UTF-8, so a us-ascii string is already valid UTF-8
        // and must not be run through a conversion.
        //     https://stackoverflow.com/a/11303410
        //     https://en.wikipedia.org/wiki/ASCII
        if (strtolower($from) === 'us-ascii' && $to === 'UTF-8') {
            return $str;
        }

        // mb_decode_mimeheader() may already have produced UTF-8 text.
        if (mb_detect_encoding($str) === 'UTF-8') {
            return $str;
        }

        try {
            $converted = $from
                ? mb_convert_encoding($str, $to, $from)
                : mb_convert_encoding($str, $to);

            // A false return signals a failed conversion; treat it like an exception
            // instead of handing false back to the caller.
            if ($converted !== false) {
                return $converted;
            }
        } catch (\ValueError $e) {
            // PHP >= 8 throws ValueError (an Error, not an Exception) for an unknown
            // encoding name; fall through to the retry below.
        } catch (\Exception $e) {
            // Any other conversion failure: fall through to the retry below.
        }

        // Retry once with a hyphen-less encoding name, otherwise give up gracefully.
        if (strpos((string)$from, '-') !== false) {
            return $this->convertEncoding($str, str_replace('-', '', $from), $to);
        }

        return $str;
    }


    /**
     * Get the encoding of a given object or encoded string.
     *
     * For objects, the charset is read from a `parameters` list (entries with
     * `attribute`/`value`) or, failing that, from a `charset` property. For
     * strings, the charset is taken from a leading `=?charset?` marker when
     * EncodingAliases knows it, otherwise mb_detect_encoding() is used.
     *
     * @param object|string $structure
     *
     * @return string
     */
    private function getEncoding($structure): string {
        // Only inspect properties on real objects: property_exists() treats a string
        // as a class name, which would pass header text to the autoloader.
        if (is_object($structure)) {
            if (property_exists($structure, 'parameters')) {
                foreach ($structure->parameters as $parameter) {
                    if (strtolower($parameter->attribute) === "charset") {
                        return EncodingAliases::get($parameter->value, $this->fallback_encoding);
                    }
                }
            } elseif (property_exists($structure, 'charset')) {
                return EncodingAliases::get($structure->charset, $this->fallback_encoding);
            }

            return $this->fallback_encoding;
        }

        if (is_string($structure)) {
            // Prefer the charset announced by the encoded word itself.
            preg_match("/^=\?([a-z0-9-]{3,})\?/i",$structure,$code);
            if(!empty($code[1])){
                $code = EncodingAliases::get($code[1],'');
                if($code){
                    return $code;
                }
            }

            $result = mb_detect_encoding($structure);
            return $result === false ? $this->fallback_encoding : $result;
        }

        return $this->fallback_encoding;
    }


    /**
     * Check whether a value is still an undecoded encoded word.
     *
     * True when $decoded starts with `=?`, ends with `?=` and is contained in
     * $encoded. The end is located with strrpos(): the first `?=` is not
     * necessarily the terminator (e.g. `=?gbk?Q?=C4=E3?=` contains `?=` right
     * after the encoding marker, and multi-word headers contain one per word).
     *
     * @param $encoded
     * @param $decoded
     *
     * @return bool
     */
    private function notDecoded($encoded, $decoded): bool {
        return 0 === strpos($decoded, '=?')
            && strlen($decoded) - 2 === strrpos($decoded, '?=')
            && false !== strpos($encoded, $decoded);
    }



    /**
     * Test if a given value is announced as utf-8 encoded, i.e. starts with
     * `=?utf-8?` (case-insensitive).
     *
     * @param $value
     *
     * @return bool
     */
    private function isUtf8($value): bool {
        return strncasecmp($value, '=?utf-8?', 8) === 0;
    }


}