DeCode.php
5.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
<?php
namespace Lib\Imap\Parse;
/**
 * Decodes MIME "encoded-word" mail header values (e.g. "=?UTF-8?B?...?=") into text.
 *
 * @author:dc
 * @time 2024/9/10 16:54
 * Class DeCode
 */
class DeCode
{
    /**
     * Encoding used whenever a charset cannot be determined or resolved.
     *
     * @var string
     */
    public $fallback_encoding = 'UTF-8';

    /**
     * Decode a header value.
     *
     * The value is trimmed first. Values starting with "=?utf-8?" (case-insensitive)
     * are passed through mb_decode_mimeheader(). If the value still looks like an
     * undecoded encoded-word afterwards (see notDecoded()), it is decoded again using
     * the charset announced inside the encoded-word. Anything else is returned as-is
     * (trimmed).
     *
     * @author:dc
     * @time 2024/9/10 16:56
     *
     * @param string $value Raw header value.
     *
     * @return string The decoded value, or the trimmed input when nothing could be decoded.
     */
    public static function decode(string $value)
    {
        $decoder = new self();
        $value = trim($value);
        $original = $value;

        if ($decoder->isUtf8($value)) {
            $value = mb_decode_mimeheader($value);
        }

        // Nothing left to do unless the value is still wrapped in "=?...?=".
        if (!$decoder->notDecoded($original, $value)) {
            return $value;
        }

        $parts = $decoder->mime_header_decode($value);
        $first = $parts[0] ?? null;

        // Only accept a real string: on PHP 7 a failed mb_convert_encoding() yields false,
        // and returning that would hand callers a non-string header value.
        if (is_object($first) && property_exists($first, 'text') && is_string($first->text)) {
            return $first->text;
        }

        return $value;
    }

    /**
     * Decode MIME header elements.
     *
     * Mirrors the return shape of imap_mime_header_decode(), but always yields exactly
     * one element covering the whole input.
     *
     * @link https://php.net/manual/en/function.imap-mime-header-decode.php
     *
     * @param string $text The MIME text
     *
     * @return array A one-element array holding an object with two properties,
     *               charset and text.
     */
    private function mime_header_decode(string $text): array
    {
        $charset = $this->getEncoding($text);

        return [(object)[
            "charset" => $charset,
            "text" => $this->convertEncoding($text, $charset),
        ]];
    }

    /**
     * Decode a MIME header string and convert it to the target encoding.
     *
     * The input is first run through mb_decode_mimeheader(), which (per the PHP manual)
     * returns the text in mbstring's internal encoding. Both encoding names are then
     * resolved through EncodingAliases, using $fallback_encoding for unknown names.
     *
     * The decoded string is returned without charset conversion when:
     *  - source and target resolve to the same encoding,
     *  - the source is us-ascii and the target is UTF-8 (ASCII is a subset of UTF-8,
     *    so converting would only risk damaging the text),
     *  - mb_detect_encoding() already reports the decoded string as UTF-8.
     *
     * NOTE(review): the UTF-8 short-circuit ignores $to, so a non-UTF-8 target is not
     * honoured for strings detected as UTF-8 - kept as-is, confirm whether intended.
     *
     * @param string $str  Header text, possibly containing encoded-words.
     * @param string $from Source charset name.
     * @param string $to   Target charset name.
     *
     * @return mixed|string The converted string; the decoded but unconverted string when
     *                      conversion is impossible.
     */
    public function convertEncoding($str, $from = "ISO-8859-2", $to = "UTF-8")
    {
        $str = mb_decode_mimeheader($str);
        $from = EncodingAliases::get($from, $this->fallback_encoding);
        $to = EncodingAliases::get($to, $this->fallback_encoding);

        if ($from === $to) {
            return $str;
        }

        // us-ascii is the IANA name for ASCII; ASCII text is already valid UTF-8.
        // https://stackoverflow.com/a/11303410
        if (strtolower((string)$from) === 'us-ascii' && $to === 'UTF-8') {
            return $str;
        }

        try {
            if (mb_detect_encoding($str) === 'UTF-8') {
                return $str;
            }

            if (!$from) {
                return mb_convert_encoding($str, $to);
            }

            return mb_convert_encoding($str, $to, $from);
        } catch (\Throwable $e) {
            // PHP 8 reports an unknown encoding name with \ValueError (an \Error, not an
            // \Exception), so \Throwable is required for this best-effort fallback to run.
            // Retry once with the hyphens stripped from the source name; the retry cannot
            // loop because the stripped name contains no hyphen.
            if (strpos((string)$from, '-') !== false) {
                return $this->convertEncoding($str, str_replace('-', '', $from), $to);
            }

            return $str;
        }
    }

    /**
     * Get the encoding of a given object or string.
     *
     * Objects: the value of a "charset" entry in ->parameters if that property exists,
     * otherwise the ->charset property; both are resolved through EncodingAliases.
     * Strings: the charset label of a leading encoded-word ("=?label?") if the alias
     * table knows it, otherwise whatever mb_detect_encoding() reports.
     *
     * @param object|string $structure
     *
     * @return string The resolved encoding, or $fallback_encoding when none is found.
     */
    private function getEncoding($structure): string
    {
        // Branch on the type first: property_exists() treats a string argument as a
        // class name, which would feed raw header text to registered autoloaders.
        if (is_object($structure)) {
            if (property_exists($structure, 'parameters')) {
                foreach ($structure->parameters as $parameter) {
                    if (strtolower($parameter->attribute) === "charset") {
                        return EncodingAliases::get($parameter->value, $this->fallback_encoding);
                    }
                }

                return $this->fallback_encoding;
            }

            if (property_exists($structure, 'charset')) {
                return EncodingAliases::get($structure->charset, $this->fallback_encoding);
            }

            return $this->fallback_encoding;
        }

        if (is_string($structure)) {
            // Charset labels may contain underscores (e.g. "ks_c_5601-1987", "iso_8859-1").
            if (preg_match("/^=\?([a-z0-9_-]{3,})\?/i", $structure, $match) === 1) {
                $alias = EncodingAliases::get($match[1], '');
                if ($alias) {
                    return $alias;
                }
            }

            $detected = mb_detect_encoding($structure);

            return $detected === false ? $this->fallback_encoding : $detected;
        }

        return $this->fallback_encoding;
    }

    /**
     * Check whether a value is still an undecoded encoded-word.
     *
     * True when $decoded starts with "=?", ends with "?=" and occurs inside $encoded.
     *
     * The suffix is compared directly rather than via the first "?=" occurrence:
     * Q-encoded payloads such as "=?gbk?Q?=C4=E3?=" contain "?=" right after the
     * encoding marker, which must not be mistaken for the terminator.
     *
     * @param string $encoded The original value.
     * @param string $decoded The value after the decoding attempt.
     *
     * @return bool
     */
    private function notDecoded($encoded, $decoded): bool
    {
        return strpos($decoded, '=?') === 0
            && substr($decoded, -2) === '?='
            && strpos($encoded, $decoded) !== false;
    }

    /**
     * Test whether a value starts with a UTF-8 encoded-word prefix ("=?utf-8?").
     *
     * @param string $value
     *
     * @return bool
     */
    private function isUtf8($value): bool
    {
        return strpos(strtolower($value), '=?utf-8?') === 0;
    }
}