|
@@ -37,16 +37,7 @@ class MailFun { |
|
@@ -37,16 +37,7 @@ class MailFun { |
37
|
* @time 2023/3/27 13:55
|
37
|
* @time 2023/3/27 13:55
|
38
|
*/
|
38
|
*/
|
39
|
public static function mb_coding(string $str,array $appcode=[]){
|
39
|
public static function mb_coding(string $str,array $appcode=[]){
|
40
|
- try {
|
|
|
41
|
- $encode = @mb_detect_encoding($str, array_merge(array("ASCII",'UTF-8',"GB2312","GBK",'BIG5'),$appcode));
|
|
|
42
|
- if($encode){
|
|
|
43
|
- return @mb_convert_encoding($str, 'UTF-8', $encode);
|
|
|
44
|
- }
|
|
|
45
|
- }catch (\Throwable $e){
|
|
|
46
|
-
|
|
|
47
|
- }
|
|
|
48
|
-
|
|
|
49
|
- return $str;
|
40
|
+ return (new Header())->decode($str);
|
50
|
}
|
41
|
}
|
51
|
|
42
|
|
52
|
|
43
|
|
|
@@ -409,3 +400,742 @@ class MailFun { |
|
@@ -409,3 +400,742 @@ class MailFun { |
409
|
|
400
|
|
410
|
|
401
|
|
411
|
}
|
402
|
}
|
|
|
403
|
+
|
|
|
404
|
+
|
|
|
405
|
+
|
|
|
406
|
+
|
|
|
407
|
/**
 * Class Header
 *
 * Helper for decoding MIME-encoded mail header values and converting them
 * between character sets. Uses the imap extension when it is loaded and falls
 * back to mbstring / iconv otherwise.
 *
 * @package Webklex\PHPIMAP
 */
class Header
{
    /**
     * Fallback Encoding
     *
     * Returned by getEncoding() when no charset can be determined, and passed to
     * EncodingAliases::get() as the value to use for unknown charset names.
     *
     * @var string
     */
    public $fallback_encoding = 'UTF-8';

    /**
     * Decode MIME header elements
     * @link https://php.net/manual/en/function.imap-mime-header-decode.php
     * @param string $text The MIME text
     *
     * @return array The decoded elements are returned in an array of objects, where each
     * object has two properties, charset and text. Without the imap extension a single
     * element is returned whose text is $text converted from its detected charset.
     */
    public function mime_header_decode(string $text): array
    {
        if (extension_loaded('imap')) {
            $result = \imap_mime_header_decode($text);
            return is_array($result) ? $result : [];
        }

        $charset = $this->getEncoding($text);
        return [(object)[
            "charset" => $charset,
            "text"    => $this->convertEncoding($text, $charset),
        ]];
    }

    /**
     * Check whether a value still looks like an undecoded MIME encoded-word.
     *
     * True when $decoded starts with "=?", ends with "?=" and is contained in $encoded.
     *
     * @param mixed $encoded The original (encoded) value
     * @param mixed $decoded The value after a decoding attempt
     *
     * @return bool
     */
    private function notDecoded($encoded, $decoded): bool
    {
        $decoded = (string)$decoded;

        // The trailing "?=" must be tested at the END of the string. Searching for the
        // first "?=" fails for Q-encoded payloads that begin with "=XX"
        // (e.g. "=?ISO-8859-1?Q?=E9?="), which contain an earlier "?=".
        return 0 === strpos($decoded, '=?')
            && substr($decoded, -2) === '?='
            && false !== strpos((string)$encoded, $decoded);
    }

    /**
     * Convert the encoding
     *
     * Both charset names are normalised through EncodingAliases first. If the
     * conversion fails, one retry is made with the dashes removed from the source
     * charset name; if that fails too, the input is returned unchanged.
     *
     * @param mixed  $str  The value to convert
     * @param string $from Source charset
     * @param string $to   Target charset
     *
     * @return mixed|string The converted value, or $str unchanged when no conversion
     *                      was necessary or possible
     */
    public function convertEncoding($str, $from = "ISO-8859-2", $to = "UTF-8")
    {
        $from = EncodingAliases::get($from, $this->fallback_encoding);
        $to = EncodingAliases::get($to, $this->fallback_encoding);

        if ($from === $to) {
            return $str;
        }

        // US-ASCII (IANA's preferred name for ASCII) is a subset of UTF-8, so ASCII
        // input is already valid UTF-8 and must not be run through a converter.
        // https://stackoverflow.com/a/11303410
        // https://en.wikipedia.org/wiki/ASCII
        if (strtolower($from) == 'us-ascii' && $to == 'UTF-8') {
            return $str;
        }

        $converted = $this->attemptConversion($str, $from, $to);

        // Retry once with the dashes stripped from the source charset. The retry
        // bypasses the alias table on purpose: re-aliasing would map the dash-less
        // name straight back to the canonical one and repeat the same failure.
        if ($converted === null && strpos($from, '-') !== false) {
            $converted = $this->attemptConversion($str, str_replace('-', '', $from), $to);
        }

        return $converted !== null ? $converted : $str;
    }

    /**
     * Run a single charset conversion with iconv (preferred) or mbstring.
     *
     * @param mixed  $str
     * @param string $from
     * @param string $to
     *
     * @return mixed|null The converted value, or null when the conversion failed
     */
    private function attemptConversion($str, $from, $to)
    {
        try {
            if (function_exists('iconv') && $from != 'UTF-7' && $to != 'UTF-7') {
                $result = iconv($from, $to, $str);
            } elseif (!$from) {
                $result = mb_convert_encoding($str, $to);
            } else {
                $result = mb_convert_encoding($str, $to, $from);
            }
        } catch (\Throwable $e) {
            // Covers ValueError (unknown charset on PHP 8) as well as warnings that an
            // application error handler has turned into exceptions.
            return null;
        }

        // iconv() signals failure by returning false rather than throwing.
        return ($result === false || $result === null) ? null : $result;
    }

    /**
     * Get the encoding of a given object or string
     *
     * - string: charset reported by mb_detect_encoding()
     * - object with "parameters": value of the parameter whose attribute is "charset"
     * - object with "charset": that property
     * Anything else (or no match) yields the fallback encoding.
     *
     * @param object|string $structure
     *
     * @return string
     */
    public function getEncoding($structure): string
    {
        // Handle strings first: property_exists() would treat a string as a class name.
        if (is_string($structure)) {
            $result = mb_detect_encoding($structure);
            return $result === false ? $this->fallback_encoding : $result;
        }

        if (is_object($structure)) {
            if (property_exists($structure, 'parameters')) {
                foreach ($structure->parameters as $parameter) {
                    if (strtolower($parameter->attribute) == "charset") {
                        return EncodingAliases::get($parameter->value, $this->fallback_encoding);
                    }
                }
            } elseif (property_exists($structure, 'charset')) {
                return EncodingAliases::get($structure->charset, $this->fallback_encoding);
            }
        }

        return $this->fallback_encoding;
    }

    /**
     * Test if a given value is a UTF-8 MIME encoded-word (starts with "=?utf-8?")
     * @param mixed $value
     *
     * @return bool
     */
    private function is_uft8($value): bool
    {
        return strpos(strtolower($value), '=?utf-8?') === 0;
    }

    /**
     * Generic RFC 2047 decoding for encoded-words of any charset.
     *
     * Tries iconv_mime_decode() (with explicit UTF-8 output) and then
     * mb_decode_mimeheader().
     *
     * @param mixed $value The raw header value
     *
     * @return string|null The decoded text, or null when neither decoder changed the value
     */
    private function decodeEncodedWord($value)
    {
        $value = (string)$value;

        if (function_exists('iconv_mime_decode')) {
            try {
                $decoded = iconv_mime_decode($value, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, 'UTF-8');
            } catch (\Throwable $e) {
                // Best-effort: fall through to the mbstring decoder below.
                $decoded = false;
            }
            if (is_string($decoded) && $decoded !== '' && !$this->notDecoded($value, $decoded)) {
                return $decoded;
            }
        }

        try {
            $decoded = mb_decode_mimeheader($value);
        } catch (\Throwable $e) {
            return null;
        }
        if (is_string($decoded) && $decoded !== '' && !$this->notDecoded($value, $decoded)) {
            return $decoded;
        }

        return null;
    }

    /**
     * Try to decode a specific header
     *
     * Arrays are decoded element by element; null is returned unchanged.
     *
     * @param mixed $value
     *
     * @return mixed
     */
    public function decode($value)
    {
        if (is_array($value)) {
            return $this->decodeArray($value);
        }
        if ($value === null) {
            return $value;
        }

        $original_value = $value;

        if (extension_loaded('imap')) {
            $value = \imap_utf8($value);
            if ($this->is_uft8($value)) {
                $value = mb_decode_mimeheader($value);
            }
            if ($this->notDecoded($original_value, $value)) {
                $decoded_value = $this->mime_header_decode($value);
                if (count($decoded_value) > 0 && property_exists($decoded_value[0], "text")) {
                    $value = $decoded_value[0]->text;
                }
            }
        } elseif ($this->is_uft8($value)) {
            $value = mb_decode_mimeheader($value);
        }

        // A decoded value may itself expose another UTF-8 encoded-word.
        if ($this->is_uft8($value)) {
            $value = mb_decode_mimeheader($value);
        }

        if ($this->notDecoded($original_value, $value)) {
            // Still an encoded-word (typically a non-UTF-8 charset with no imap
            // extension loaded): try a generic MIME decode before falling back to a
            // plain charset conversion of the raw value.
            $generic = $this->decodeEncodedWord($original_value);
            $value = $generic !== null
                ? $generic
                : $this->convertEncoding($original_value, $this->getEncoding($original_value));
        }

        return $value;
    }

    /**
     * Decode a given array
     * @param array $values
     *
     * @return array The same keys with every element passed through decode()
     */
    private function decodeArray(array $values): array
    {
        foreach ($values as $key => $value) {
            $values[$key] = $this->decode($value);
        }
        return $values;
    }

    /**
     * Exception handling for invalid dates
     *
     * Currently known invalid formats:
     * ^ Datetime                                   ^ Problem                           ^ Cause
     * | Mon, 20 Nov 2017 20:31:31 +0800 (GMT+8:00) | Double timezone specification     | A Windows feature
     * | Thu, 8 Nov 2018 08:54:58 -0200 (-02)       |                                   |
     * |                                            | and invalid timezone (max 6 char) |
     * | 04 Jan 2018 10:12:47 UT                    | Missing letter "C"                | Unknown
     * | Thu, 31 May 2018 18:15:00 +0800 (added by) | Non-standard details added by the | Unknown
     * |                                            | mail server                       |
     * | Sat, 31 Aug 2013 20:08:23 +0580            | Invalid timezone                  | PHPMailer bug https://sourceforge.net/p/phpmailer/mailman/message/6132703/
     *
     * Please report any new invalid timestamps to [#45](https://github.com/Webklex/php-imap/issues)
     *
     * NOTE(review): nothing in this class calls this method and the parsed date is
     * discarded, so its only observable effect is an exception from Carbon when the
     * date cannot be parsed even after normalisation. Carbon must be resolvable in
     * this file's scope for the method to run - confirm before wiring it up.
     *
     * @param object $header Object that may carry a "date" property
     */
    private function parseDate($header)
    {
        if (!property_exists($header, 'date')) {
            return;
        }

        $date = $header->date;

        // "+0580" is not a valid offset; it is treated as "+0530".
        if (preg_match('/\+0580/', $date)) {
            $date = str_replace('+0580', '+0530', $date);
        }

        $date = trim($date);
        try {
            // NOTE(review): needle restored as the literal '&nbsp;' entity; the copy
            // under review showed a plain space here, which made this a no-op - confirm.
            if (strpos($date, '&nbsp;') !== false) {
                $date = str_replace('&nbsp;', ' ', $date);
            }
            $parsed_date = Carbon::parse($date);
        } catch (\Exception $e) {
            switch (true) {
                case preg_match('/([0-9]{4}\.[0-9]{1,2}\.[0-9]{1,2}\-[0-9]{1,2}\.[0-9]{1,2}.[0-9]{1,2})+$/i', $date) > 0:
                    $date = Carbon::createFromFormat("Y.m.d-H.i.s", $date);
                    break;
                case preg_match('/([0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ UT)+$/i', $date) > 0:
                case preg_match('/([A-Z]{2,3}\,\ [0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ UT)+$/i', $date) > 0:
                    // "UT" is missing its trailing "C".
                    $date .= 'C';
                    break;
                case preg_match('/([A-Z]{2,3}\,\ [0-9]{1,2}[\,]\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ [\-|\+][0-9]{4})+$/i', $date) > 0:
                    $date = str_replace(',', '', $date);
                    // no break - intentionally continues into the "(" stripping below
                case preg_match('/([A-Z]{2,3}\,\ [0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ \+[0-9]{2,4}\ \(\+[0-9]{1,2}\))+$/i', $date) > 0:
                case preg_match('/([A-Z]{2,3}[\,|\ \,]\ [0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}.*)+$/i', $date) > 0:
                case preg_match('/([A-Z]{2,3}\,\ [0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ [\-|\+][0-9]{4}\ \(.*)\)+$/i', $date) > 0:
                case preg_match('/([A-Z]{2,3}\, \ [0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{4}\ [0-9]{1,2}\:[0-9]{1,2}\:[0-9]{1,2}\ [\-|\+][0-9]{4}\ \(.*)\)+$/i', $date) > 0:
                case preg_match('/([0-9]{1,2}\ [A-Z]{2,3}\ [0-9]{2,4}\ [0-9]{2}\:[0-9]{2}\:[0-9]{2}\ [A-Z]{2}\ \-[0-9]{2}\:[0-9]{2}\ \([A-Z]{2,3}\ \-[0-9]{2}:[0-9]{2}\))+$/i', $date) > 0:
                    // Keep only the text before the first "(".
                    $array = explode('(', $date);
                    $array = array_reverse($array);
                    $date = trim(array_pop($array));
                    break;
            }

            $parsed_date = Carbon::parse($date);
        }
    }
}
|
|
|
679
|
+
|
|
|
680
|
/**
 * Class EncodingAliases
 *
 * Static lookup table that maps charset names as they appear in e-mail headers
 * (lower-cased) to a canonical charset name.
 */
class EncodingAliases
{
    /**
     * Contains email encoding mappings
     *
     * @var array
     */
    private static $aliases = [
        /*
        |--------------------------------------------------------------------------
        | Email encoding aliases
        |--------------------------------------------------------------------------
        |
        | Email encoding aliases used to convert to iconv supported charsets
        |
        |
        | This Source Code Form is subject to the terms of the Mozilla Public
        | License, v. 2.0. If a copy of the MPL was not distributed with this
        | file, You can obtain one at http://mozilla.org/MPL/2.0/.
        |
        | This Original Code has been modified by IBM Corporation.
        | Modifications made by IBM described herein are
        | Copyright (c) International Business Machines
        | Corporation, 1999
        |
        | Modifications to Mozilla code or documentation
        | identified per MPL Section 3.3
        |
        | Date         Modified by     Description of modification
        | 12/09/1999   IBM Corp.       Support for IBM codepages - 850,852,855,857,862,864
        |
        | Rule of this file:
        | 1. key should always be in lower case ascii so we can do case insensitive
        |    comparison in the code faster.
        | 2. value should be the one used in unicode converter
        |
        | 3. If the charset is not used for document charset, but font charset
        |    (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here
        |
        */
        "ascii"                    => "us-ascii",
        "us-ascii"                 => "us-ascii",
        "ansi_x3.4-1968"           => "us-ascii",
        "646"                      => "us-ascii",
        "iso-8859-1"               => "ISO-8859-1",
        "iso-8859-2"               => "ISO-8859-2",
        "iso-8859-3"               => "ISO-8859-3",
        "iso-8859-4"               => "ISO-8859-4",
        "iso-8859-5"               => "ISO-8859-5",
        "iso-8859-6"               => "ISO-8859-6",
        "iso-8859-6-i"             => "ISO-8859-6-I",
        "iso-8859-6-e"             => "ISO-8859-6-E",
        "iso-8859-7"               => "ISO-8859-7",
        "iso-8859-8"               => "ISO-8859-8",
        "iso-8859-8-i"             => "ISO-8859-8-I",
        "iso-8859-8-e"             => "ISO-8859-8-E",
        "iso-8859-9"               => "ISO-8859-9",
        "iso-8859-10"              => "ISO-8859-10",
        "iso-8859-11"              => "ISO-8859-11",
        "iso-8859-13"              => "ISO-8859-13",
        "iso-8859-14"              => "ISO-8859-14",
        "iso-8859-15"              => "ISO-8859-15",
        "iso-8859-16"              => "ISO-8859-16",
        "iso-ir-111"               => "ISO-IR-111",
        "iso-2022-cn"              => "ISO-2022-CN",
        "iso-2022-cn-ext"          => "ISO-2022-CN",
        "iso-2022-kr"              => "ISO-2022-KR",
        "iso-2022-jp"              => "ISO-2022-JP",
        "utf-16be"                 => "UTF-16BE",
        "utf-16le"                 => "UTF-16LE",
        "utf-16"                   => "UTF-16",
        "windows-1250"             => "windows-1250",
        "windows-1251"             => "windows-1251",
        "windows-1252"             => "windows-1252",
        "windows-1253"             => "windows-1253",
        "windows-1254"             => "windows-1254",
        "windows-1255"             => "windows-1255",
        "windows-1256"             => "windows-1256",
        "windows-1257"             => "windows-1257",
        "windows-1258"             => "windows-1258",
        "ibm866"                   => "IBM866",
        "ibm850"                   => "IBM850",
        "ibm852"                   => "IBM852",
        "ibm855"                   => "IBM855",
        "ibm857"                   => "IBM857",
        "ibm862"                   => "IBM862",
        "ibm864"                   => "IBM864",
        "utf-8"                    => "UTF-8",
        "utf-7"                    => "UTF-7",
        "shift_jis"                => "Shift_JIS",
        "big5"                     => "Big5",
        "euc-jp"                   => "EUC-JP",
        "euc-kr"                   => "EUC-KR",
        "gb2312"                   => "GB2312",
        "gb18030"                  => "gb18030",
        "viscii"                   => "VISCII",
        "koi8-r"                   => "KOI8-R",
        "koi8_r"                   => "KOI8-R",
        "cskoi8r"                  => "KOI8-R",
        "koi"                      => "KOI8-R",
        "koi8"                     => "KOI8-R",
        "koi8-u"                   => "KOI8-U",
        "tis-620"                  => "TIS-620",
        "t.61-8bit"                => "T.61-8bit",
        "hz-gb-2312"               => "HZ-GB-2312",
        "big5-hkscs"               => "Big5-HKSCS",
        "gbk"                      => "gbk",
        "cns11643"                 => "x-euc-tw",
        //
        // Aliases for ISO-8859-1
        //
        "latin1"                   => "ISO-8859-1",
        "iso_8859-1"               => "ISO-8859-1",
        "iso8859-1"                => "ISO-8859-1",
        "iso8859-2"                => "ISO-8859-2",
        "iso8859-3"                => "ISO-8859-3",
        "iso8859-4"                => "ISO-8859-4",
        "iso8859-5"                => "ISO-8859-5",
        "iso8859-6"                => "ISO-8859-6",
        "iso8859-7"                => "ISO-8859-7",
        "iso8859-8"                => "ISO-8859-8",
        "iso8859-9"                => "ISO-8859-9",
        "iso8859-10"               => "ISO-8859-10",
        "iso8859-11"               => "ISO-8859-11",
        "iso8859-13"               => "ISO-8859-13",
        "iso8859-14"               => "ISO-8859-14",
        "iso8859-15"               => "ISO-8859-15",
        "iso_8859-1:1987"          => "ISO-8859-1",
        "iso-ir-100"               => "ISO-8859-1",
        "l1"                       => "ISO-8859-1",
        "ibm819"                   => "ISO-8859-1",
        "cp819"                    => "ISO-8859-1",
        "csisolatin1"              => "ISO-8859-1",
        //
        // Aliases for ISO-8859-2
        //
        "latin2"                   => "ISO-8859-2",
        "iso_8859-2"               => "ISO-8859-2",
        "iso_8859-2:1987"          => "ISO-8859-2",
        "iso-ir-101"               => "ISO-8859-2",
        "l2"                       => "ISO-8859-2",
        "csisolatin2"              => "ISO-8859-2",
        //
        // Aliases for ISO-8859-3
        //
        "latin3"                   => "ISO-8859-3",
        "iso_8859-3"               => "ISO-8859-3",
        "iso_8859-3:1988"          => "ISO-8859-3",
        "iso-ir-109"               => "ISO-8859-3",
        "l3"                       => "ISO-8859-3",
        "csisolatin3"              => "ISO-8859-3",
        //
        // Aliases for ISO-8859-4
        //
        "latin4"                   => "ISO-8859-4",
        "iso_8859-4"               => "ISO-8859-4",
        "iso_8859-4:1988"          => "ISO-8859-4",
        "iso-ir-110"               => "ISO-8859-4",
        "l4"                       => "ISO-8859-4",
        "csisolatin4"              => "ISO-8859-4",
        //
        // Aliases for ISO-8859-5
        //
        "cyrillic"                 => "ISO-8859-5",
        "iso_8859-5"               => "ISO-8859-5",
        "iso_8859-5:1988"          => "ISO-8859-5",
        "iso-ir-144"               => "ISO-8859-5",
        "csisolatincyrillic"       => "ISO-8859-5",
        //
        // Aliases for ISO-8859-6
        //
        "arabic"                   => "ISO-8859-6",
        "iso_8859-6"               => "ISO-8859-6",
        "iso_8859-6:1987"          => "ISO-8859-6",
        "iso-ir-127"               => "ISO-8859-6",
        "ecma-114"                 => "ISO-8859-6",
        "asmo-708"                 => "ISO-8859-6",
        "csisolatinarabic"         => "ISO-8859-6",
        //
        // Aliases for ISO-8859-6-I
        //
        "csiso88596i"              => "ISO-8859-6-I",
        //
        // Aliases for ISO-8859-6-E
        //
        "csiso88596e"              => "ISO-8859-6-E",
        //
        // Aliases for ISO-8859-7
        //
        "greek"                    => "ISO-8859-7",
        "greek8"                   => "ISO-8859-7",
        "sun_eu_greek"             => "ISO-8859-7",
        "iso_8859-7"               => "ISO-8859-7",
        "iso_8859-7:1987"          => "ISO-8859-7",
        "iso-ir-126"               => "ISO-8859-7",
        "elot_928"                 => "ISO-8859-7",
        "ecma-118"                 => "ISO-8859-7",
        "csisolatingreek"          => "ISO-8859-7",
        //
        // Aliases for ISO-8859-8
        //
        "hebrew"                   => "ISO-8859-8",
        "iso_8859-8"               => "ISO-8859-8",
        "visual"                   => "ISO-8859-8",
        "iso_8859-8:1988"          => "ISO-8859-8",
        "iso-ir-138"               => "ISO-8859-8",
        "csisolatinhebrew"         => "ISO-8859-8",
        //
        // Aliases for ISO-8859-8-I
        //
        "csiso88598i"              => "ISO-8859-8-I",
        "iso-8859-8i"              => "ISO-8859-8-I",
        "logical"                  => "ISO-8859-8-I",
        //
        // Aliases for ISO-8859-8-E
        //
        "csiso88598e"              => "ISO-8859-8-E",
        //
        // Aliases for ISO-8859-9
        //
        "latin5"                   => "ISO-8859-9",
        "iso_8859-9"               => "ISO-8859-9",
        "iso_8859-9:1989"          => "ISO-8859-9",
        "iso-ir-148"               => "ISO-8859-9",
        "l5"                       => "ISO-8859-9",
        "csisolatin5"              => "ISO-8859-9",
        //
        // Aliases for UTF-8
        //
        "unicode-1-1-utf-8"        => "UTF-8",
        // nl_langinfo(CODESET) in HP/UX returns 'utf8' under UTF-8 locales
        "utf8"                     => "UTF-8",
        //
        // Aliases for Shift_JIS
        //
        "x-sjis"                   => "Shift_JIS",
        "shift-jis"                => "Shift_JIS",
        "ms_kanji"                 => "Shift_JIS",
        "csshiftjis"               => "Shift_JIS",
        "windows-31j"              => "Shift_JIS",
        "cp932"                    => "Shift_JIS",
        "sjis"                     => "Shift_JIS",
        //
        // Aliases for EUC_JP
        //
        "cseucpkdfmtjapanese"      => "EUC-JP",
        "x-euc-jp"                 => "EUC-JP",
        //
        // Aliases for ISO-2022-JP
        //
        "csiso2022jp"              => "ISO-2022-JP",
        // The following are really not aliases ISO-2022-JP, but sharing the same decoder
        "iso-2022-jp-2"            => "ISO-2022-JP",
        "csiso2022jp2"             => "ISO-2022-JP",
        //
        // Aliases for Big5
        //
        "csbig5"                   => "Big5",
        "cn-big5"                  => "Big5",
        // x-x-big5 is not really a alias for Big5, add it only for MS FrontPage
        "x-x-big5"                 => "Big5",
        // Sun Solaris
        "zh_tw-big5"               => "Big5",
        //
        // Aliases for EUC-KR
        //
        "cseuckr"                  => "EUC-KR",
        "ks_c_5601-1987"           => "EUC-KR",
        "iso-ir-149"               => "EUC-KR",
        "ks_c_5601-1989"           => "EUC-KR",
        "ksc_5601"                 => "EUC-KR",
        "ksc5601"                  => "EUC-KR",
        "korean"                   => "EUC-KR",
        "csksc56011987"            => "EUC-KR",
        "5601"                     => "EUC-KR",
        "windows-949"              => "EUC-KR",
        //
        // Aliases for GB2312
        //
        // The following are really not aliases GB2312, add them only for MS FrontPage
        "gb_2312-80"               => "GB2312",
        "iso-ir-58"                => "GB2312",
        "chinese"                  => "GB2312",
        "csiso58gb231280"          => "GB2312",
        "csgb2312"                 => "GB2312",
        "zh_cn.euc"                => "GB2312",
        // Sun Solaris
        "gb_2312"                  => "GB2312",
        //
        // Aliases for windows-125x
        //
        "x-cp1250"                 => "windows-1250",
        "x-cp1251"                 => "windows-1251",
        "x-cp1252"                 => "windows-1252",
        "x-cp1253"                 => "windows-1253",
        "x-cp1254"                 => "windows-1254",
        "x-cp1255"                 => "windows-1255",
        "x-cp1256"                 => "windows-1256",
        "x-cp1257"                 => "windows-1257",
        "x-cp1258"                 => "windows-1258",
        //
        // Aliases for windows-874
        //
        "windows-874"              => "windows-874",
        "ibm874"                   => "windows-874",
        "dos-874"                  => "windows-874",
        //
        // Aliases for macintosh
        //
        "macintosh"                => "macintosh",
        "x-mac-roman"              => "macintosh",
        "mac"                      => "macintosh",
        "csmacintosh"              => "macintosh",
        //
        // Aliases for IBM866
        //
        "cp866"                    => "IBM866",
        "cp-866"                   => "IBM866",
        "866"                      => "IBM866",
        "csibm866"                 => "IBM866",
        //
        // Aliases for IBM850
        //
        "cp850"                    => "IBM850",
        "850"                      => "IBM850",
        "csibm850"                 => "IBM850",
        //
        // Aliases for IBM852
        //
        "cp852"                    => "IBM852",
        "852"                      => "IBM852",
        "csibm852"                 => "IBM852",
        //
        // Aliases for IBM855
        //
        "cp855"                    => "IBM855",
        "855"                      => "IBM855",
        "csibm855"                 => "IBM855",
        //
        // Aliases for IBM857
        //
        "cp857"                    => "IBM857",
        "857"                      => "IBM857",
        "csibm857"                 => "IBM857",
        //
        // Aliases for IBM862
        //
        "cp862"                    => "IBM862",
        "862"                      => "IBM862",
        "csibm862"                 => "IBM862",
        //
        // Aliases for IBM864
        //
        "cp864"                    => "IBM864",
        "864"                      => "IBM864",
        "csibm864"                 => "IBM864",
        "ibm-864"                  => "IBM864",
        //
        // Aliases for T.61-8bit
        //
        "t.61"                     => "T.61-8bit",
        "iso-ir-103"               => "T.61-8bit",
        "csiso103t618bit"          => "T.61-8bit",
        //
        // Aliases for UTF-7
        //
        "x-unicode-2-0-utf-7"      => "UTF-7",
        "unicode-2-0-utf-7"        => "UTF-7",
        "unicode-1-1-utf-7"        => "UTF-7",
        "csunicode11utf7"          => "UTF-7",
        //
        // Aliases for ISO-10646-UCS-2
        //
        "csunicode"                => "UTF-16BE",
        "csunicode11"              => "UTF-16BE",
        "iso-10646-ucs-basic"      => "UTF-16BE",
        "csunicodeascii"           => "UTF-16BE",
        "iso-10646-unicode-latin1" => "UTF-16BE",
        "csunicodelatin1"          => "UTF-16BE",
        "iso-10646"                => "UTF-16BE",
        "iso-10646-j-1"            => "UTF-16BE",
        //
        // Aliases for ISO-8859-10
        //
        "latin6"                   => "ISO-8859-10",
        "iso-ir-157"               => "ISO-8859-10",
        "l6"                       => "ISO-8859-10",
        // Currently .properties cannot handle : in key
        //"iso_8859-10:1992"       => "ISO-8859-10",
        "csisolatin6"              => "ISO-8859-10",
        //
        // Aliases for ISO-8859-15
        //
        "iso_8859-15"              => "ISO-8859-15",
        "csisolatin9"              => "ISO-8859-15",
        "l9"                       => "ISO-8859-15",
        //
        // Aliases for ISO-IR-111
        //
        "ecma-cyrillic"            => "ISO-IR-111",
        "csiso111ecmacyrillic"     => "ISO-IR-111",
        //
        // Aliases for ISO-2022-KR
        //
        "csiso2022kr"              => "ISO-2022-KR",
        //
        // Aliases for VISCII
        //
        "csviscii"                 => "VISCII",
        //
        // Aliases for x-euc-tw
        //
        "zh_tw-euc"                => "x-euc-tw",
        //
        // Following names appears in unix nl_langinfo(CODESET)
        // They can be compiled as platform specific if necessary
        // DONT put things here if it does not look generic enough (like hp15CN)
        //
        "iso88591"                 => "ISO-8859-1",
        "iso88592"                 => "ISO-8859-2",
        "iso88593"                 => "ISO-8859-3",
        "iso88594"                 => "ISO-8859-4",
        "iso88595"                 => "ISO-8859-5",
        "iso88596"                 => "ISO-8859-6",
        "iso88597"                 => "ISO-8859-7",
        "iso88598"                 => "ISO-8859-8",
        "iso88599"                 => "ISO-8859-9",
        "iso885910"                => "ISO-8859-10",
        "iso885911"                => "ISO-8859-11",
        "iso885912"                => "ISO-8859-12",
        "iso885913"                => "ISO-8859-13",
        "iso885914"                => "ISO-8859-14",
        "iso885915"                => "ISO-8859-15",
        "cp1250"                   => "windows-1250",
        "cp1251"                   => "windows-1251",
        "cp1252"                   => "windows-1252",
        "cp1253"                   => "windows-1253",
        "cp1254"                   => "windows-1254",
        "cp1255"                   => "windows-1255",
        "cp1256"                   => "windows-1256",
        "cp1257"                   => "windows-1257",
        "cp1258"                   => "windows-1258",
        "x-gbk"                    => "gbk",
        "windows-936"              => "gbk",
        "ansi-1251"                => "windows-1251",
    ];

    /**
     * Returns the canonical name for a charset alias.
     *
     * The lookup is case-insensitive. When the alias is unknown, $fallback is
     * returned if it was given; otherwise $encoding is returned unchanged (an
     * empty string when $encoding is null).
     *
     * @param string|null $encoding Charset name as found in the message
     * @param string|null $fallback Value to return for unknown charsets
     *
     * @return string
     */
    public static function get($encoding, ?string $fallback = null): string
    {
        $key = strtolower($encoding ?? '');

        if (isset(self::$aliases[$key])) {
            return self::$aliases[$key];
        }

        // The return type is string, so a null $encoding with no fallback must be
        // mapped to '' instead of being returned as null.
        return $fallback ?? (string)($encoding ?? '');
    }
}