作者 lyh
@@ -780,13 +780,30 @@ class ProjectUpdate extends Command @@ -780,13 +780,30 @@ class ProjectUpdate extends Command
780 $image = $item['images'] ?? ''; 780 $image = $item['images'] ?? '';
781 } 781 }
782 $new_img = $this->source_download($image, $project_id, $domain, $web_url_domain, $home_url); 782 $new_img = $this->source_download($image, $project_id, $domain, $web_url_domain, $home_url);
  783 + //描述
  784 + if (isset($item['description'])) {
  785 + //匹配描述资源
  786 + $source_list = $this->html_preg($item['description'], $project_id, $domain, $web_url_domain, $home_url);
  787 + if ($source_list) {
  788 + foreach ($source_list as $vs) {
  789 + if ($vs['download']) {
  790 + //需要下载资源
  791 + $down_url = $this->source_download($vs['url_complete'], $project_id, $domain, $web_url_domain, $home_url);
  792 + } else {
  793 + //已经下载过资源
  794 + $down_url = $vs['url_complete'];
  795 + }
  796 + $item['description'] = str_replace($vs['url'], $down_url, $item['description']);
  797 + }
  798 + }
  799 + }
783 $parent_id = $model->addReturnId([ 800 $parent_id = $model->addReturnId([
784 'project_id' => $project_id, 801 'project_id' => $project_id,
785 'title' => $item['name'], 802 'title' => $item['name'],
786 'image' => $new_img, 803 'image' => $new_img,
787 'pid' => $pid, 804 'pid' => $pid,
788 'keywords' => $item['keywords'] ?? '', 805 'keywords' => $item['keywords'] ?? '',
789 - 'describe' => $item['description'] ?? '', 806 + 'describe' => (isset($item['description']) && $item['description']) ? $item['description'] : '',
790 'original_id' => $item['id'], 807 'original_id' => $item['id'],
791 'route' => $route 808 'route' => $route
792 ]); 809 ]);
@@ -978,4 +995,101 @@ class ProjectUpdate extends Command @@ -978,4 +995,101 @@ class ProjectUpdate extends Command
978 return $key . $i; 995 return $key . $i;
979 } 996 }
980 } 997 }
  998 +
  999 + //正则匹配html资源
  1000 + protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
  1001 + {
  1002 + $source = [];
  1003 +
  1004 + if (!$html) {
  1005 + return $source;
  1006 + }
  1007 +
  1008 + //image
  1009 + preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
  1010 + $img = $result_img[2] ?? [];
  1011 + foreach ($img as $vi) {
  1012 + $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
  1013 + if ($check_vi && (!in_array($check_vi, $source))) {
  1014 + $source[] = $check_vi;
  1015 + }
  1016 + }
  1017 +
  1018 + //video
  1019 + preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
  1020 + $video = $result_video[2] ?? [];
  1021 + foreach ($video as $vv) {
  1022 + $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
  1023 + if ($check_vv && (!in_array($check_vv, $source))) {
  1024 + $source[] = $check_vv;
  1025 + }
  1026 + }
  1027 + preg_match_all('/<video\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video_2);
  1028 + $video_2 = $result_video_2[2] ?? [];
  1029 + foreach ($video_2 as $vv2) {
  1030 + $check_vv2 = $this->url_check($vv2, $project_id, $domain, $web_url_domain, $home_url);
  1031 + if ($check_vv2 && (!in_array($check_vv2, $source))) {
  1032 + $source[] = $check_vv2;
  1033 + }
  1034 + }
  1035 +
  1036 + //a标签下载资源
  1037 + preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a);
  1038 + $down = $result_a[2] ?? [];
  1039 + foreach ($down as $vd) {
  1040 + $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url);
  1041 + if ($check_vd && (!in_array($check_vd, $source))) {
  1042 + $source[] = $check_vd;
  1043 + }
  1044 + }
  1045 +
  1046 + return $source;
  1047 + }
  1048 +
  1049 + //判断资源是否需要下载
  1050 + protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
  1051 + {
  1052 + $url = trim($url);
  1053 + if ($url) {
  1054 + $url = str_replace('&quot;', '', $url);
  1055 + $arr = parse_url($url);
  1056 + $scheme = $arr['scheme'] ?? '';
  1057 + $host = $arr['host'] ?? '';
  1058 + $path = $arr['path'] ?? '';
  1059 + $query = $arr['query'] ?? '';
  1060 +
  1061 + $path_arr = explode('.', $path);
  1062 + $path_end = end($path_arr);
  1063 + if (
  1064 + (empty($scheme) || $scheme == 'https' || $scheme == 'http')
  1065 + && (empty($host) || (strpos($web_url_domain, $host) !== false) || (strpos($home_url, $host) !== false))
  1066 + && $path
  1067 + && (substr($path, 0, 1) == '/')
  1068 + && (strpos($path, '.') !== false)
  1069 + && (strpos($path_end, 'html') === false)
  1070 + && (strpos($path_end, 'php') === false)
  1071 + && (strpos($path_end, 'com') === false)
  1072 + && (strpos($path_end, 'xml') === false)
  1073 + ) {
  1074 + $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
  1075 + if (!$source) {
  1076 + return [
  1077 + 'download' => true,
  1078 + 'url' => $url,
  1079 + 'url_complete' => ($scheme ?: 'https') . '://' . $domain . $path . ($query ? '?' . $query : '')
  1080 + ];
  1081 + } else {
  1082 + return [
  1083 + 'download' => false,
  1084 + 'url' => $url,
  1085 + 'url_complete' => $source['target']
  1086 + ];
  1087 + }
  1088 + } else {
  1089 + return false;
  1090 + }
  1091 + } else {
  1092 + return false;
  1093 + }
  1094 + }
981 } 1095 }