Merge branch 'master' of http://47.244.231.31:8099/zhl/globalso-v6
正在显示 3 个修改的文件,包含 66 行增加和 25 行删除
| @@ -75,10 +75,27 @@ class HtmlCollect extends Command | @@ -75,10 +75,27 @@ class HtmlCollect extends Command | ||
| 75 | $collect_info->status = CollectTask::STATUS_ING; | 75 | $collect_info->status = CollectTask::STATUS_ING; |
| 76 | $collect_info->save(); | 76 | $collect_info->save(); |
| 77 | 77 | ||
| 78 | + //获取站点正式和测试域名 | ||
| 79 | + $web_url_domain = $collect_info->domain; | ||
| 80 | + $home_url = $collect_info->domain; | ||
| 81 | + $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text'; | ||
| 82 | + $data_config = http_get($url_web_config, ['charset' => 'UTF-8']); | ||
| 83 | + if ($data_config) { | ||
| 84 | + $web_url_arr = parse_url($data_config['web_url_domain']); | ||
| 85 | + if (isset($web_url_arr['host'])) { | ||
| 86 | + $web_url_domain = $web_url_arr['host']; | ||
| 87 | + } | ||
| 88 | + | ||
| 89 | + $home_url_arr = parse_url($data_config['home_url']); | ||
| 90 | + if (isset($home_url_arr['host'])) { | ||
| 91 | + $home_url = $home_url_arr['host']; | ||
| 92 | + } | ||
| 93 | + } | ||
| 94 | + | ||
| 78 | //采集html页面,下载资源到本地并替换 | 95 | //采集html页面,下载资源到本地并替换 |
| 79 | try { | 96 | try { |
| 80 | $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); | 97 | $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); |
| 81 | - $source_list = $this->html_preg($html, $project_id, $collect_info->domain); | 98 | + $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); |
| 82 | 99 | ||
| 83 | if ($source_list) { | 100 | if ($source_list) { |
| 84 | $html = $this->upload_source($html, $source_list, $project_id); | 101 | $html = $this->upload_source($html, $source_list, $project_id); |
| @@ -164,7 +181,7 @@ class HtmlCollect extends Command | @@ -164,7 +181,7 @@ class HtmlCollect extends Command | ||
| 164 | } | 181 | } |
| 165 | 182 | ||
| 166 | //正则匹配html资源 | 183 | //正则匹配html资源 |
| 167 | - protected function html_preg($html, $project_id, $domain) | 184 | + protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url) |
| 168 | { | 185 | { |
| 169 | $source = []; | 186 | $source = []; |
| 170 | 187 | ||
| @@ -176,7 +193,7 @@ class HtmlCollect extends Command | @@ -176,7 +193,7 @@ class HtmlCollect extends Command | ||
| 176 | preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); | 193 | preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); |
| 177 | $img = $result_img[2] ?? []; | 194 | $img = $result_img[2] ?? []; |
| 178 | foreach ($img as $vi) { | 195 | foreach ($img as $vi) { |
| 179 | - $check_vi = $this->url_check($vi, $project_id, $domain); | 196 | + $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url); |
| 180 | $check_vi && $source[] = $check_vi; | 197 | $check_vi && $source[] = $check_vi; |
| 181 | } | 198 | } |
| 182 | 199 | ||
| @@ -184,7 +201,7 @@ class HtmlCollect extends Command | @@ -184,7 +201,7 @@ class HtmlCollect extends Command | ||
| 184 | preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); | 201 | preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); |
| 185 | $js = $result_js[2] ?? []; | 202 | $js = $result_js[2] ?? []; |
| 186 | foreach ($js as $vj) { | 203 | foreach ($js as $vj) { |
| 187 | - $check_vj = $this->url_check($vj, $project_id, $domain); | 204 | + $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url); |
| 188 | $check_vj && $source[] = $check_vj; | 205 | $check_vj && $source[] = $check_vj; |
| 189 | } | 206 | } |
| 190 | 207 | ||
| @@ -192,7 +209,7 @@ class HtmlCollect extends Command | @@ -192,7 +209,7 @@ class HtmlCollect extends Command | ||
| 192 | preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); | 209 | preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); |
| 193 | $video = $result_video[2] ?? []; | 210 | $video = $result_video[2] ?? []; |
| 194 | foreach ($video as $vv) { | 211 | foreach ($video as $vv) { |
| 195 | - $check_vv = $this->url_check($vv, $project_id, $domain); | 212 | + $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url); |
| 196 | $check_vv && $source[] = $check_vv; | 213 | $check_vv && $source[] = $check_vv; |
| 197 | } | 214 | } |
| 198 | 215 | ||
| @@ -200,7 +217,7 @@ class HtmlCollect extends Command | @@ -200,7 +217,7 @@ class HtmlCollect extends Command | ||
| 200 | preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); | 217 | preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); |
| 201 | $css = $result_css[2] ?? []; | 218 | $css = $result_css[2] ?? []; |
| 202 | foreach ($css as $vc) { | 219 | foreach ($css as $vc) { |
| 203 | - $check_vc = $this->url_check($vc, $project_id, $domain); | 220 | + $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url); |
| 204 | $check_vc && $source[] = $check_vc; | 221 | $check_vc && $source[] = $check_vc; |
| 205 | } | 222 | } |
| 206 | 223 | ||
| @@ -208,7 +225,7 @@ class HtmlCollect extends Command | @@ -208,7 +225,7 @@ class HtmlCollect extends Command | ||
| 208 | preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); | 225 | preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); |
| 209 | $css_b = $result_css_b[1] ?? []; | 226 | $css_b = $result_css_b[1] ?? []; |
| 210 | foreach ($css_b as $vc_b) { | 227 | foreach ($css_b as $vc_b) { |
| 211 | - $check_vc_b = $this->url_check($vc_b, $project_id, $domain); | 228 | + $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url); |
| 212 | $check_vc_b && $source[] = $check_vc_b; | 229 | $check_vc_b && $source[] = $check_vc_b; |
| 213 | } | 230 | } |
| 214 | 231 | ||
| @@ -217,7 +234,7 @@ class HtmlCollect extends Command | @@ -217,7 +234,7 @@ class HtmlCollect extends Command | ||
| 217 | } | 234 | } |
| 218 | 235 | ||
| 219 | //判断资源是否需要下载 | 236 | //判断资源是否需要下载 |
| 220 | - protected function url_check($url, $project_id, $domain) | 237 | + protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url) |
| 221 | { | 238 | { |
| 222 | if ($url) { | 239 | if ($url) { |
| 223 | $arr = parse_url($url); | 240 | $arr = parse_url($url); |
| @@ -227,11 +244,10 @@ class HtmlCollect extends Command | @@ -227,11 +244,10 @@ class HtmlCollect extends Command | ||
| 227 | $query = $arr['query'] ?? ''; | 244 | $query = $arr['query'] ?? ''; |
| 228 | 245 | ||
| 229 | if ( | 246 | if ( |
| 230 | - (strpos($host, '.globalso.') === false) && | ||
| 231 | - (strpos($host, '.goodao.') === false) && | ||
| 232 | - $path && (strpos($path, '.') !== false) | 247 | + (empty($host) || $host == $web_url_domain || $host == $home_url) |
| 248 | + && $path | ||
| 249 | + && (strpos($path, '.') !== false) | ||
| 233 | ) { | 250 | ) { |
| 234 | - | ||
| 235 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); | 251 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); |
| 236 | if (!$source) { | 252 | if (!$source) { |
| 237 | return [ | 253 | return [ |
| @@ -290,6 +306,9 @@ class HtmlCollect extends Command | @@ -290,6 +306,9 @@ class HtmlCollect extends Command | ||
| 290 | if (!$vcs) { | 306 | if (!$vcs) { |
| 291 | continue; | 307 | continue; |
| 292 | } | 308 | } |
| 309 | + if (strpos($vcs, '.') === false) { | ||
| 310 | + continue; | ||
| 311 | + } | ||
| 293 | 312 | ||
| 294 | $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); | 313 | $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); |
| 295 | if ($source_info) { | 314 | if ($source_info) { |
| @@ -75,10 +75,26 @@ class HtmlLanguageCollect extends Command | @@ -75,10 +75,26 @@ class HtmlLanguageCollect extends Command | ||
| 75 | $collect_info->status = CollectTask::STATUS_ING; | 75 | $collect_info->status = CollectTask::STATUS_ING; |
| 76 | $collect_info->save(); | 76 | $collect_info->save(); |
| 77 | 77 | ||
| 78 | + $web_url_domain = $collect_info->domain; | ||
| 79 | + $home_url = $collect_info->domain; | ||
| 80 | + $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text'; | ||
| 81 | + $data_config = http_get($url_web_config, ['charset' => 'UTF-8']); | ||
| 82 | + if ($data_config) { | ||
| 83 | + $web_url_arr = parse_url($data_config['web_url_domain']); | ||
| 84 | + if (isset($web_url_arr['host'])) { | ||
| 85 | + $web_url_domain = $web_url_arr['host']; | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + $home_url_arr = parse_url($data_config['home_url']); | ||
| 89 | + if (isset($home_url_arr['host'])) { | ||
| 90 | + $home_url = $home_url_arr['host']; | ||
| 91 | + } | ||
| 92 | + } | ||
| 93 | + | ||
| 78 | //采集html页面,下载资源到本地并替换 | 94 | //采集html页面,下载资源到本地并替换 |
| 79 | try { | 95 | try { |
| 80 | $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); | 96 | $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); |
| 81 | - $source_list = $this->html_preg($html, $project_id, $collect_info->domain); | 97 | + $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); |
| 82 | 98 | ||
| 83 | if ($source_list) { | 99 | if ($source_list) { |
| 84 | $html = $this->upload_source($html, $source_list, $project_id); | 100 | $html = $this->upload_source($html, $source_list, $project_id); |
| @@ -164,7 +180,7 @@ class HtmlLanguageCollect extends Command | @@ -164,7 +180,7 @@ class HtmlLanguageCollect extends Command | ||
| 164 | } | 180 | } |
| 165 | 181 | ||
| 166 | //正则匹配html资源 | 182 | //正则匹配html资源 |
| 167 | - protected function html_preg($html, $project_id, $domain) | 183 | + protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url) |
| 168 | { | 184 | { |
| 169 | $source = []; | 185 | $source = []; |
| 170 | 186 | ||
| @@ -176,7 +192,7 @@ class HtmlLanguageCollect extends Command | @@ -176,7 +192,7 @@ class HtmlLanguageCollect extends Command | ||
| 176 | preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); | 192 | preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); |
| 177 | $img = $result_img[2] ?? []; | 193 | $img = $result_img[2] ?? []; |
| 178 | foreach ($img as $vi) { | 194 | foreach ($img as $vi) { |
| 179 | - $check_vi = $this->url_check($vi, $project_id, $domain); | 195 | + $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url); |
| 180 | $check_vi && $source[] = $check_vi; | 196 | $check_vi && $source[] = $check_vi; |
| 181 | } | 197 | } |
| 182 | 198 | ||
| @@ -184,7 +200,7 @@ class HtmlLanguageCollect extends Command | @@ -184,7 +200,7 @@ class HtmlLanguageCollect extends Command | ||
| 184 | preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); | 200 | preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); |
| 185 | $js = $result_js[2] ?? []; | 201 | $js = $result_js[2] ?? []; |
| 186 | foreach ($js as $vj) { | 202 | foreach ($js as $vj) { |
| 187 | - $check_vj = $this->url_check($vj, $project_id, $domain); | 203 | + $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url); |
| 188 | $check_vj && $source[] = $check_vj; | 204 | $check_vj && $source[] = $check_vj; |
| 189 | } | 205 | } |
| 190 | 206 | ||
| @@ -192,7 +208,7 @@ class HtmlLanguageCollect extends Command | @@ -192,7 +208,7 @@ class HtmlLanguageCollect extends Command | ||
| 192 | preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); | 208 | preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); |
| 193 | $video = $result_video[2] ?? []; | 209 | $video = $result_video[2] ?? []; |
| 194 | foreach ($video as $vv) { | 210 | foreach ($video as $vv) { |
| 195 | - $check_vv = $this->url_check($vv, $project_id, $domain); | 211 | + $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url); |
| 196 | $check_vv && $source[] = $check_vv; | 212 | $check_vv && $source[] = $check_vv; |
| 197 | } | 213 | } |
| 198 | 214 | ||
| @@ -200,7 +216,7 @@ class HtmlLanguageCollect extends Command | @@ -200,7 +216,7 @@ class HtmlLanguageCollect extends Command | ||
| 200 | preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); | 216 | preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); |
| 201 | $css = $result_css[2] ?? []; | 217 | $css = $result_css[2] ?? []; |
| 202 | foreach ($css as $vc) { | 218 | foreach ($css as $vc) { |
| 203 | - $check_vc = $this->url_check($vc, $project_id, $domain); | 219 | + $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url); |
| 204 | $check_vc && $source[] = $check_vc; | 220 | $check_vc && $source[] = $check_vc; |
| 205 | } | 221 | } |
| 206 | 222 | ||
| @@ -208,7 +224,7 @@ class HtmlLanguageCollect extends Command | @@ -208,7 +224,7 @@ class HtmlLanguageCollect extends Command | ||
| 208 | preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); | 224 | preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); |
| 209 | $css_b = $result_css_b[1] ?? []; | 225 | $css_b = $result_css_b[1] ?? []; |
| 210 | foreach ($css_b as $vc_b) { | 226 | foreach ($css_b as $vc_b) { |
| 211 | - $check_vc_b = $this->url_check($vc_b, $project_id, $domain); | 227 | + $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url); |
| 212 | $check_vc_b && $source[] = $check_vc_b; | 228 | $check_vc_b && $source[] = $check_vc_b; |
| 213 | } | 229 | } |
| 214 | 230 | ||
| @@ -217,7 +233,7 @@ class HtmlLanguageCollect extends Command | @@ -217,7 +233,7 @@ class HtmlLanguageCollect extends Command | ||
| 217 | } | 233 | } |
| 218 | 234 | ||
| 219 | //判断资源是否需要下载 | 235 | //判断资源是否需要下载 |
| 220 | - protected function url_check($url, $project_id, $domain) | 236 | + protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url) |
| 221 | { | 237 | { |
| 222 | if ($url) { | 238 | if ($url) { |
| 223 | $arr = parse_url($url); | 239 | $arr = parse_url($url); |
| @@ -227,11 +243,10 @@ class HtmlLanguageCollect extends Command | @@ -227,11 +243,10 @@ class HtmlLanguageCollect extends Command | ||
| 227 | $query = $arr['query'] ?? ''; | 243 | $query = $arr['query'] ?? ''; |
| 228 | 244 | ||
| 229 | if ( | 245 | if ( |
| 230 | - (strpos($host, '.globalso.') === false) && | ||
| 231 | - (strpos($host, '.goodao.') === false) && | ||
| 232 | - $path && (strpos($path, '.') !== false) | 246 | + (empty($host) || $host == $web_url_domain || $host == $home_url) |
| 247 | + && $path | ||
| 248 | + && (strpos($path, '.') !== false) | ||
| 233 | ) { | 249 | ) { |
| 234 | - | ||
| 235 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); | 250 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); |
| 236 | if (!$source) { | 251 | if (!$source) { |
| 237 | return [ | 252 | return [ |
| @@ -290,6 +305,9 @@ class HtmlLanguageCollect extends Command | @@ -290,6 +305,9 @@ class HtmlLanguageCollect extends Command | ||
| 290 | if (!$vcs) { | 305 | if (!$vcs) { |
| 291 | continue; | 306 | continue; |
| 292 | } | 307 | } |
| 308 | + if (strpos($vcs, '.') === false) { | ||
| 309 | + continue; | ||
| 310 | + } | ||
| 293 | 311 | ||
| 294 | $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); | 312 | $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); |
| 295 | if ($source_info) { | 313 | if ($source_info) { |
| @@ -104,7 +104,11 @@ class CosService | @@ -104,7 +104,11 @@ class CosService | ||
| 104 | 'verify_peer_name' => false, | 104 | 'verify_peer_name' => false, |
| 105 | ] | 105 | ] |
| 106 | ]; | 106 | ]; |
| 107 | - $body = file_get_contents($file_url,false,stream_context_create($opts)); | 107 | + try { |
| 108 | + $body = file_get_contents($file_url,false,stream_context_create($opts)); | ||
| 109 | + }catch (\Exception $e){ | ||
| 110 | + return ''; | ||
| 111 | + } | ||
| 108 | 112 | ||
| 109 | try { | 113 | try { |
| 110 | $cosClient->putObject([ | 114 | $cosClient->putObject([ |
-
请注册或登录后发表评论