作者 赵彬吉
@@ -75,10 +75,27 @@ class HtmlCollect extends Command @@ -75,10 +75,27 @@ class HtmlCollect extends Command
75 $collect_info->status = CollectTask::STATUS_ING; 75 $collect_info->status = CollectTask::STATUS_ING;
76 $collect_info->save(); 76 $collect_info->save();
77 77
  78 + //获取站点正式和测试域名
  79 + $web_url_domain = $collect_info->domain;
  80 + $home_url = $collect_info->domain;
  81 + $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
  82 + $data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
  83 + if ($data_config) {
  84 + $web_url_arr = parse_url($data_config['web_url_domain']);
  85 + if (isset($web_url_arr['host'])) {
  86 + $web_url_domain = $web_url_arr['host'];
  87 + }
  88 +
  89 + $home_url_arr = parse_url($data_config['home_url']);
  90 + if (isset($home_url_arr['host'])) {
  91 + $home_url = $home_url_arr['host'];
  92 + }
  93 + }
  94 +
78 //采集html页面,下载资源到本地并替换 95 //采集html页面,下载资源到本地并替换
79 try { 96 try {
80 $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); 97 $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
81 - $source_list = $this->html_preg($html, $project_id, $collect_info->domain); 98 + $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
82 99
83 if ($source_list) { 100 if ($source_list) {
84 $html = $this->upload_source($html, $source_list, $project_id); 101 $html = $this->upload_source($html, $source_list, $project_id);
@@ -164,7 +181,7 @@ class HtmlCollect extends Command @@ -164,7 +181,7 @@ class HtmlCollect extends Command
164 } 181 }
165 182
166 //正则匹配html资源 183 //正则匹配html资源
167 - protected function html_preg($html, $project_id, $domain) 184 + protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
168 { 185 {
169 $source = []; 186 $source = [];
170 187
@@ -176,7 +193,7 @@ class HtmlCollect extends Command @@ -176,7 +193,7 @@ class HtmlCollect extends Command
176 preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); 193 preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
177 $img = $result_img[2] ?? []; 194 $img = $result_img[2] ?? [];
178 foreach ($img as $vi) { 195 foreach ($img as $vi) {
179 - $check_vi = $this->url_check($vi, $project_id, $domain); 196 + $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
180 $check_vi && $source[] = $check_vi; 197 $check_vi && $source[] = $check_vi;
181 } 198 }
182 199
@@ -184,7 +201,7 @@ class HtmlCollect extends Command @@ -184,7 +201,7 @@ class HtmlCollect extends Command
184 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); 201 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js);
185 $js = $result_js[2] ?? []; 202 $js = $result_js[2] ?? [];
186 foreach ($js as $vj) { 203 foreach ($js as $vj) {
187 - $check_vj = $this->url_check($vj, $project_id, $domain); 204 + $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url);
188 $check_vj && $source[] = $check_vj; 205 $check_vj && $source[] = $check_vj;
189 } 206 }
190 207
@@ -192,7 +209,7 @@ class HtmlCollect extends Command @@ -192,7 +209,7 @@ class HtmlCollect extends Command
192 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); 209 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
193 $video = $result_video[2] ?? []; 210 $video = $result_video[2] ?? [];
194 foreach ($video as $vv) { 211 foreach ($video as $vv) {
195 - $check_vv = $this->url_check($vv, $project_id, $domain); 212 + $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
196 $check_vv && $source[] = $check_vv; 213 $check_vv && $source[] = $check_vv;
197 } 214 }
198 215
@@ -200,7 +217,7 @@ class HtmlCollect extends Command @@ -200,7 +217,7 @@ class HtmlCollect extends Command
200 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); 217 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css);
201 $css = $result_css[2] ?? []; 218 $css = $result_css[2] ?? [];
202 foreach ($css as $vc) { 219 foreach ($css as $vc) {
203 - $check_vc = $this->url_check($vc, $project_id, $domain); 220 + $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url);
204 $check_vc && $source[] = $check_vc; 221 $check_vc && $source[] = $check_vc;
205 } 222 }
206 223
@@ -208,7 +225,7 @@ class HtmlCollect extends Command @@ -208,7 +225,7 @@ class HtmlCollect extends Command
208 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); 225 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b);
209 $css_b = $result_css_b[1] ?? []; 226 $css_b = $result_css_b[1] ?? [];
210 foreach ($css_b as $vc_b) { 227 foreach ($css_b as $vc_b) {
211 - $check_vc_b = $this->url_check($vc_b, $project_id, $domain); 228 + $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url);
212 $check_vc_b && $source[] = $check_vc_b; 229 $check_vc_b && $source[] = $check_vc_b;
213 } 230 }
214 231
@@ -217,7 +234,7 @@ class HtmlCollect extends Command @@ -217,7 +234,7 @@ class HtmlCollect extends Command
217 } 234 }
218 235
219 //判断资源是否需要下载 236 //判断资源是否需要下载
220 - protected function url_check($url, $project_id, $domain) 237 + protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
221 { 238 {
222 if ($url) { 239 if ($url) {
223 $arr = parse_url($url); 240 $arr = parse_url($url);
@@ -227,11 +244,10 @@ class HtmlCollect extends Command @@ -227,11 +244,10 @@ class HtmlCollect extends Command
227 $query = $arr['query'] ?? ''; 244 $query = $arr['query'] ?? '';
228 245
229 if ( 246 if (
230 - (strpos($host, '.globalso.') === false) &&  
231 - (strpos($host, '.goodao.') === false) &&  
232 - $path && (strpos($path, '.') !== false) 247 + (empty($host) || $host == $web_url_domain || $host == $home_url)
  248 + && $path
  249 + && (strpos($path, '.') !== false)
233 ) { 250 ) {
234 -  
235 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); 251 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
236 if (!$source) { 252 if (!$source) {
237 return [ 253 return [
@@ -290,6 +306,9 @@ class HtmlCollect extends Command @@ -290,6 +306,9 @@ class HtmlCollect extends Command
290 if (!$vcs) { 306 if (!$vcs) {
291 continue; 307 continue;
292 } 308 }
  309 + if (strpos($vcs, '.') === false) {
  310 + continue;
  311 + }
293 312
294 $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); 313 $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
295 if ($source_info) { 314 if ($source_info) {
@@ -75,10 +75,26 @@ class HtmlLanguageCollect extends Command @@ -75,10 +75,26 @@ class HtmlLanguageCollect extends Command
75 $collect_info->status = CollectTask::STATUS_ING; 75 $collect_info->status = CollectTask::STATUS_ING;
76 $collect_info->save(); 76 $collect_info->save();
77 77
  78 + $web_url_domain = $collect_info->domain;
  79 + $home_url = $collect_info->domain;
  80 + $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
  81 + $data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
  82 + if ($data_config) {
  83 + $web_url_arr = parse_url($data_config['web_url_domain']);
  84 + if (isset($web_url_arr['host'])) {
  85 + $web_url_domain = $web_url_arr['host'];
  86 + }
  87 +
  88 + $home_url_arr = parse_url($data_config['home_url']);
  89 + if (isset($home_url_arr['host'])) {
  90 + $home_url = $home_url_arr['host'];
  91 + }
  92 + }
  93 +
78 //采集html页面,下载资源到本地并替换 94 //采集html页面,下载资源到本地并替换
79 try { 95 try {
80 $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); 96 $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
81 - $source_list = $this->html_preg($html, $project_id, $collect_info->domain); 97 + $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
82 98
83 if ($source_list) { 99 if ($source_list) {
84 $html = $this->upload_source($html, $source_list, $project_id); 100 $html = $this->upload_source($html, $source_list, $project_id);
@@ -164,7 +180,7 @@ class HtmlLanguageCollect extends Command @@ -164,7 +180,7 @@ class HtmlLanguageCollect extends Command
164 } 180 }
165 181
166 //正则匹配html资源 182 //正则匹配html资源
167 - protected function html_preg($html, $project_id, $domain) 183 + protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
168 { 184 {
169 $source = []; 185 $source = [];
170 186
@@ -176,7 +192,7 @@ class HtmlLanguageCollect extends Command @@ -176,7 +192,7 @@ class HtmlLanguageCollect extends Command
176 preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); 192 preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
177 $img = $result_img[2] ?? []; 193 $img = $result_img[2] ?? [];
178 foreach ($img as $vi) { 194 foreach ($img as $vi) {
179 - $check_vi = $this->url_check($vi, $project_id, $domain); 195 + $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
180 $check_vi && $source[] = $check_vi; 196 $check_vi && $source[] = $check_vi;
181 } 197 }
182 198
@@ -184,7 +200,7 @@ class HtmlLanguageCollect extends Command @@ -184,7 +200,7 @@ class HtmlLanguageCollect extends Command
184 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); 200 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js);
185 $js = $result_js[2] ?? []; 201 $js = $result_js[2] ?? [];
186 foreach ($js as $vj) { 202 foreach ($js as $vj) {
187 - $check_vj = $this->url_check($vj, $project_id, $domain); 203 + $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url);
188 $check_vj && $source[] = $check_vj; 204 $check_vj && $source[] = $check_vj;
189 } 205 }
190 206
@@ -192,7 +208,7 @@ class HtmlLanguageCollect extends Command @@ -192,7 +208,7 @@ class HtmlLanguageCollect extends Command
192 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); 208 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
193 $video = $result_video[2] ?? []; 209 $video = $result_video[2] ?? [];
194 foreach ($video as $vv) { 210 foreach ($video as $vv) {
195 - $check_vv = $this->url_check($vv, $project_id, $domain); 211 + $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
196 $check_vv && $source[] = $check_vv; 212 $check_vv && $source[] = $check_vv;
197 } 213 }
198 214
@@ -200,7 +216,7 @@ class HtmlLanguageCollect extends Command @@ -200,7 +216,7 @@ class HtmlLanguageCollect extends Command
200 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); 216 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css);
201 $css = $result_css[2] ?? []; 217 $css = $result_css[2] ?? [];
202 foreach ($css as $vc) { 218 foreach ($css as $vc) {
203 - $check_vc = $this->url_check($vc, $project_id, $domain); 219 + $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url);
204 $check_vc && $source[] = $check_vc; 220 $check_vc && $source[] = $check_vc;
205 } 221 }
206 222
@@ -208,7 +224,7 @@ class HtmlLanguageCollect extends Command @@ -208,7 +224,7 @@ class HtmlLanguageCollect extends Command
208 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); 224 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b);
209 $css_b = $result_css_b[1] ?? []; 225 $css_b = $result_css_b[1] ?? [];
210 foreach ($css_b as $vc_b) { 226 foreach ($css_b as $vc_b) {
211 - $check_vc_b = $this->url_check($vc_b, $project_id, $domain); 227 + $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url);
212 $check_vc_b && $source[] = $check_vc_b; 228 $check_vc_b && $source[] = $check_vc_b;
213 } 229 }
214 230
@@ -217,7 +233,7 @@ class HtmlLanguageCollect extends Command @@ -217,7 +233,7 @@ class HtmlLanguageCollect extends Command
217 } 233 }
218 234
219 //判断资源是否需要下载 235 //判断资源是否需要下载
220 - protected function url_check($url, $project_id, $domain) 236 + protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
221 { 237 {
222 if ($url) { 238 if ($url) {
223 $arr = parse_url($url); 239 $arr = parse_url($url);
@@ -227,11 +243,10 @@ class HtmlLanguageCollect extends Command @@ -227,11 +243,10 @@ class HtmlLanguageCollect extends Command
227 $query = $arr['query'] ?? ''; 243 $query = $arr['query'] ?? '';
228 244
229 if ( 245 if (
230 - (strpos($host, '.globalso.') === false) &&  
231 - (strpos($host, '.goodao.') === false) &&  
232 - $path && (strpos($path, '.') !== false) 246 + (empty($host) || $host == $web_url_domain || $host == $home_url)
  247 + && $path
  248 + && (strpos($path, '.') !== false)
233 ) { 249 ) {
234 -  
235 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); 250 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
236 if (!$source) { 251 if (!$source) {
237 return [ 252 return [
@@ -290,6 +305,9 @@ class HtmlLanguageCollect extends Command @@ -290,6 +305,9 @@ class HtmlLanguageCollect extends Command
290 if (!$vcs) { 305 if (!$vcs) {
291 continue; 306 continue;
292 } 307 }
  308 + if (strpos($vcs, '.') === false) {
  309 + continue;
  310 + }
293 311
294 $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); 312 $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
295 if ($source_info) { 313 if ($source_info) {
@@ -104,7 +104,11 @@ class CosService @@ -104,7 +104,11 @@ class CosService
104 'verify_peer_name' => false, 104 'verify_peer_name' => false,
105 ] 105 ]
106 ]; 106 ];
107 - $body = file_get_contents($file_url,false,stream_context_create($opts)); 107 + try {
  108 + $body = file_get_contents($file_url,false,stream_context_create($opts));
  109 + }catch (\Exception $e){
  110 + return '';
  111 + }
108 112
109 try { 113 try {
110 $cosClient->putObject([ 114 $cosClient->putObject([