作者 刘锟

合并分支 'akun' 到 'master'

Akun



查看合并请求 !171
@@ -39,14 +39,15 @@ class HtmlCollect extends Command @@ -39,14 +39,15 @@ class HtmlCollect extends Command
39 39
40 public function handle() 40 public function handle()
41 { 41 {
42 - while (true) { 42 +// while (true) {
43 $this->start_collect(); 43 $this->start_collect();
44 - } 44 +// }
45 } 45 }
46 46
47 protected function start_collect() 47 protected function start_collect()
48 { 48 {
49 - $task_id = $this->get_task(); 49 +// $task_id = $this->get_task();
  50 + $task_id = '595_41517';
50 if ($task_id === false) { 51 if ($task_id === false) {
51 //所有项目采集完成 52 //所有项目采集完成
52 sleep(60); 53 sleep(60);
@@ -336,62 +337,25 @@ class HtmlCollect extends Command @@ -336,62 +337,25 @@ class HtmlCollect extends Command
336 ]); 337 ]);
337 $html = str_replace($vs['url'], getImageUrl($new_source), $html); 338 $html = str_replace($vs['url'], getImageUrl($new_source), $html);
338 339
339 - if (substr($new_source, -3, 3) == 'css') {  
340 - // 下载css文件中的资源  
341 - $css_html = curl_c($vs['url_complete'], false);  
342 - preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);  
343 - $css_source = $result_css_source[1] ?? [];  
344 -  
345 - $url_arr = explode('/', $vs['url_complete']);  
346 - $target_arr = explode('/', $new_source);  
347 - foreach ($css_source as $vcs) {  
348 - $vcs = str_replace('"', '', $vcs);  
349 - $vcs_arr = parse_url($vcs);  
350 - if (isset($vcs_arr['domain'])) {  
351 - //不是相对路径,不下载  
352 - continue;  
353 - } 340 + if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') {
354 341
355 - $vcs = $vcs_arr['path'] ?? '';  
356 - if (!$vcs) {  
357 - continue;  
358 - }  
359 - if (strpos($vcs, '.') === false) {  
360 - continue;  
361 - }  
362 - $path_arr = explode('.', $vcs);  
363 - if (in_array(end($path_arr), ['html', 'php', 'com', 'xml'])) {  
364 - continue;  
365 - } 342 + $source_html = curl_c(getImageUrl($new_source), false);
366 343
367 - $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();  
368 - if ($source_info) {  
369 - //已存在,不下载  
370 - continue;  
371 - }  
372 -  
373 - $url_arr[count($url_arr) - 1] = $vcs;  
374 - $url_css_complete = implode('/', $url_arr);  
375 - $target_arr[count($target_arr) - 1] = $vcs;  
376 - $path = implode('/', $target_arr);  
377 -  
378 - $new_source_css = CosService::uploadRemote($project_id, 'source', $url_css_complete, $path);  
379 - if ($new_source_css) {  
380 - CollectSource::insert([  
381 - 'project_id' => $project_id,  
382 - 'origin' => $vcs,  
383 - 'target' => $new_source_css,  
384 - 'created_at' => date('Y-m-d H:i:s'),  
385 - 'updated_at' => date('Y-m-d H:i:s'),  
386 - ]);  
387 - } 344 + if (substr($new_source, -3, 3) == 'css') {
  345 + preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source);
  346 + } else {
  347 + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
388 } 348 }
389 - } elseif (substr($new_source, -2, 2) == 'js') {  
390 - $js_html = curl_c(getImageUrl($new_source), false);  
391 - preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $js_html, $result_js_source);  
392 - $js_source = $result_js_source[1] ?? [];  
393 - if($js_source){  
394 - foreach ($js_source as $vjs) { 349 +
  350 + $js_css_source = $result_source[1] ?? [];
  351 + if ($js_css_source) {
  352 + foreach ($js_css_source as $vjs) {
  353 + if (substr($vjs, 0, 2) == './') {
  354 + //相对路径
  355 + $url_arr = explode('/', $vs['url']);
  356 + $url_arr[count($url_arr) - 1] = substr($vjs, 2);
  357 + $vjs = implode('/', $url_arr);
  358 + }
395 $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); 359 $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url);
396 if (!$vjs_result) { 360 if (!$vjs_result) {
397 continue; 361 continue;
@@ -407,25 +371,39 @@ class HtmlCollect extends Command @@ -407,25 +371,39 @@ class HtmlCollect extends Command
407 'created_at' => date('Y-m-d H:i:s'), 371 'created_at' => date('Y-m-d H:i:s'),
408 'updated_at' => date('Y-m-d H:i:s'), 372 'updated_at' => date('Y-m-d H:i:s'),
409 ]); 373 ]);
410 - $js_html = str_replace($vjs, getImageUrl($new_vjs), $js_html); 374 + $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html);
411 } 375 }
412 } else { 376 } else {
413 - $js_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $js_html); 377 + $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html);
414 } 378 }
415 } 379 }
416 380
417 - CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $js_html); 381 + CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $source_html);
418 } 382 }
419 } 383 }
420 } 384 }
421 } else { 385 } else {
422 $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html); 386 $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
423 - if(substr($vs['url_complete'], -2, 2) == 'js'){  
424 - $js_html = curl_c(getImageUrl($vs['url_complete']), false);  
425 - preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $js_html, $result_js_source);  
426 - $js_source = $result_js_source[1] ?? [];  
427 - if($js_source){  
428 - foreach ($js_source as $vjs) { 387 +
  388 + if (substr($vs['url_complete'], -3, 3) == 'css' || substr($vs['url_complete'], -2, 2) == 'js') {
  389 +
  390 + $source_html = curl_c(getImageUrl($vs['url_complete']), false);
  391 +
  392 + if (substr($vs['url_complete'], -3, 3) == 'css') {
  393 + preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source);
  394 + } else {
  395 + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
  396 + }
  397 +
  398 + $js_css_source = $result_source[1] ?? [];
  399 + if ($js_css_source) {
  400 + foreach ($js_css_source as $vjs) {
  401 + if (substr($vjs, 0, 2) == './') {
  402 + //相对路径
  403 + $url_arr = explode('/', $vs['url']);
  404 + $url_arr[count($url_arr) - 1] = substr($vjs, 2);
  405 + $vjs = implode('/', $url_arr);
  406 + }
429 $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); 407 $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url);
430 if (!$vjs_result) { 408 if (!$vjs_result) {
431 continue; 409 continue;
@@ -441,14 +419,14 @@ class HtmlCollect extends Command @@ -441,14 +419,14 @@ class HtmlCollect extends Command
441 'created_at' => date('Y-m-d H:i:s'), 419 'created_at' => date('Y-m-d H:i:s'),
442 'updated_at' => date('Y-m-d H:i:s'), 420 'updated_at' => date('Y-m-d H:i:s'),
443 ]); 421 ]);
444 - $js_html = str_replace($vjs, getImageUrl($new_vjs), $js_html); 422 + $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html);
445 } 423 }
446 } else { 424 } else {
447 - $js_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $js_html); 425 + $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html);
448 } 426 }
449 } 427 }
450 428
451 - CosService::uploadRemote($project_id, 'source', $vs['url_complete'], $vs['url_complete'], $js_html); 429 + CosService::uploadRemote($project_id, 'source', $vs['url_complete'], $vs['url_complete'], $source_html);
452 } 430 }
453 } 431 }
454 } 432 }