合并分支 'akun' 到 'master'
Akun 查看合并请求 !171
正在显示 1 个修改的文件，包含 46 行增加和 68 行删除
| @@ -39,14 +39,15 @@ class HtmlCollect extends Command | @@ -39,14 +39,15 @@ class HtmlCollect extends Command | ||
| 39 | 39 | ||
| 40 | public function handle() | 40 | public function handle() |
| 41 | { | 41 | { |
| 42 | - while (true) { | 42 | +// while (true) { |
| 43 | $this->start_collect(); | 43 | $this->start_collect(); |
| 44 | - } | 44 | +// } |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | protected function start_collect() | 47 | protected function start_collect() |
| 48 | { | 48 | { |
| 49 | - $task_id = $this->get_task(); | 49 | +// $task_id = $this->get_task(); |
| 50 | + $task_id = '595_41517'; | ||
| 50 | if ($task_id === false) { | 51 | if ($task_id === false) { |
| 51 | //所有项目采集完成 | 52 | //所有项目采集完成 |
| 52 | sleep(60); | 53 | sleep(60); |
| @@ -336,62 +337,25 @@ class HtmlCollect extends Command | @@ -336,62 +337,25 @@ class HtmlCollect extends Command | ||
| 336 | ]); | 337 | ]); |
| 337 | $html = str_replace($vs['url'], getImageUrl($new_source), $html); | 338 | $html = str_replace($vs['url'], getImageUrl($new_source), $html); |
| 338 | 339 | ||
| 339 | - if (substr($new_source, -3, 3) == 'css') { | ||
| 340 | - // 下载css文件中的资源 | ||
| 341 | - $css_html = curl_c($vs['url_complete'], false); | ||
| 342 | - preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source); | ||
| 343 | - $css_source = $result_css_source[1] ?? []; | ||
| 344 | - | ||
| 345 | - $url_arr = explode('/', $vs['url_complete']); | ||
| 346 | - $target_arr = explode('/', $new_source); | ||
| 347 | - foreach ($css_source as $vcs) { | ||
| 348 | - $vcs = str_replace('"', '', $vcs); | ||
| 349 | - $vcs_arr = parse_url($vcs); | ||
| 350 | - if (isset($vcs_arr['domain'])) { | ||
| 351 | - //不是相对路径,不下载 | ||
| 352 | - continue; | ||
| 353 | - } | 340 | + if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') { |
| 354 | 341 | ||
| 355 | - $vcs = $vcs_arr['path'] ?? ''; | ||
| 356 | - if (!$vcs) { | ||
| 357 | - continue; | ||
| 358 | - } | ||
| 359 | - if (strpos($vcs, '.') === false) { | ||
| 360 | - continue; | ||
| 361 | - } | ||
| 362 | - $path_arr = explode('.', $vcs); | ||
| 363 | - if (in_array(end($path_arr), ['html', 'php', 'com', 'xml'])) { | ||
| 364 | - continue; | ||
| 365 | - } | 342 | + $source_html = curl_c(getImageUrl($new_source), false); |
| 366 | 343 | ||
| 367 | - $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first(); | ||
| 368 | - if ($source_info) { | ||
| 369 | - //已存在,不下载 | ||
| 370 | - continue; | ||
| 371 | - } | ||
| 372 | - | ||
| 373 | - $url_arr[count($url_arr) - 1] = $vcs; | ||
| 374 | - $url_css_complete = implode('/', $url_arr); | ||
| 375 | - $target_arr[count($target_arr) - 1] = $vcs; | ||
| 376 | - $path = implode('/', $target_arr); | ||
| 377 | - | ||
| 378 | - $new_source_css = CosService::uploadRemote($project_id, 'source', $url_css_complete, $path); | ||
| 379 | - if ($new_source_css) { | ||
| 380 | - CollectSource::insert([ | ||
| 381 | - 'project_id' => $project_id, | ||
| 382 | - 'origin' => $vcs, | ||
| 383 | - 'target' => $new_source_css, | ||
| 384 | - 'created_at' => date('Y-m-d H:i:s'), | ||
| 385 | - 'updated_at' => date('Y-m-d H:i:s'), | ||
| 386 | - ]); | ||
| 387 | - } | 344 | + if (substr($new_source, -3, 3) == 'css') { |
| 345 | + preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source); | ||
| 346 | + } else { | ||
| 347 | + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source); | ||
| 388 | } | 348 | } |
| 389 | - } elseif (substr($new_source, -2, 2) == 'js') { | ||
| 390 | - $js_html = curl_c(getImageUrl($new_source), false); | ||
| 391 | - preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $js_html, $result_js_source); | ||
| 392 | - $js_source = $result_js_source[1] ?? []; | ||
| 393 | - if($js_source){ | ||
| 394 | - foreach ($js_source as $vjs) { | 349 | + |
| 350 | + $js_css_source = $result_source[1] ?? []; | ||
| 351 | + if ($js_css_source) { | ||
| 352 | + foreach ($js_css_source as $vjs) { | ||
| 353 | + if (substr($vjs, 0, 2) == './') { | ||
| 354 | + //相对路径 | ||
| 355 | + $url_arr = explode('/', $vs['url']); | ||
| 356 | + $url_arr[count($url_arr) - 1] = substr($vjs, 2); | ||
| 357 | + $vjs = implode('/', $url_arr); | ||
| 358 | + } | ||
| 395 | $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); | 359 | $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); |
| 396 | if (!$vjs_result) { | 360 | if (!$vjs_result) { |
| 397 | continue; | 361 | continue; |
| @@ -407,25 +371,39 @@ class HtmlCollect extends Command | @@ -407,25 +371,39 @@ class HtmlCollect extends Command | ||
| 407 | 'created_at' => date('Y-m-d H:i:s'), | 371 | 'created_at' => date('Y-m-d H:i:s'), |
| 408 | 'updated_at' => date('Y-m-d H:i:s'), | 372 | 'updated_at' => date('Y-m-d H:i:s'), |
| 409 | ]); | 373 | ]); |
| 410 | - $js_html = str_replace($vjs, getImageUrl($new_vjs), $js_html); | 374 | + $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html); |
| 411 | } | 375 | } |
| 412 | } else { | 376 | } else { |
| 413 | - $js_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $js_html); | 377 | + $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html); |
| 414 | } | 378 | } |
| 415 | } | 379 | } |
| 416 | 380 | ||
| 417 | - CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $js_html); | 381 | + CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $source_html); |
| 418 | } | 382 | } |
| 419 | } | 383 | } |
| 420 | } | 384 | } |
| 421 | } else { | 385 | } else { |
| 422 | $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html); | 386 | $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html); |
| 423 | - if(substr($vs['url_complete'], -2, 2) == 'js'){ | ||
| 424 | - $js_html = curl_c(getImageUrl($vs['url_complete']), false); | ||
| 425 | - preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $js_html, $result_js_source); | ||
| 426 | - $js_source = $result_js_source[1] ?? []; | ||
| 427 | - if($js_source){ | ||
| 428 | - foreach ($js_source as $vjs) { | 387 | + |
| 388 | + if (substr($vs['url_complete'], -3, 3) == 'css' || substr($vs['url_complete'], -2, 2) == 'js') { | ||
| 389 | + | ||
| 390 | + $source_html = curl_c(getImageUrl($vs['url_complete']), false); | ||
| 391 | + | ||
| 392 | + if (substr($vs['url_complete'], -3, 3) == 'css') { | ||
| 393 | + preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source); | ||
| 394 | + } else { | ||
| 395 | + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source); | ||
| 396 | + } | ||
| 397 | + | ||
| 398 | + $js_css_source = $result_source[1] ?? []; | ||
| 399 | + if ($js_css_source) { | ||
| 400 | + foreach ($js_css_source as $vjs) { | ||
| 401 | + if (substr($vjs, 0, 2) == './') { | ||
| 402 | + //相对路径 | ||
| 403 | + $url_arr = explode('/', $vs['url']); | ||
| 404 | + $url_arr[count($url_arr) - 1] = substr($vjs, 2); | ||
| 405 | + $vjs = implode('/', $url_arr); | ||
| 406 | + } | ||
| 429 | $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); | 407 | $vjs_result = $this->url_check($vjs, $project_id, $domain, $web_url_domain, $home_url); |
| 430 | if (!$vjs_result) { | 408 | if (!$vjs_result) { |
| 431 | continue; | 409 | continue; |
| @@ -441,14 +419,14 @@ class HtmlCollect extends Command | @@ -441,14 +419,14 @@ class HtmlCollect extends Command | ||
| 441 | 'created_at' => date('Y-m-d H:i:s'), | 419 | 'created_at' => date('Y-m-d H:i:s'), |
| 442 | 'updated_at' => date('Y-m-d H:i:s'), | 420 | 'updated_at' => date('Y-m-d H:i:s'), |
| 443 | ]); | 421 | ]); |
| 444 | - $js_html = str_replace($vjs, getImageUrl($new_vjs), $js_html); | 422 | + $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html); |
| 445 | } | 423 | } |
| 446 | } else { | 424 | } else { |
| 447 | - $js_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $js_html); | 425 | + $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html); |
| 448 | } | 426 | } |
| 449 | } | 427 | } |
| 450 | 428 | ||
| 451 | - CosService::uploadRemote($project_id, 'source', $vs['url_complete'], $vs['url_complete'], $js_html); | 429 | + CosService::uploadRemote($project_id, 'source', $vs['url_complete'], $vs['url_complete'], $source_html); |
| 452 | } | 430 | } |
| 453 | } | 431 | } |
| 454 | } | 432 | } |
-
请 注册 或 登录 后发表评论