<?php

namespace App\Console\Commands\Update;

use App\Models\Collect\CollectSource;
use App\Models\Com\UpdateOldInfo;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;

/**
 * Upgrade from 4.0/5.0 to 6.0: collection of the main site's custom pages.
 * Class HtmlCustomCollect
 * @package App\Console\Commands\Update
 * @author Akun
 * @date 2023/12/13 14:44
 */
class HtmlCustomCollect extends Command
{
    /**
     * The name and signature of the console command.
     *
     * The signature declares no arguments or options; the project and page
     * list are hard-coded inside handle().
     *
     * @var string
     */
    protected $signature = 'project_html_custom_collect';

    /**
     * The console command description.
     *
     * English gloss of the text below: "Run the collection of a project's
     * custom HTML pages".
     *
     * @var string
     */
    protected $description = '执行项目自定义html页面采集';


    /**
     * Entry point of the command: collects a fixed list of legacy pages for one project.
     *
     * The target project ID, its v6 site directory and the page URLs are hard-coded
     * below and have to be edited before each run. Earlier runs used to be kept here
     * as commented-out configuration for projects 437 (v6-1500k.globalso.site),
     * 517 (v6-1gee9.globalso.site), 546 (v6-kx260.globalso.site),
     * 586 (v6-m605x.globalso.site), 626 (v6-m342g.globalso.site) and
     * 633 (v6-ke5nz.globalso.site); those dead URL lists are no longer carried in the
     * method body.
     *
     * Every URL is URL-decoded before it is handed to start_collect(), so
     * percent-encoded path segments reach it as raw characters.
     */
    public function handle()
    {
        ini_set('memory_limit', '512M');

        // Project currently being collected and the site directory its pages are written to.
        $project_id = 549;
        $project_site = 'v6-myz64.globalso.site';

        // Legacy pages to collect, processed in this order.
        $pages = [
            'https://sryled.goodao.net/event-staging/page/2/',
            'https://sryled.goodao.net/event-staging/page/3/',
            'https://sryled.goodao.net/event-staging/page/4/',
            'https://sryled.goodao.net/advertising-3/page/2/',
            'https://sryled.goodao.net/advertising-3/page/3/',
            'https://sryled.goodao.net/advertising-3/page/4/',
            'https://sryled.goodao.net/indoor/page/2/',
            'https://sryled.goodao.net/indoor/page/3/',
        ];

        $decoded_pages = array_map('urldecode', $pages);
        foreach ($decoded_pages as $decoded_page) {
            $this->start_collect($decoded_page, $project_id, $project_site);
        }
    }

    /**
     * Collect one legacy page and store it as a static index.html under the project's site directory.
     *
     * The project is selected with ProjectServer::useProject(); the actual fetch/rewrite/write
     * work is done by collect_page(). The 'custom_mysql' connection is disconnected on every
     * exit path once the project has been selected (previously the "no html" and exception
     * paths returned without disconnecting).
     *
     * Progress and errors are echoed to stdout; a failing page is skipped, not fatal.
     *
     * @param string $page         Absolute URL of the page to collect.
     * @param int    $project_id   ID passed to ProjectServer::useProject() and used for resource bookkeeping.
     * @param string $project_site Directory name (under the hard-coded public root) the page is written into.
     * @return bool Always true.
     */
    protected function start_collect($page, $project_id, $project_site)
    {
        $page_arr = parse_url($page);
        $domain = $page_arr['host'] ?? '';
        if ($domain === '') {
            // parse_url() failed or the URL has no host: there is nothing to fetch,
            // and no project connection has been opened yet.
            $this->collect_log($project_id, $page, 'error: invalid url');
            return true;
        }

        // The URL path is used as the target directory and 'index.html' is appended to it,
        // so it must end with a slash; otherwise '/foo' would be written as '/fooindex.html'.
        $path = rtrim($page_arr['path'] ?? '', '/') . '/';

        // Set up the database
        $project = ProjectServer::useProject($project_id);
        try {
            if ($project) {
                $this->collect_log($project_id, $page, 'collect start');
                if ($this->collect_page($page, $project_id, $project_site, $domain, $path)) {
                    $this->collect_log($project_id, $page, 'collect end');
                }
            } else {
                $this->collect_log($project_id, $page, 'no project');
            }
        } finally {
            // Close the database, also when collect_page() fails or throws.
            DB::disconnect('custom_mysql');
        }

        // Pause between pages (kept from the original implementation).
        sleep(2);
        return true;
    }

    /**
     * Echo one log line in the command's "date / project_id / page / message" format.
     */
    private function collect_log($project_id, $page, $message)
    {
        echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', page: ' . $page . ', ' . $message . PHP_EOL;
    }

    /**
     * Fetch the page, mirror its resources, rewrite links and write index.html.
     *
     * @return bool True when the page was written; false when it was skipped (the reason is logged).
     */
    private function collect_page($page, $project_id, $project_site, $domain, $path)
    {
        // Get the site's original domain information
        $old_info = UpdateOldInfo::getOldDomain($project_id, $domain);

        // Collect the html page, download its resources and replace the references
        try {
            $html = curl_c($page, false);
            if ($html == '0') {
                // NOTE(review): curl_c() appears to signal failure with '0' (loose comparison kept) - confirm.
                throw new \RuntimeException('no html');
            }

            // Strip base64 payloads before matching resources. Only the payload characters are
            // removed; the previous greedy pattern could swallow everything up to the last
            // double quote on the line and hide real resources on minified pages.
            $new_html = preg_replace('#(data:[^;"\']*;base64,)[A-Za-z0-9+/=]++#', '$1', $html);
            if ($new_html === null) {
                // PCRE error: fall back to matching against the unmodified page.
                $new_html = $html;
            }

            // Match resource links
            $source_list = $this->html_preg($new_html, $project_id, $domain, $old_info['web_url_domain'], $old_info['home_url']);

            // Download resources
            if ($source_list) {
                $html = $this->upload_source($html, $source_list, $project_id, $domain, $old_info['web_url_domain'], $old_info['home_url']);
            }

            // Replace the old domains so the remaining links become site-relative.
            // Empty values are skipped: replacing a bare "http://" would damage every external URL.
            foreach ([$old_info['web_url_domain'], $old_info['home_url']] as $old_host) {
                if ((string)$old_host === '') {
                    continue;
                }
                $html = str_replace(['http://' . $old_host, 'https://' . $old_host], '', $html);
            }

            // Temporarily hide the minor-language switchers (disabled)
            // $html = str_replace('<div class="change-language ensemble">', '<div class="change-language ensemble" style="display: none">', $html);
            // $html = str_replace('<div class="language_more">', '<div class="language_more" style="display: none">', $html);

            // Handle search: point legacy search form actions at /search/
            preg_match_all('/<form\s+[^>]*?action\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_search);
            $search = $result_search[2] ?? [];
            foreach ($search as $vc) {
                if ((strpos($vc, 'search.php') !== false) || (strpos($vc, 'index.php') !== false)) {
                    $html = str_replace($vc, '/search/', $html);
                }
            }

            // Add the statistics script. The original literal contained \" inside a single-quoted
            // string, which put a stray backslash at the end of the src attribute value.
            $html = str_replace('</body>', '<script src="https://ecdn6.globalso.com/public/customerVisit.min.js"></script></body>', $html);

            // Write the html to a file
            $file_path = '/www/wwwroot/globalso-v6-c-glo/public/' . $project_site . $path;
            if (!file_exists($file_path)) {
                mkdir($file_path, 0777, true);
            }

            if (file_put_contents($file_path . 'index.html', $html) === false) {
                throw new \RuntimeException('write failed: ' . $file_path . 'index.html');
            }
            chmod($file_path . 'index.html', 0777);

            return true;
        } catch (\Exception $e) {
            $this->collect_log($project_id, $page, 'error: ' . $e->getMessage());
            return false;
        }
    }

    /**
     * Extract the resource URLs referenced by a page via regular expressions.
     *
     * Scans, in this order: img/script/source/video/iframe `src`, link `href`,
     * CSS `url(...)` references and anchor `href` values. Every distinct URL is passed
     * once to url_check(); accepted URLs are returned in first-seen order without duplicates.
     *
     * Each raw URL is checked only once per call. The previous copy-pasted loops called
     * url_check() for every occurrence and discarded duplicates afterwards.
     *
     * @param string $html           Page markup to scan.
     * @param int    $project_id     Forwarded to url_check().
     * @param string $domain         Forwarded to url_check().
     * @param string $web_url_domain Forwarded to url_check().
     * @param string $home_url       Forwarded to url_check().
     * @return array List of url_check() results (arrays with 'download', 'url', 'url_complete').
     */
    protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
    {
        if (!$html) {
            return [];
        }

        // [pattern, index of the capture group holding the URL], in the original scan order.
        $patterns = [
            // image
            ['/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            // js
            ['/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            // video
            ['/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            ['/<video\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            ['/<iframe\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            // css
            ['/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
            // css background
            ["/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", 1],
            // downloadable resources linked from <a> tags
            ['/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
        ];

        $source = [];   // accepted resources, keyed by their cleaned URL
        $checked = [];  // raw URLs already passed through url_check()

        foreach ($patterns as $entry) {
            $pattern = $entry[0];
            $group = $entry[1];

            if (!preg_match_all($pattern, $html, $matches)) {
                // No match, or a PCRE error: nothing to collect for this pattern.
                continue;
            }

            foreach ($matches[$group] as $raw_url) {
                if (isset($checked[$raw_url])) {
                    continue;
                }
                $checked[$raw_url] = true;

                $check = $this->url_check($raw_url, $project_id, $domain, $web_url_domain, $home_url);
                // Different raw strings can clean up to the same URL, so de-duplicate on the result too.
                if ($check && !isset($source[$check['url']])) {
                    $source[$check['url']] = $check;
                }
            }
        }

        return array_values($source);
    }

    /**
     * Decide whether a URL found in a page is a resource that has to be downloaded.
     *
     * A URL is accepted when it has no scheme or an http/https scheme, has no host or a
     * host that occurs inside $web_url_domain or $home_url, and has an absolute path that
     * contains a dot and whose last dot-separated segment does not contain 'html', 'php',
     * 'com' or 'xml'.
     *
     * NOTE(review): the host test is a substring match of the host inside the configured
     * domains, not an equality check - confirm that this is intended.
     *
     * @param string $url            Raw URL as captured from the markup.
     * @param int    $project_id     Project whose CollectSource records are consulted.
     * @param string $domain         Host used to build the absolute download URL.
     * @param string $web_url_domain Old site domain the resource may belong to.
     * @param string $home_url       Second old site domain the resource may belong to.
     * @return array|false False when the URL is not a collectable resource; otherwise an array with
     *                     'download' (true when no CollectSource record exists yet),
     *                     'url' (the cleaned URL) and
     *                     'url_complete' (absolute URL to fetch, or the recorded target when already known).
     */
    protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
    {
        $url = trim($url);
        if (!$url) {
            return false;
        }

        $url = str_replace('&quot;', '', $url);
        $parts = parse_url($url);
        $scheme = $parts['scheme'] ?? '';
        $host = $parts['host'] ?? '';
        $path = $parts['path'] ?? '';
        $query = $parts['query'] ?? '';

        // Only scheme-less, http and https URLs qualify.
        if (!empty($scheme) && $scheme != 'https' && $scheme != 'http') {
            return false;
        }

        // A host, when present, has to be found inside one of the old site domains.
        if (!empty($host) && strpos($web_url_domain, $host) === false && strpos($home_url, $host) === false) {
            return false;
        }

        // The path has to be absolute and look like a file (contain a dot).
        if (!$path || substr($path, 0, 1) != '/' || strpos($path, '.') === false) {
            return false;
        }

        // Reject page-like endings; the check looks at everything after the last dot.
        $dot_segments = explode('.', $path);
        $path_end = end($dot_segments);
        foreach (['html', 'php', 'com', 'xml'] as $page_like) {
            if (strpos($path_end, $page_like) !== false) {
                return false;
            }
        }

        $known = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
        if ($known) {
            // Already recorded: reuse the stored target instead of downloading again.
            return [
                'download' => false,
                'url' => $url,
                'url_complete' => $known['target']
            ];
        }

        return [
            'download' => true,
            'url' => $url,
            'url_complete' => ($scheme ?: 'https') . '://' . $domain . $path . ($query ? '?' . $query : '')
        ];
    }

    /**
     * Download the collected resources and replace their references in the page.
     *
     * For each entry produced by html_preg():
     *  - entries already recorded ('download' => false) are only rewritten in $html;
     *  - new entries are uploaded through CosService::uploadRemote(), recorded in
     *    CollectSource and rewritten in $html; uploaded CSS/JS files are additionally
     *    scanned for nested resources (see localize_nested_sources()).
     *
     * Entries whose upload returns a falsy value are left untouched in $html.
     *
     * @param string $html           Page markup to rewrite.
     * @param array  $source         Resource list as returned by html_preg().
     * @param int    $project_id     Project the resources belong to.
     * @param string $domain         Forwarded to url_check() for nested resources.
     * @param string $web_url_domain Forwarded to url_check() for nested resources.
     * @param string $home_url       Forwarded to url_check() for nested resources.
     * @return string The rewritten markup.
     */
    protected function upload_source($html, $source, $project_id, $domain, $web_url_domain, $home_url)
    {
        foreach ($source as $vs) {
            if (!$vs['download']) {
                // Already recorded earlier: just point the page at the stored target.
                $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
                continue;
            }

            // Downloadable files must keep their original name. The extension is read from
            // the URL path so that a query string ("file.pdf?v=1") or upper-case extension
            // no longer defeats the check.
            $url_path = (string)parse_url($vs['url_complete'], PHP_URL_PATH);
            $extension = strtolower(pathinfo($url_path, PATHINFO_EXTENSION));
            if (in_array($extension, ['pdf', 'zip', 'rar', 'gz'], true)) {
                // NOTE(review): the trailing argument 1 presumably asks uploadRemote() to keep the file name - confirm.
                $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete'], '', '', 1);
            } else {
                $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
            }
            if (!$new_source) {
                continue;
            }

            CollectSource::insert([
                'project_id' => $project_id,
                'origin' => $vs['url'],
                'target' => $new_source,
                'created_at' => date('Y-m-d H:i:s'),
                'updated_at' => date('Y-m-d H:i:s'),
            ]);
            $html = str_replace($vs['url'], getImageUrl($new_source), $html);

            if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') {
                $this->localize_nested_sources($new_source, $vs['url'], $project_id, $domain, $web_url_domain, $home_url);
            }
        }

        return $html;
    }

    /**
     * Mirror the resources referenced inside an uploaded CSS/JS file and re-upload the rewritten file.
     *
     * @param string $new_source Stored path of the uploaded CSS/JS file.
     * @param string $origin_url Original URL of that file; relative references are resolved against it.
     */
    private function localize_nested_sources($new_source, $origin_url, $project_id, $domain, $web_url_domain, $home_url)
    {
        $source_html = curl_c(getImageUrl($new_source), false);

        if (substr($new_source, -3, 3) == 'css') {
            preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source);
        } else {
            // NOTE(review): "[large|thumb]+" is a character class, not an alternation, so this also
            // matches other names ending in one of those letters before "URL:". Left unchanged
            // because narrowing it could drop matches the job currently relies on.
            preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
        }

        $js_css_source = $result_source[1] ?? [];
        if (!$js_css_source) {
            return;
        }

        // Loop-invariant: the CDN host identifies references that were already mirrored.
        $cos = config('filesystems.disks.cos');
        $cosCdn = $cos['cdn'];

        // References already rewritten in this file. Skipping repeats avoids a second lookup and a
        // second str_replace(), which would corrupt the file if the new URL contains the old string.
        $replaced = [];

        foreach ($js_css_source as $vjs) {
            if (strpos($vjs, 'URL:"') !== false) {
                // The lazy match can span several "...URL:" entries; keep only what follows the first marker.
                $vjs = substr($vjs, strpos($vjs, 'URL:"') + 5);
            }
            if (isset($replaced[(string)$vjs])) {
                continue;
            }

            $vjs_down = str_replace('&quot;', '', $vjs);
            if (strpos($vjs_down, 'data:') !== false) {
                // Skip inline (binary) data
                continue;
            }
            if (strlen($vjs_down) > 255) {
                // Skip overly long references
                continue;
            }

            $vjs_down_arr = parse_url($vjs_down);
            $vjs_down_host = $vjs_down_arr['host'] ?? '';
            if ($vjs_down_host && $vjs_down_host == $cosCdn) {
                // Skip references that were already downloaded
                continue;
            }

            if (empty($vjs_down_host) && substr($vjs_down, 0, 1) != '/') {
                // Relative path: resolve against the directory of the CSS/JS file's original URL
                $url_arr = explode('/', $origin_url);
                $url_arr[count($url_arr) - 1] = $vjs_down;
                $vjs_down = implode('/', $url_arr);
            }

            $vjs_result = $this->url_check($vjs_down, $project_id, $domain, $web_url_domain, $home_url);
            if (!$vjs_result) {
                continue;
            }

            if ($vjs_result['download']) {
                $new_vjs = CosService::uploadRemote($project_id, 'source', $vjs_result['url_complete']);
                if (!$new_vjs) {
                    // Upload failed: leave the reference as is so a later occurrence can retry.
                    continue;
                }
                CollectSource::insert([
                    'project_id' => $project_id,
                    'origin' => $vjs_result['url'],
                    'target' => $new_vjs,
                    'created_at' => date('Y-m-d H:i:s'),
                    'updated_at' => date('Y-m-d H:i:s'),
                ]);
                $target = $new_vjs;
            } else {
                $target = $vjs_result['url_complete'];
            }

            $source_html = str_replace($vjs, getImageUrl($target), $source_html);
            $replaced[(string)$vjs] = true;
        }

        // Re-upload the file under the same stored path with the rewritten content.
        CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $source_html);
    }
}