合并分支 'akun' 到 'master'
Akun 查看合并请求 !377
正在显示
1 个修改的文件
包含
473 行增加
和
0 行删除
| 1 | +<?php | ||
| 2 | + | ||
| 3 | +namespace App\Console\Commands\Update; | ||
| 4 | + | ||
| 5 | +use App\Models\Collect\CollectSource; | ||
| 6 | +use App\Models\Com\UpdateOldInfo; | ||
| 7 | +use App\Services\CosService; | ||
| 8 | +use App\Services\ProjectServer; | ||
| 9 | +use Illuminate\Console\Command; | ||
| 10 | +use Illuminate\Support\Facades\DB; | ||
| 11 | + | ||
/**
 * Collects custom HTML pages of a 4.0 / 5.0 main site while it is upgraded to 6.0.
 *
 * For each configured page the command fetches the HTML, re-hosts the static
 * resources it references via CosService, rewrites the links and writes the
 * result as a static `index.html` below the project's public directory.
 *
 * @package App\Console\Commands\Update
 * @author Akun
 * @date 2023/12/13 14:44
 */
class HtmlCustomCollect extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'project_html_custom_collect';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '执行项目自定义html页面采集';

    /**
     * Runs the collection for the currently enabled batch.
     *
     * The project id, the target site and the page list are hard-coded; batches
     * for other projects are kept commented out below as a record.
     *
     * @return void
     */
    public function handle()
    {
        ini_set('memory_limit', '512M');

        // $project_id = 437;
        // $project_site = 'v6-1500k.globalso.site';
        // $pages = [
        //     'https://www.tourletent.com/project/b300-glamping-tent-in-china/',
        //     'https://www.tourletent.com/project/lotus-bell-tent-in-australia/',
        //     'https://www.tourletent.com/project/luxury-resort-in-china/',
        //     'https://www.tourletent.com/project/canvas-safari-tent-inthailand/',
        //     'https://www.tourletent.com/project/safari-tent-for-m8-in-mexcio/',
        //     'https://www.tourletent.com/project/9m-pvc-dome-tent-in-canada/',
        //     'https://www.tourletent.com/project/c900-hotel-tent-in-korea/',
        //     'https://www.tourletent.com/project/safari-tent-in-guizhou/',
        //     'https://www.tourletent.com/project/dome-tent-in-austin/',
        //     'https://www.tourletent.com/project/safari-tent-in-italy/',
        //     'https://www.tourletent.com/project/glass-igloo-in-china/',
        //     'https://www.tourletent.com/project/tree-house-in-sichuan/'
        // ];

        // $project_id = 517;
        // $project_site = 'v6-1gee9.globalso.site';
        // $pages = [
        //     'https://www.beifa.group/help/send-results/',
        //     'https://www.beifa.group/help/terms-of-use/',
        //     'https://www.beifa.group/help/position-3/',
        //     'https://www.beifa.group/help/position-2/',
        //     'https://www.beifa.group/help/position-1/',
        //     'https://www.beifa.group/help/social-media/',
        //     'https://www.beifa.group/help/globle-exibition-2/',
        //     'https://www.beifa.group/help/job/',
        //     'https://www.beifa.group/help/vr/',
        //     'https://www.beifa.group/help/on-live-video/',
        //     'https://www.beifa.group/help/honor/',
        //     'https://www.beifa.group/help/certification/',
        //     'https://www.beifa.group/help/quanity-control/',
        //     'https://www.beifa.group/help/testing-center/',
        //     'https://www.beifa.group/help/rd/',
        //     'https://www.beifa.group/help/design-trend/',
        //     'https://www.beifa.group/help/partner/',
        //     'https://www.beifa.group/help/social-responsibility/',
        //     'https://www.beifa.group/help/contact/',
        // ];

        // $project_id = 546;
        // $project_site = 'v6-kx260.globalso.site';
        // $pages = [
        //     'https://www.grechofiberglass.com/success_stories/',
        //     'https://www.grechofiberglass.com/success_stories/achieving-transformative-improvements-for-polyurethane-exterior-insulation-panels-in-france/',
        //     'https://www.grechofiberglass.com/success_stories/grechos-fiberglass-rebar-revolutionizing-canadian-construction-projects-with-unparalleled-quality/',
        //     'https://www.grechofiberglass.com/success_stories/customer-from-the-uk-purchases-fiberglass-coated-mats-for-plasterboards-from-grecho/',
        //     'https://www.grechofiberglass.com/success_stories/supplying-carbon-fiber-to-australian-surfboard-manufacturer/',
        //     'https://www.grechofiberglass.com/success_stories/300g-chopped-strand-mat-for-composite-slates-shipped-to-malaysia/',
        //     'https://www.grechofiberglass.com/success_stories/fiberglass-roving-for-pipewater-tank-shipped-to-russia/',
        //     'https://www.grechofiberglass.com/success_stories/fiberglass-fleece-for-acoustic-ceiling-shipped-to-russia/',
        //     'https://www.grechofiberglass.com/success_stories/600g-fiberglass-aluminum-foil-cloth-shipped-to-australia-for-pipe-heat-shielding/',
        //     'https://www.grechofiberglass.com/success_stories/shipping-our-first-truck-of-fiberglass-tissue-in-2022/',
        // ];

        // $project_id = 586;
        // $project_site = 'v6-m605x.globalso.site';
        // $pages = [
        //     'https://www.citymax-group.com/case/',
        //     'https://www.citymax-group.com/case_catalog/crop-classification/',
        //     'https://www.citymax-group.com/case_catalog/field-crops/',
        //     'https://www.citymax-group.com/case_catalog/fruits/',
        //     'https://www.citymax-group.com/case_catalog/vegetables/',
        //     'https://www.citymax-group.com/case/report-on-use-of-citymax-products-on-grapes-2/',
        //     'https://www.citymax-group.com/case/report-on-use-of-citymax-products-on-cucumber/',
        //     'https://www.citymax-group.com/case/field-experiment-crop-lettuce/',
        // ];

        $project_id = 626;
        $project_site = 'v6-m342g.globalso.site';
        $pages = [
            'https://www.lecusostreetlight.com/project_catalog/project/',
            'https://www.lecusostreetlight.com/project_catalog/project/page/2/',
            // 'https://www.lecusostreetlight.com/project/560pcs-250w-smart-led-street-light-in-manila-city-philippines/',
            // 'https://www.lecusostreetlight.com/project/3200pcs-8m-150w-solar-street-light-in-cebu-philippines/',
            // 'https://lecusostreetlight.com/project/170pcs-100w-split-lithium-battery-solar-street-light-in-tanzania/',
            // 'https://www.lecusostreetlight.com/project/250pcs-40w-sl-series-solar-street-light-in-kuwait/',
            // 'https://www.lecusostreetlight.com/project/272pcs-8m-80w-solar-street-light-in-tanzania/',
            // 'https://www.lecusostreetlight.com/project/185pcs-10m-120w-highway-solar-street-light-in-jordan/',
            // 'https://www.lecusostreetlight.com/project/270pcs-9m-patterned-decorative-light-pole-with-150w-led-cobra-light-in-cambodia/',
            // 'https://www.lecusostreetlight.com/project/48pcs-5m-24w-decorative-aluminium-pole-in-dubai-uae/',
            // 'https://www.lecusostreetlight.com/project/105pcs-9m-100w-led-street-light-in-sri-lanka/',
            // 'https://www.lecusostreetlight.com/project/45pcs-6m-hot-dip-galvanized-double-arm-street-light-pole-in-dubai-uae/',
            // 'https://www.lecusostreetlight.com/project/356pcs-8m-100w-solar-street-light-in-ethiopia/',
            // 'https://www.lecusostreetlight.com/project/52pcs-6m-30w-solar-led-street-light-with-gel-battery-in-poland/',
            // 'https://www.lecusostreetlight.com/project/225pcs-6m-80w-solar-street-light-in-vietnam/',
            // 'https://www.lecusostreetlight.com/project/450pcs-7m-60w-double-arm-solar-light-in-nigeria/',
            // 'https://www.lecusostreetlight.com/project/100pcs-6m-50w-led-street-light-in-maldives/',
            // 'https://www.lecusostreetlight.com/project/202pcs-6m-40w-3000k-solar-street-light-in-manila-philippines/',
            // 'https://www.lecusostreetlight.com/project/245pcs-120w-smart-led-street-light-in-bangkok-thailand/',
            // 'https://www.lecusostreetlight.com/project/170pcs-7m-50w-all-in-one-solar-light-in-davao-philippines/',
            // 'https://www.lecusostreetlight.com/project/80pcs-9m-150w-led-street-light-in-iraq/',
            // 'https://www.lecusostreetlight.com/project/252pcs-6m-40w-separate-solar-street-light-with-lithium-battery-in-thailand/',
            // 'https://www.lecusostreetlight.com/project/198pcs-8m-80w-zc-series-all-in-two-solar-light-in-philippines/',
            // 'https://www.lecusostreetlight.com/project/5m-30w-morden-led-garden-light-in-russian/',
            // 'https://www.lecusostreetlight.com/project/135pcs-all-in-one-solar-street-light-in-uae-dubai-park/'
        ];

        // $project_id = 633;
        // $project_site = 'v6-ke5nz.globalso.site';
        // $pages = [
        //     'https://www.mach-sales.com/case_catalog/cases/',
        //     'https://www.mach-sales.com/case_catalog/cases/page/2/',
        //     'https://www.mach-sales.com/case_catalog/cases/page/3/',
        //     'https://www.mach-sales.com/case/growing-with-customers-from-small-motors-to-big-drivers/',
        //     'https://www.mach-sales.com/case/growing-together-with-customers-the-journey-of-intelligent-manufacturing-in-a-modernized-factory/',
        //     'https://www.mach-sales.com/case/%e3%80%90growing-together-with-customers%e3%80%91-the-evolutionary-journey-of-a-baking-brand/',
        //     'https://www.mach-sales.com/case/sumecs-footprints-in-belt-and-road-singapore/',
        //     'https://www.mach-sales.com/case/sumecs-footprints-in-belt-and-road-southeast-asia/',
        //     'https://www.mach-sales.com/case/naming-and-delivery-of-a-new-ship-1/',
        //     'https://www.mach-sales.com/case/a-newly-signed-contract-cable-manufacturing-equipment/',
        //     'https://www.mach-sales.com/case/its-amazing-that-the-glass-can-also-save-energy/',
        //     'https://www.mach-sales.com/case/%e3%80%90grow-with-customers%e3%80%91better-service-for-papermaking-equipment-procurement/',
        //     'https://www.mach-sales.com/case/expansion-of-blower-equipment-contributes-to-environmental-protection/',
        //     'https://www.mach-sales.com/case/new-ship-type-new-contract/',
        //     'https://www.mach-sales.com/case/new-arrival-introducing-the-latest-ship-model/',
        //     'https://www.mach-sales.com/case/another-contract-signed-in-the-philippines/',
        //     'https://www.mach-sales.com/case/the-road-to-going-global-is-supported-by-sumec-services/',
        //     'https://www.mach-sales.com/case/sumec-textile-launches-sun-protection-series-in-collaboration-with-skechers-kids/',
        //     'https://www.mach-sales.com/case/footwear-manufacturing-equipment-new-contract/',
        //     'https://www.mach-sales.com/case/sumec-energy-company-has-successfully-signed-a-photovoltaic-component-supply-agreement-with-wattkraft-a-german-engineering-system-integrator/',
        //     'https://www.mach-sales.com/case/sumec-technology-company-successfully-signed-a-contract-for-the-equipment-related-to-the-high-strength-particleboard-project/',
        //     'https://www.mach-sales.com/case/%e3%80%90growing-together-with-our-customers%e3%80%91together-on-the-road-to-transformation/',
        //     'https://www.mach-sales.com/case/new-signing/',
        //     'https://www.mach-sales.com/case/complete-the-last-mile-equipment-procurement-from-around-the-world/',
        //     'https://www.mach-sales.com/case/science-and-technology-to-rejuvenate-agriculture-this-plant-factory-is-not-simple/',
        //     'https://www.mach-sales.com/case/new-cooperation-high-end-equipment-going-global/',
        //     'https://www.mach-sales.com/case/a-new-cooperation-with-an-annual-output-of-40000-tons/',
        //     'https://www.mach-sales.com/case/a-new-contract-cooperation-upgrades/',
        //     'https://www.mach-sales.com/case/annual-output-of-50000-tons-this-bopp-film-production-line-was-officially-put-into-operation/',
        //     'https://www.mach-sales.com/case/this-is-sumec-speed/',
        //     'https://www.mach-sales.com/case/the-first-order-of-a-new-semiconductor-brand-equipment-direct-sales-bear-fruit-again/',
        //     'https://www.mach-sales.com/case/sumec-touch-world-celebrates-its-5th-anniversary-with-well-known-suppliers/',
        //     'https://www.mach-sales.com/case/from-traditional-agency-to-digital-service/',
        //     'https://www.mach-sales.com/case/cooperation-win-win-and-starting-anew-sumec-creates-a-new-sample-of-strong-enterprise-cooperation/'
        // ];

        foreach ($pages as $page) {
            // Pages are stored percent-encoded; they are decoded before use.
            $this->start_collect(urldecode($page), $project_id, $project_site);
        }
    }

    /**
     * Collects one page: switches to the project database, builds the rewritten
     * HTML and writes it to disk. Progress and errors are echoed to stdout.
     *
     * The project database connection is always released, including when the
     * page yields no HTML or an exception is raised while processing it.
     *
     * @param string $page         Absolute URL of the page to collect.
     * @param int    $project_id   Project whose database and storage are used.
     * @param string $project_site Directory name of the target site below the public root.
     * @return bool Always true.
     */
    protected function start_collect($page, $project_id, $project_site)
    {
        $log = static function ($message) use ($project_id, $page) {
            echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', page: ' . $page . ', ' . $message . PHP_EOL;
        };

        $page_arr = parse_url($page) ?: [];
        $domain = $page_arr['host'] ?? '';
        // A URL without a path (e.g. "https://example.com") maps to the site root.
        $path = $page_arr['path'] ?? '/';
        if ($domain === '') {
            $log('error: invalid page url');
            return true;
        }

        // Switch to the project's database.
        $project = ProjectServer::useProject($project_id);
        try {
            if ($project) {
                $log('collect start');

                // Original domain information of the site being migrated.
                $old_info = UpdateOldInfo::getOldDomain($project_id, $domain);
                $web_url_domain = (string)($old_info['web_url_domain'] ?? '');
                $home_url = (string)($old_info['home_url'] ?? '');

                try {
                    $html = $this->build_page_html($page, $project_id, $domain, $web_url_domain, $home_url);
                    if ($html === null) {
                        $log('error: no html');
                    } else {
                        $this->write_page_file($project_site, $path, $html);
                        $log('collect end');
                    }
                } catch (\Exception $e) {
                    $log('error: ' . $e->getMessage());
                }
            } else {
                $log('no project');
            }
        } finally {
            // Release the project connection on every path.
            DB::disconnect('custom_mysql');
        }

        // Pause between pages.
        sleep(2);
        return true;
    }

    /**
     * Fetches a page and returns its HTML with resources re-hosted, old-domain
     * prefixes removed, search forms redirected and the visit script appended.
     *
     * @param string $page           Absolute URL of the page.
     * @param int    $project_id     Project the resources are stored for.
     * @param string $domain         Host the page was fetched from.
     * @param string $web_url_domain Original site domain ('' when unknown).
     * @param string $home_url       Original home domain ('' when unknown).
     * @return string|null Rewritten HTML, or null when nothing could be fetched.
     */
    private function build_page_html($page, $project_id, $domain, $web_url_domain, $home_url)
    {
        $html = curl_c($page, false);
        // The original code treats '0' as the "no content" result of curl_c;
        // an empty body is rejected as well so no blank page is written.
        if (!$html || $html == '0') {
            return null;
        }

        // Strip base64 payloads from a scan copy so that the resource patterns do
        // not run over inline data. Only base64 characters are consumed, so the
        // rest of the line (minified pages are one long line) stays scannable.
        $scan_html = preg_replace('/(data:[^;"\']*;base64,)[A-Za-z0-9+\/=]+/', '$1', $html);
        if ($scan_html === null) {
            $scan_html = $html;
        }

        // Find resource links and re-host them.
        $source_list = $this->html_preg($scan_html, $project_id, $domain, $web_url_domain, $home_url);
        if ($source_list) {
            $html = $this->upload_source($html, $source_list, $project_id, $domain, $web_url_domain, $home_url);
        }

        // Turn absolute links to the old domains into site-relative ones. An empty
        // domain must be skipped: replacing a bare "http://" / "https://" would
        // break every absolute URL in the page.
        foreach ([$web_url_domain, $home_url] as $old_domain) {
            if ($old_domain === '') {
                continue;
            }
            $html = str_replace('http://' . $old_domain, '', $html);
            $html = str_replace('https://' . $old_domain, '', $html);
        }

        // Point legacy search forms at the new search route.
        preg_match_all('/<form\s+[^>]*?action\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_search);
        foreach ($result_search[2] ?? [] as $action) {
            if ((strpos($action, 'search.php') !== false) || (strpos($action, 'index.php') !== false)) {
                $html = str_replace($action, '/search/', $html);
            }
        }

        // Append the visit statistics script.
        return str_replace(
            '</body>',
            '<script src="https://ecdn6.globalso.com/public/customerVisit.min.js"></script></body>',
            $html
        );
    }

    /**
     * Writes the page as `index.html` into the directory matching its URL path.
     *
     * @param string $project_site Directory name of the target site.
     * @param string $path         URL path of the page.
     * @param string $html         Final page markup.
     * @return void
     * @throws \RuntimeException When the directory or the file cannot be written.
     */
    private function write_page_file($project_site, $path, $html)
    {
        // Normalise to exactly one trailing slash so "/foo" and "/foo/" both
        // end up as ".../foo/index.html".
        $dir = rtrim('/www/wwwroot/globalso-v6-c-glo/public/' . $project_site . $path, '/') . '/';

        // NOTE(review): 0777 is kept from the original code; tighten it if the
        // deployment allows.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('unable to create directory ' . $dir);
        }

        $file = $dir . 'index.html';
        if (file_put_contents($file, $html) === false) {
            throw new \RuntimeException('unable to write file ' . $file);
        }
        chmod($file, 0777);
    }

    /**
     * Extracts the resource links of a page that should be re-hosted.
     *
     * Scans img/script/source tags, link and anchor hrefs and CSS `url(...)`
     * references, and passes every candidate through url_check(). Each URL is
     * reported once, in order of first appearance, so a resource used several
     * times on the page is not uploaded and recorded repeatedly.
     *
     * @param string $html           Markup to scan.
     * @param int    $project_id     Project the resources belong to.
     * @param string $domain         Host the page was fetched from.
     * @param string $web_url_domain Original site domain.
     * @param string $home_url       Original home domain.
     * @return array List of url_check() results.
     */
    protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
    {
        if (!$html) {
            return [];
        }

        // Pattern => index of the capture group that holds the URL.
        $patterns = [
            // image
            '/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i' => 2,
            // js
            '/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i' => 2,
            // video
            '/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i' => 2,
            // css
            '/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i' => 2,
            // css background
            "/url\(['\"]?(\s*[^>]+?)['\"]?\)/i" => 1,
            // files linked from anchors
            '/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i' => 2,
        ];

        $source = [];
        $checked = [];
        foreach ($patterns as $pattern => $group) {
            preg_match_all($pattern, $html, $matches);
            foreach ($matches[$group] ?? [] as $candidate) {
                // Skip the lookup for a candidate that was already evaluated.
                if (isset($checked[$candidate])) {
                    continue;
                }
                $checked[$candidate] = true;

                $result = $this->url_check($candidate, $project_id, $domain, $web_url_domain, $home_url);
                if ($result && !isset($source[$result['url']])) {
                    $source[$result['url']] = $result;
                }
            }
        }

        return array_values($source);
    }

    /**
     * Decides whether a URL is a same-site static resource and whether it still
     * has to be downloaded.
     *
     * A URL qualifies when its scheme is empty, http or https, its host is empty
     * or contained in one of the old domains, and its path is absolute, contains
     * a dot and its last dot-separated part does not contain html, php, com or xml.
     *
     * @param string $url            Candidate URL as found in the markup.
     * @param int    $project_id     Project used to look up already stored resources.
     * @param string $domain         Host used to build the download URL.
     * @param string $web_url_domain Original site domain.
     * @param string $home_url       Original home domain.
     * @return array|false False when the URL is not a collectable resource, otherwise
     *                     ['download' => bool, 'url' => string, 'url_complete' => string];
     *                     `url_complete` is the download URL when `download` is true
     *                     and the stored target otherwise.
     */
    protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
    {
        $url = trim($url);
        if (!$url) {
            return false;
        }

        $url = str_replace('"', '', $url);
        $parts = parse_url($url);
        $scheme = $parts['scheme'] ?? '';
        $host = $parts['host'] ?? '';
        $path = $parts['path'] ?? '';
        $query = $parts['query'] ?? '';

        if (!(empty($scheme) || $scheme == 'https' || $scheme == 'http')) {
            return false;
        }

        // The host has to be a substring of one of the old domains (so a host
        // without "www." matches the "www." domain as well).
        $same_site = empty($host)
            || (strpos((string)$web_url_domain, $host) !== false)
            || (strpos((string)$home_url, $host) !== false);
        if (!$same_site) {
            return false;
        }

        if (!$path || substr($path, 0, 1) != '/' || strpos($path, '.') === false) {
            return false;
        }

        // Pages and feeds are not static resources.
        $segments = explode('.', $path);
        $path_end = end($segments);
        foreach (['html', 'php', 'com', 'xml'] as $excluded) {
            if (strpos($path_end, $excluded) !== false) {
                return false;
            }
        }

        $stored = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
        if ($stored) {
            return [
                'download' => false,
                'url' => $url,
                'url_complete' => $stored['target']
            ];
        }

        return [
            'download' => true,
            'url' => $url,
            'url_complete' => ($scheme ?: 'https') . '://' . $domain . $path . ($query ? '?' . $query : '')
        ];
    }

    /**
     * Re-hosts the given resources and replaces their links in the markup.
     *
     * Resources marked for download are uploaded through CosService and recorded
     * in CollectSource; already known ones are only relinked. Uploaded CSS and JS
     * files are additionally scanned for nested resources and stored again when
     * any reference was found in them. Each URL is processed once even if the
     * list contains it several times.
     *
     * @param string $html           Page markup.
     * @param array  $source         Results of url_check().
     * @param int    $project_id     Project the resources are stored for.
     * @param string $domain         Host the page was fetched from.
     * @param string $web_url_domain Original site domain.
     * @param string $home_url       Original home domain.
     * @return string Markup with the resource links replaced.
     */
    protected function upload_source($html, $source, $project_id, $domain, $web_url_domain, $home_url)
    {
        $handled = [];

        foreach ($source as $vs) {
            // A repeated entry would be uploaded and inserted a second time.
            if (isset($handled[$vs['url']])) {
                continue;
            }
            $handled[$vs['url']] = true;

            if (!$vs['download']) {
                $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
                continue;
            }

            $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
            if (!$new_source) {
                continue;
            }

            CollectSource::insert([
                'project_id' => $project_id,
                'origin' => $vs['url'],
                'target' => $new_source,
                'created_at' => date('Y-m-d H:i:s'),
                'updated_at' => date('Y-m-d H:i:s'),
            ]);
            $html = str_replace($vs['url'], getImageUrl($new_source), $html);

            $is_css = substr($new_source, -3) == 'css';
            if (!$is_css && substr($new_source, -2) != 'js') {
                continue;
            }

            // Stylesheets and scripts can reference further files of the old site.
            $source_html = curl_c(getImageUrl($new_source), false);
            $rewritten = $this->localize_nested_sources(
                $source_html,
                $is_css,
                $vs['url'],
                $project_id,
                $domain,
                $web_url_domain,
                $home_url
            );
            if ($rewritten !== null) {
                CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $rewritten);
            }
        }

        return $html;
    }

    /**
     * Re-hosts the resources referenced inside an uploaded CSS or JS file.
     *
     * @param string $content        Content of the CSS/JS file.
     * @param bool   $is_css         True for CSS (`url(...)` references), false for JS.
     * @param string $parent_url     Original URL of the file; base for relative references.
     * @param int    $project_id     Project the resources are stored for.
     * @param string $domain         Host the page was fetched from.
     * @param string $web_url_domain Original site domain.
     * @param string $home_url       Original home domain.
     * @return string|null Content with references replaced, or null when the file
     *                     contains no reference at all.
     */
    private function localize_nested_sources($content, $is_css, $parent_url, $project_id, $domain, $web_url_domain, $home_url)
    {
        $content = (string)$content;

        if ($is_css) {
            preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $content, $matches);
        } else {
            // NOTE(review): "[large|thumb]+" is a character class, not an
            // alternation, so it matches any run of those characters in front of
            // "URL:". Left unchanged because narrowing it to (?:large|thumb)
            // could drop keys that are picked up today - confirm the intent.
            preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $content, $matches);
        }

        $candidates = $matches[1] ?? [];
        if (!$candidates) {
            return null;
        }

        // Host under which re-hosted files are served; read once for all candidates.
        $cos = config('filesystems.disks.cos');
        $cosCdn = $cos['cdn'];

        foreach ($candidates as $raw) {
            // A match can span several keys; keep only the part after the last
            // matched 'URL:"' prefix.
            if (strpos($raw, 'URL:"') !== false) {
                $raw = substr($raw, strpos($raw, 'URL:"') + 5);
            }

            $target = str_replace('"', '', $raw);
            if (strpos($target, 'data:') !== false) {
                // Inline data, nothing to download.
                continue;
            }
            if (strlen($target) > 255) {
                // Skip overly long values.
                continue;
            }

            $target_parts = parse_url($target);
            $target_host = $target_parts['host'] ?? '';
            if ($target_host && $target_host == $cosCdn) {
                // Already re-hosted.
                continue;
            }

            if (empty($target_host) && substr($target, 0, 1) != '/') {
                // Relative reference: resolve it against the directory of the parent file.
                $segments = explode('/', $parent_url);
                $segments[count($segments) - 1] = $target;
                $target = implode('/', $segments);
            }

            $checked = $this->url_check($target, $project_id, $domain, $web_url_domain, $home_url);
            if (!$checked) {
                continue;
            }

            if ($checked['download']) {
                $uploaded = CosService::uploadRemote($project_id, 'source', $checked['url_complete']);
                if ($uploaded) {
                    CollectSource::insert([
                        'project_id' => $project_id,
                        'origin' => $checked['url'],
                        'target' => $uploaded,
                        'created_at' => date('Y-m-d H:i:s'),
                        'updated_at' => date('Y-m-d H:i:s'),
                    ]);
                    $content = str_replace($raw, getImageUrl($uploaded), $content);
                }
            } else {
                $content = str_replace($raw, getImageUrl($checked['url_complete']), $content);
            }
        }

        return $content;
    }
}
-
请 注册 或 登录 后发表评论