|
|
|
1
|
+<?php
|
|
|
|
2
|
+
|
|
|
|
3
|
+namespace App\Console\Commands\Update;
|
|
|
|
4
|
+
|
|
|
|
5
|
+use App\Models\Collect\CollectSource;
|
|
|
|
6
|
+use App\Models\Com\UpdateOldInfo;
|
|
|
|
7
|
+use App\Services\CosService;
|
|
|
|
8
|
+use App\Services\ProjectServer;
|
|
|
|
9
|
+use Illuminate\Console\Command;
|
|
|
|
10
|
+use Illuminate\Support\Facades\DB;
|
|
|
|
11
|
+
|
|
|
|
12
|
+/**
|
|
|
|
13
|
+ * 4.0,5.0升级到6.0,主站自定义页面采集
|
|
|
|
14
|
+ * Class HtmlCustomCollect
|
|
|
|
15
|
+ * @package App\Console\Commands\Update
|
|
|
|
16
|
+ * @author Akun
|
|
|
|
17
|
+ * @date 2023/12/13 14:44
|
|
|
|
18
|
+ */
|
|
|
|
19
|
+class HtmlCustomCollect extends Command
|
|
|
|
20
|
+{
|
|
|
|
21
|
+ /**
|
|
|
|
22
|
+ * The name and signature of the console command.
|
|
|
|
23
|
+ *
|
|
|
|
24
|
+ * @var string
|
|
|
|
25
|
+ */
|
|
|
|
26
|
+ protected $signature = 'project_html_custom_collect';
|
|
|
|
27
|
+
|
|
|
|
28
|
+ /**
|
|
|
|
29
|
+ * The console command description.
|
|
|
|
30
|
+ *
|
|
|
|
31
|
+ * @var string
|
|
|
|
32
|
+ */
|
|
|
|
33
|
+ protected $description = '执行项目自定义html页面采集';
|
|
|
|
34
|
+
|
|
|
|
35
|
+
|
|
|
|
36
|
/**
 * Run the custom HTML page collection for one hard-coded project.
 *
 * Raises the memory limit, selects the active preset (project id, v6 site
 * directory name and the list of legacy page URLs) and collects each page in
 * order. Presets for other projects are kept below as commented-out blocks;
 * to run one of them, comment out the active preset and uncomment it.
 */
public function handle()
{
    ini_set('memory_limit', '512M');

    // $project_id = 437;
    // $project_site = 'v6-1500k.globalso.site';
    // $pages = [
    //     'https://www.tourletent.com/project/b300-glamping-tent-in-china/',
    //     'https://www.tourletent.com/project/lotus-bell-tent-in-australia/',
    //     'https://www.tourletent.com/project/luxury-resort-in-china/',
    //     'https://www.tourletent.com/project/canvas-safari-tent-inthailand/',
    //     'https://www.tourletent.com/project/safari-tent-for-m8-in-mexcio/',
    //     'https://www.tourletent.com/project/9m-pvc-dome-tent-in-canada/',
    //     'https://www.tourletent.com/project/c900-hotel-tent-in-korea/',
    //     'https://www.tourletent.com/project/safari-tent-in-guizhou/',
    //     'https://www.tourletent.com/project/dome-tent-in-austin/',
    //     'https://www.tourletent.com/project/safari-tent-in-italy/',
    //     'https://www.tourletent.com/project/glass-igloo-in-china/',
    //     'https://www.tourletent.com/project/tree-house-in-sichuan/'
    // ];

    // $project_id = 517;
    // $project_site = 'v6-1gee9.globalso.site';
    // $pages = [
    //     'https://www.beifa.group/help/send-results/',
    //     'https://www.beifa.group/help/terms-of-use/',
    //     'https://www.beifa.group/help/position-3/',
    //     'https://www.beifa.group/help/position-2/',
    //     'https://www.beifa.group/help/position-1/',
    //     'https://www.beifa.group/help/social-media/',
    //     'https://www.beifa.group/help/globle-exibition-2/',
    //     'https://www.beifa.group/help/job/',
    //     'https://www.beifa.group/help/vr/',
    //     'https://www.beifa.group/help/on-live-video/',
    //     'https://www.beifa.group/help/honor/',
    //     'https://www.beifa.group/help/certification/',
    //     'https://www.beifa.group/help/quanity-control/',
    //     'https://www.beifa.group/help/testing-center/',
    //     'https://www.beifa.group/help/rd/',
    //     'https://www.beifa.group/help/design-trend/',
    //     'https://www.beifa.group/help/partner/',
    //     'https://www.beifa.group/help/social-responsibility/',
    //     'https://www.beifa.group/help/contact/',
    // ];

    // $project_id = 546;
    // $project_site = 'v6-kx260.globalso.site';
    // $pages = [
    //     'https://www.grechofiberglass.com/success_stories/',
    //     'https://www.grechofiberglass.com/success_stories/achieving-transformative-improvements-for-polyurethane-exterior-insulation-panels-in-france/',
    //     'https://www.grechofiberglass.com/success_stories/grechos-fiberglass-rebar-revolutionizing-canadian-construction-projects-with-unparalleled-quality/',
    //     'https://www.grechofiberglass.com/success_stories/customer-from-the-uk-purchases-fiberglass-coated-mats-for-plasterboards-from-grecho/',
    //     'https://www.grechofiberglass.com/success_stories/supplying-carbon-fiber-to-australian-surfboard-manufacturer/',
    //     'https://www.grechofiberglass.com/success_stories/300g-chopped-strand-mat-for-composite-slates-shipped-to-malaysia/',
    //     'https://www.grechofiberglass.com/success_stories/fiberglass-roving-for-pipewater-tank-shipped-to-russia/',
    //     'https://www.grechofiberglass.com/success_stories/fiberglass-fleece-for-acoustic-ceiling-shipped-to-russia/',
    //     'https://www.grechofiberglass.com/success_stories/600g-fiberglass-aluminum-foil-cloth-shipped-to-australia-for-pipe-heat-shielding/',
    //     'https://www.grechofiberglass.com/success_stories/shipping-our-first-truck-of-fiberglass-tissue-in-2022/',
    // ];

    // $project_id = 586;
    // $project_site = 'v6-m605x.globalso.site';
    // $pages = [
    //     'https://www.citymax-group.com/case/',
    //     'https://www.citymax-group.com/case_catalog/crop-classification/',
    //     'https://www.citymax-group.com/case_catalog/field-crops/',
    //     'https://www.citymax-group.com/case_catalog/fruits/',
    //     'https://www.citymax-group.com/case_catalog/vegetables/',
    //     'https://www.citymax-group.com/case/report-on-use-of-citymax-products-on-grapes-2/',
    //     'https://www.citymax-group.com/case/report-on-use-of-citymax-products-on-cucumber/',
    //     'https://www.citymax-group.com/case/field-experiment-crop-lettuce/',
    // ];

    // Active preset.
    $project_id = 626;
    $project_site = 'v6-m342g.globalso.site';
    $pages = [
        'https://a574.goodao.net/project_catalog/project/',
        'https://a574.goodao.net/project_catalog/project/page/2/',
        // 'https://www.lecusostreetlight.com/project/560pcs-250w-smart-led-street-light-in-manila-city-philippines/',
        // 'https://www.lecusostreetlight.com/project/3200pcs-8m-150w-solar-street-light-in-cebu-philippines/',
        // 'https://lecusostreetlight.com/project/170pcs-100w-split-lithium-battery-solar-street-light-in-tanzania/',
        // 'https://www.lecusostreetlight.com/project/250pcs-40w-sl-series-solar-street-light-in-kuwait/',
        // 'https://www.lecusostreetlight.com/project/272pcs-8m-80w-solar-street-light-in-tanzania/',
        // 'https://www.lecusostreetlight.com/project/185pcs-10m-120w-highway-solar-street-light-in-jordan/',
        // 'https://www.lecusostreetlight.com/project/270pcs-9m-patterned-decorative-light-pole-with-150w-led-cobra-light-in-cambodia/',
        // 'https://www.lecusostreetlight.com/project/48pcs-5m-24w-decorative-aluminium-pole-in-dubai-uae/',
        // 'https://www.lecusostreetlight.com/project/105pcs-9m-100w-led-street-light-in-sri-lanka/',
        // 'https://www.lecusostreetlight.com/project/45pcs-6m-hot-dip-galvanized-double-arm-street-light-pole-in-dubai-uae/',
        // 'https://www.lecusostreetlight.com/project/356pcs-8m-100w-solar-street-light-in-ethiopia/',
        // 'https://www.lecusostreetlight.com/project/52pcs-6m-30w-solar-led-street-light-with-gel-battery-in-poland/',
        // 'https://www.lecusostreetlight.com/project/225pcs-6m-80w-solar-street-light-in-vietnam/',
        // 'https://www.lecusostreetlight.com/project/450pcs-7m-60w-double-arm-solar-light-in-nigeria/',
        // 'https://www.lecusostreetlight.com/project/100pcs-6m-50w-led-street-light-in-maldives/',
        // 'https://www.lecusostreetlight.com/project/202pcs-6m-40w-3000k-solar-street-light-in-manila-philippines/',
        // 'https://www.lecusostreetlight.com/project/245pcs-120w-smart-led-street-light-in-bangkok-thailand/',
        // 'https://www.lecusostreetlight.com/project/170pcs-7m-50w-all-in-one-solar-light-in-davao-philippines/',
        // 'https://www.lecusostreetlight.com/project/80pcs-9m-150w-led-street-light-in-iraq/',
        // 'https://www.lecusostreetlight.com/project/252pcs-6m-40w-separate-solar-street-light-with-lithium-battery-in-thailand/',
        // 'https://www.lecusostreetlight.com/project/198pcs-8m-80w-zc-series-all-in-two-solar-light-in-philippines/',
        // 'https://www.lecusostreetlight.com/project/5m-30w-morden-led-garden-light-in-russian/',
        // 'https://www.lecusostreetlight.com/project/135pcs-all-in-one-solar-street-light-in-uae-dubai-park/'
    ];

    // $project_id = 633;
    // $project_site = 'v6-ke5nz.globalso.site';
    // $pages = [
    //     'https://www.mach-sales.com/case_catalog/cases/',
    //     'https://www.mach-sales.com/case_catalog/cases/page/2/',
    //     'https://www.mach-sales.com/case_catalog/cases/page/3/',
    //     'https://www.mach-sales.com/case/growing-with-customers-from-small-motors-to-big-drivers/',
    //     'https://www.mach-sales.com/case/growing-together-with-customers-the-journey-of-intelligent-manufacturing-in-a-modernized-factory/',
    //     'https://www.mach-sales.com/case/%e3%80%90growing-together-with-customers%e3%80%91-the-evolutionary-journey-of-a-baking-brand/',
    //     'https://www.mach-sales.com/case/sumecs-footprints-in-belt-and-road-singapore/',
    //     'https://www.mach-sales.com/case/sumecs-footprints-in-belt-and-road-southeast-asia/',
    //     'https://www.mach-sales.com/case/naming-and-delivery-of-a-new-ship-1/',
    //     'https://www.mach-sales.com/case/a-newly-signed-contract-cable-manufacturing-equipment/',
    //     'https://www.mach-sales.com/case/its-amazing-that-the-glass-can-also-save-energy/',
    //     'https://www.mach-sales.com/case/%e3%80%90grow-with-customers%e3%80%91better-service-for-papermaking-equipment-procurement/',
    //     'https://www.mach-sales.com/case/expansion-of-blower-equipment-contributes-to-environmental-protection/',
    //     'https://www.mach-sales.com/case/new-ship-type-new-contract/',
    //     'https://www.mach-sales.com/case/new-arrival-introducing-the-latest-ship-model/',
    //     'https://www.mach-sales.com/case/another-contract-signed-in-the-philippines/',
    //     'https://www.mach-sales.com/case/the-road-to-going-global-is-supported-by-sumec-services/',
    //     'https://www.mach-sales.com/case/sumec-textile-launches-sun-protection-series-in-collaboration-with-skechers-kids/',
    //     'https://www.mach-sales.com/case/footwear-manufacturing-equipment-new-contract/',
    //     'https://www.mach-sales.com/case/sumec-energy-company-has-successfully-signed-a-photovoltaic-component-supply-agreement-with-wattkraft-a-german-engineering-system-integrator/',
    //     'https://www.mach-sales.com/case/sumec-technology-company-successfully-signed-a-contract-for-the-equipment-related-to-the-high-strength-particleboard-project/',
    //     'https://www.mach-sales.com/case/%e3%80%90growing-together-with-our-customers%e3%80%91together-on-the-road-to-transformation/',
    //     'https://www.mach-sales.com/case/new-signing/',
    //     'https://www.mach-sales.com/case/complete-the-last-mile-equipment-procurement-from-around-the-world/',
    //     'https://www.mach-sales.com/case/science-and-technology-to-rejuvenate-agriculture-this-plant-factory-is-not-simple/',
    //     'https://www.mach-sales.com/case/new-cooperation-high-end-equipment-going-global/',
    //     'https://www.mach-sales.com/case/a-new-cooperation-with-an-annual-output-of-40000-tons/',
    //     'https://www.mach-sales.com/case/a-new-contract-cooperation-upgrades/',
    //     'https://www.mach-sales.com/case/annual-output-of-50000-tons-this-bopp-film-production-line-was-officially-put-into-operation/',
    //     'https://www.mach-sales.com/case/this-is-sumec-speed/',
    //     'https://www.mach-sales.com/case/the-first-order-of-a-new-semiconductor-brand-equipment-direct-sales-bear-fruit-again/',
    //     'https://www.mach-sales.com/case/sumec-touch-world-celebrates-its-5th-anniversary-with-well-known-suppliers/',
    //     'https://www.mach-sales.com/case/from-traditional-agency-to-digital-service/',
    //     'https://www.mach-sales.com/case/cooperation-win-win-and-starting-anew-sumec-creates-a-new-sample-of-strong-enterprise-cooperation/'
    // ];

    // Some presets hold percent-encoded URLs, so every URL is decoded before
    // it is handed to the collector.
    foreach (array_map('urldecode', $pages) as $decoded_page) {
        $this->start_collect($decoded_page, $project_id, $project_site);
    }
}
|
|
|
|
182
|
+
|
|
|
|
183
|
/**
 * Collect one legacy page and write it as a static index.html for the v6 site.
 *
 * Steps, in order:
 *  1. select the project's database through ProjectServer::useProject();
 *  2. fetch the page HTML with curl_c();
 *  3. find the resources the page references and rewrite them via upload_source();
 *  4. strip the legacy domain prefixes so remaining links become site-relative;
 *  5. point search forms at /search/ and append the visit-statistics script;
 *  6. write the result to <public dir>/<project_site><url path>/index.html.
 *
 * Progress and failures are echoed to stdout. Exceptions raised while collecting
 * are logged rather than rethrown. The 'custom_mysql' connection is disconnected
 * and a two-second pause is taken on every exit path after the project lookup.
 *
 * @param string $page         Absolute URL of the legacy page to collect.
 * @param int    $project_id   Project whose database and source records are used.
 * @param string $project_site Directory name of the v6 site under the public root.
 * @return bool Always true.
 */
protected function start_collect($page, $project_id, $project_site)
{
    // Every progress line shares the same "date, project, page" prefix.
    $log = static function ($message) use ($project_id, $page) {
        echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', page: ' . $page . ', ' . $message . PHP_EOL;
    };

    $page_arr = parse_url($page);
    $domain = $page_arr['host'] ?? '';
    if ($domain === '') {
        // Without a host there is nothing to fetch and no domain to match resources against.
        $log('error: invalid page url');
        return true;
    }

    // The output file name is appended directly to the path, so the path must be
    // present and end with exactly one slash ("/foo" would otherwise yield "/fooindex.html").
    $path = rtrim($page_arr['path'] ?? '', '/') . '/';

    // Select the project's database.
    $project = ProjectServer::useProject($project_id);

    try {
        if (!$project) {
            $log('no project');
            return true;
        }

        $log('collect start');

        // Original (legacy) domain information of the site.
        $old_info = UpdateOldInfo::getOldDomain($project_id, $domain);
        $web_url_domain = $old_info['web_url_domain'] ?? '';
        $home_url = $old_info['home_url'] ?? '';

        // Fetch the page; curl_c() signals failure with '0' (loose comparison kept on purpose).
        $html = curl_c($page, false);
        if ($html == '0') {
            $log('error: no html');
            return true;
        }

        // Inline base64 payloads are removed from a scratch copy before resource
        // matching; the real HTML keeps them.
        $scan_html = $html;
        preg_match_all("/data:([^;]*);base64,(.*)?\"/", $scan_html, $result_img);
        foreach ($result_img[2] ?? [] as $v64) {
            $scan_html = str_replace($v64, '', $scan_html);
        }

        // Find resource links, then download them and rewrite the references.
        $source_list = $this->html_preg($scan_html, $project_id, $domain, $web_url_domain, $home_url);
        if ($source_list) {
            $html = $this->upload_source($html, $source_list, $project_id, $domain, $web_url_domain, $home_url);
        }

        // Strip the legacy domain so remaining links become site-relative.
        // Empty domains are skipped: replacing a bare "http://" would mangle
        // every absolute URL in the page.
        $legacy_prefixes = [];
        foreach ([$web_url_domain, $home_url] as $legacy_domain) {
            if ($legacy_domain !== '' && $legacy_domain !== null) {
                $legacy_prefixes[] = 'http://' . $legacy_domain;
                $legacy_prefixes[] = 'https://' . $legacy_domain;
            }
        }
        if ($legacy_prefixes) {
            $html = str_replace($legacy_prefixes, '', $html);
        }

        // Temporarily hide the minor-language switcher (disabled):
        // $html = str_replace('<div class="change-language ensemble">', '<div class="change-language ensemble" style="display: none">', $html);
        // $html = str_replace('<div class="language_more">', '<div class="language_more" style="display: none">', $html);

        // Point legacy search forms (search.php / index.php actions) at /search/.
        preg_match_all('/<form\s+[^>]*?action\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_search);
        foreach ($result_search[2] ?? [] as $action) {
            if ((strpos($action, 'search.php') !== false) || (strpos($action, 'index.php') !== false)) {
                $html = str_replace($action, '/search/', $html);
            }
        }

        // Append the visit-statistics script. The string is single-quoted, so the
        // closing attribute quote must not be backslash-escaped.
        $html = str_replace('</body>', '<script src="https://ecdn6.globalso.com/public/customerVisit.min.js"></script></body>', $html);

        // Write the page to disk.
        // NOTE(review): 0777 makes the directory and file world-writable; kept as in
        // the original because the deployment may rely on it.
        $file_dir = '/www/wwwroot/globalso-v6-c-glo/public/' . $project_site . $path;
        if (!is_dir($file_dir) && !mkdir($file_dir, 0777, true) && !is_dir($file_dir)) {
            throw new \RuntimeException('cannot create directory ' . $file_dir);
        }

        $file_name = $file_dir . 'index.html';
        if (file_put_contents($file_name, $html) === false) {
            throw new \RuntimeException('cannot write file ' . $file_name);
        }
        chmod($file_name, 0777);

        $log('collect end');
    } catch (\Exception $e) {
        $log('error: ' . $e->getMessage());
    } finally {
        // Release the project connection on every exit path, including early returns.
        DB::disconnect('custom_mysql');

        // Pause before the next page (presumably to throttle requests to the legacy site).
        sleep(2);
    }

    return true;
}
|
|
|
|
270
|
+
|
|
|
|
271
|
/**
 * Find the resource URLs referenced by an HTML document and classify them.
 *
 * Scans, in this order: <img src>, <script src>, <source src>, <link href>,
 * CSS url(...) values and <a href>. Every captured URL is passed to
 * url_check(); URLs it rejects are dropped.
 *
 * A URL that appears several times in the page (or is matched by more than one
 * rule) is checked and returned only once, so callers do not download or record
 * the same resource repeatedly.
 *
 * @param string $html           HTML to scan.
 * @param int    $project_id     Project id forwarded to url_check().
 * @param string $domain         Host of the page being collected.
 * @param string $web_url_domain Legacy site domain forwarded to url_check().
 * @param string $home_url       Legacy home URL forwarded to url_check().
 * @return array List of url_check() results, each with the keys
 *               'download', 'url' and 'url_complete'.
 */
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
    $source = [];

    if (!$html) {
        return $source;
    }

    // Each rule: the pattern and the index of the capture group holding the URL.
    $rules = [
        // image
        ['pattern' => '/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 'group' => 2],
        // js
        ['pattern' => '/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 'group' => 2],
        // video
        ['pattern' => '/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 'group' => 2],
        // css
        ['pattern' => '/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 'group' => 2],
        // css background
        ['pattern' => "/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", 'group' => 1],
        // downloadable files linked from <a> tags
        ['pattern' => '/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 'group' => 2],
    ];

    $seen_raw = []; // captured strings already handed to url_check()
    $seen_url = []; // normalized URLs already present in $source

    foreach ($rules as $rule) {
        preg_match_all($rule['pattern'], $html, $matches);

        foreach ($matches[$rule['group']] ?? [] as $candidate) {
            if (isset($seen_raw[$candidate])) {
                continue;
            }
            $seen_raw[$candidate] = true;

            $checked = $this->url_check($candidate, $project_id, $domain, $web_url_domain, $home_url);
            if (!$checked || isset($seen_url[$checked['url']])) {
                continue;
            }
            $seen_url[$checked['url']] = true;

            $source[] = $checked;
        }
    }

    return $source;
}
|
|
|
|
330
|
+
|
|
|
|
331
|
/**
 * Decide whether a URL found in a page is a site resource that must be mirrored.
 *
 * A URL qualifies only when all of the following hold:
 *  - its scheme is empty, "http" or "https";
 *  - its host is empty or is contained in $web_url_domain or $home_url;
 *  - its path starts with "/" and contains a dot;
 *  - the text after the last dot of the path does not contain
 *    "html", "php", "com" or "xml".
 *
 * For a qualifying URL the CollectSource table is consulted: a URL that is not
 * recorded yet is reported as needing download, a recorded one is reported with
 * its stored target.
 *
 * @param string $url            URL as captured from the document.
 * @param int    $project_id     Project whose CollectSource records are searched.
 * @param string $domain         Host used to build the absolute download URL.
 * @param string $web_url_domain Legacy site domain.
 * @param string $home_url       Legacy home URL.
 * @return array|false False when the URL does not qualify, otherwise an array with
 *                     'download' (bool), 'url' (cleaned input) and 'url_complete'
 *                     (absolute download URL, or the stored target when already recorded).
 */
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
    $url = trim($url);
    if (!$url) {
        return false;
    }

    $url = str_replace('"', '', $url);

    $parts = parse_url($url);
    $scheme = $parts['scheme'] ?? '';
    $host = $parts['host'] ?? '';
    $path = $parts['path'] ?? '';
    $query = $parts['query'] ?? '';

    // Only plain web URLs, or scheme-less ones.
    if (!empty($scheme) && $scheme != 'https' && $scheme != 'http') {
        return false;
    }

    // A host, when present, must belong to the legacy site.
    if (!empty($host) && (strpos($web_url_domain, $host) === false) && (strpos($home_url, $host) === false)) {
        return false;
    }

    // Root-relative path that looks like a file (contains a dot).
    if (!$path || (substr($path, 0, 1) != '/') || (strpos($path, '.') === false)) {
        return false;
    }

    // Pages and feeds are not resources: reject by the text after the last dot.
    $path_pieces = explode('.', $path);
    $path_tail = end($path_pieces);
    foreach (['html', 'php', 'com', 'xml'] as $excluded) {
        if (strpos($path_tail, $excluded) !== false) {
            return false;
        }
    }

    $record = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
    if ($record) {
        // Already mirrored: reuse the stored target.
        return [
            'download' => false,
            'url' => $url,
            'url_complete' => $record['target']
        ];
    }

    // Not mirrored yet: build the absolute URL on the page's own host.
    return [
        'download' => true,
        'url' => $url,
        'url_complete' => ($scheme ?: 'https') . '://' . $domain . $path . ($query ? '?' . $query : '')
    ];
}
|
|
|
|
377
|
+
|
|
|
|
378
|
/**
 * Download the listed resources, record them and rewrite the HTML to use the stored copies.
 *
 * For an entry flagged 'download' the file is uploaded with
 * CosService::uploadRemote(), a CollectSource row is inserted and every
 * occurrence of the original URL in $html is replaced. When the stored file
 * name ends in "css" or "js", the file itself is scanned for further resources,
 * which are handled the same way before the file is uploaded again.
 * For an entry not flagged 'download' only the replacement is done, using the
 * already stored target.
 *
 * Entries repeating an already handled URL are skipped: the first pass has
 * replaced every occurrence, and a second pass would only repeat the upload
 * and insert a duplicate CollectSource row.
 *
 * @param string $html           Page HTML to rewrite.
 * @param array  $source         url_check() results ('download', 'url', 'url_complete').
 * @param int    $project_id     Project the resources belong to.
 * @param string $domain         Host of the page being collected.
 * @param string $web_url_domain Legacy site domain.
 * @param string $home_url       Legacy home URL.
 * @return string The rewritten HTML.
 */
protected function upload_source($html, $source, $project_id, $domain, $web_url_domain, $home_url)
{
    $handled = [];

    foreach ($source as $vs) {
        if (isset($handled[$vs['url']])) {
            continue;
        }
        $handled[$vs['url']] = true;

        if (!$vs['download']) {
            // Already mirrored earlier: just point the page at the stored copy.
            $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
            continue;
        }

        $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
        if (!$new_source) {
            // Upload failed: leave the original reference untouched.
            continue;
        }

        $this->record_collect_source($project_id, $vs['url'], $new_source);
        $html = str_replace($vs['url'], getImageUrl($new_source), $html);

        $is_css = substr($new_source, -3, 3) == 'css';
        if ($is_css || substr($new_source, -2, 2) == 'js') {
            $this->localize_embedded_sources($new_source, $is_css, $vs['url'], $project_id, $domain, $web_url_domain, $home_url);
        }
    }

    return $html;
}

/**
 * Mirror the resources referenced inside an uploaded CSS/JS file and re-upload the rewritten file.
 *
 * @param string $new_source     Stored path of the CSS/JS file returned by uploadRemote().
 * @param bool   $is_css         True for CSS (url(...) values), false for JS (…URL:"…" values).
 * @param string $origin_url     URL of the file as written in the page; base for relative references.
 * @param int    $project_id     Project the resources belong to.
 * @param string $domain         Host of the page being collected.
 * @param string $web_url_domain Legacy site domain.
 * @param string $home_url       Legacy home URL.
 * @return void
 */
private function localize_embedded_sources($new_source, $is_css, $origin_url, $project_id, $domain, $web_url_domain, $home_url)
{
    $source_html = curl_c(getImageUrl($new_source), false);

    if ($is_css) {
        preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source);
    } else {
        // NOTE(review): "[large|thumb]+" is a character class, not the alternation
        // (large|thumb); it matches any run of those letters before "URL:". Left
        // unchanged because narrowing it could drop URLs that are collected today.
        preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
    }

    $js_css_source = $result_source[1] ?? [];
    if (!$js_css_source) {
        return;
    }

    // Loop-invariant: CDN host of the storage disk, used to skip already mirrored URLs.
    $cos = config('filesystems.disks.cos');
    $cosCdn = $cos['cdn'];

    $handled = [];

    foreach ($js_css_source as $vjs) {
        // A lazy JS match can still contain an earlier 'URL:"' prefix; keep only the value.
        if (strpos($vjs, 'URL:"') !== false) {
            $vjs = substr($vjs, strpos($vjs, 'URL:"') + 5);
        }

        // The same reference (e.g. a sprite) may occur many times; the first pass
        // replaces all occurrences.
        if (isset($handled[$vjs])) {
            continue;
        }
        $handled[$vjs] = true;

        // $vjs stays as captured (it is the replacement needle); $vjs_down is the
        // cleaned URL used for classification and download.
        $vjs_down = trim(str_replace('"', '', $vjs));
        if (strpos($vjs_down, 'data:') !== false) {
            // Skip inline (data URI) payloads.
            continue;
        }
        if (strlen($vjs_down) > 255) {
            // Skip over-long values.
            continue;
        }

        $vjs_down_arr = parse_url($vjs_down);
        $vjs_down_host = $vjs_down_arr['host'] ?? '';

        if ($vjs_down_host && $vjs_down_host == $cosCdn) {
            // Skip resources that already point at the CDN.
            continue;
        }

        if (empty($vjs_down_host) && substr($vjs_down, 0, 1) != '/') {
            // Relative path: resolve against the directory of the CSS/JS file.
            $url_arr = explode('/', $origin_url);
            $url_arr[count($url_arr) - 1] = $vjs_down;
            $vjs_down = implode('/', $url_arr);
        }

        $vjs_result = $this->url_check($vjs_down, $project_id, $domain, $web_url_domain, $home_url);
        if (!$vjs_result) {
            continue;
        }

        if ($vjs_result['download']) {
            $new_vjs = CosService::uploadRemote($project_id, 'source', $vjs_result['url_complete']);
            if ($new_vjs) {
                $this->record_collect_source($project_id, $vjs_result['url'], $new_vjs);
                $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html);
            }
        } else {
            $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html);
        }
    }

    // Store the rewritten file under the same path.
    CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $source_html);
}

/**
 * Insert a CollectSource row mapping an original URL to its stored target.
 *
 * @param int    $project_id Project the resource belongs to.
 * @param string $origin     URL as it appeared in the collected document.
 * @param string $target     Stored path returned by CosService::uploadRemote().
 * @return void
 */
private function record_collect_source($project_id, $origin, $target)
{
    $now = date('Y-m-d H:i:s');

    CollectSource::insert([
        'project_id' => $project_id,
        'origin' => $origin,
        'target' => $target,
        'created_at' => $now,
        'updated_at' => $now,
    ]);
}
|
|
|
|
473
|
+} |