|
@@ -69,7 +69,7 @@ class HtmlCollect extends Command |
|
@@ -69,7 +69,7 @@ class HtmlCollect extends Command |
|
69
|
return true;
|
69
|
return true;
|
|
70
|
}
|
70
|
}
|
|
71
|
|
71
|
|
|
72
|
- echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', collect start' . PHP_EOL;
|
72
|
+ echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect start' . PHP_EOL;
|
|
73
|
|
73
|
|
|
74
|
$collect_info->status = CollectTask::STATUS_ING;
|
74
|
$collect_info->status = CollectTask::STATUS_ING;
|
|
75
|
$collect_info->save();
|
75
|
$collect_info->save();
|
|
@@ -83,7 +83,7 @@ class HtmlCollect extends Command |
|
@@ -83,7 +83,7 @@ class HtmlCollect extends Command |
|
83
|
$html = $this->upload_source($html, $source_list, $project_id);
|
83
|
$html = $this->upload_source($html, $source_list, $project_id);
|
|
84
|
}
|
84
|
}
|
|
85
|
} catch (\Exception $e) {
|
85
|
} catch (\Exception $e) {
|
|
86
|
- echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL;
|
86
|
+ echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL;
|
|
87
|
return true;
|
87
|
return true;
|
|
88
|
}
|
88
|
}
|
|
89
|
|
89
|
|
|
@@ -91,7 +91,7 @@ class HtmlCollect extends Command |
|
@@ -91,7 +91,7 @@ class HtmlCollect extends Command |
|
91
|
$collect_info->status = CollectTask::STATUS_COM;
|
91
|
$collect_info->status = CollectTask::STATUS_COM;
|
|
92
|
$collect_info->save();
|
92
|
$collect_info->save();
|
|
93
|
|
93
|
|
|
94
|
- echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', collect end' . PHP_EOL;
|
94
|
+ echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect end' . PHP_EOL;
|
|
95
|
}
|
95
|
}
|
|
96
|
//关闭数据库
|
96
|
//关闭数据库
|
|
97
|
DB::disconnect('custom_mysql');
|
97
|
DB::disconnect('custom_mysql');
|
|
@@ -149,7 +149,7 @@ class HtmlCollect extends Command |
|
@@ -149,7 +149,7 @@ class HtmlCollect extends Command |
|
149
|
return $source;
|
149
|
return $source;
|
|
150
|
}
|
150
|
}
|
|
151
|
|
151
|
|
|
152
|
- //图片
|
152
|
+ //image
|
|
153
|
preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
|
153
|
preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
|
|
154
|
$img = $result_img[2] ?? [];
|
154
|
$img = $result_img[2] ?? [];
|
|
155
|
foreach ($img as $vi) {
|
155
|
foreach ($img as $vi) {
|
|
@@ -202,9 +202,11 @@ class HtmlCollect extends Command |
|
@@ -202,9 +202,11 @@ class HtmlCollect extends Command |
|
202
|
$host = $arr['host'] ?? '';
|
202
|
$host = $arr['host'] ?? '';
|
|
203
|
$path = $arr['path'] ?? '';
|
203
|
$path = $arr['path'] ?? '';
|
|
204
|
|
204
|
|
|
205
|
- if ((strpos($host, '.globalso.') === false)
|
|
|
|
206
|
- && (strpos($host, '.goodao.') === false)
|
|
|
|
207
|
- && $path && (strpos($path, '.') !== false)) {
|
205
|
+ if (
|
|
|
|
206
|
+ (strpos($host, '.globalso.') === false) &&
|
|
|
|
207
|
+// (strpos($host, '.goodao.') === false) &&
|
|
|
|
208
|
+ $path && (strpos($path, '.') !== false)
|
|
|
|
209
|
+ ) {
|
|
208
|
|
210
|
|
|
209
|
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
|
211
|
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
|
|
210
|
if (!$source) {
|
212
|
if (!$source) {
|
|
@@ -244,6 +246,45 @@ class HtmlCollect extends Command |
|
@@ -244,6 +246,45 @@ class HtmlCollect extends Command |
|
244
|
'updated_at' => date('Y-m-d H:i:s'),
|
246
|
'updated_at' => date('Y-m-d H:i:s'),
|
|
245
|
]);
|
247
|
]);
|
|
246
|
$html = str_replace($vs['url'], getImageUrl($new_source), $html);
|
248
|
$html = str_replace($vs['url'], getImageUrl($new_source), $html);
|
|
|
|
249
|
+
|
|
|
|
250
|
+ if (substr($new_source, -1, 3) == 'css') {
|
|
|
|
251
|
+ // 下载css文件中的资源
|
|
|
|
252
|
+ $css_html = file_get_contents($vs['url_complete']);
|
|
|
|
253
|
+ preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
|
|
|
|
254
|
+ $css_source = $result_css_source[1] ?? [];
|
|
|
|
255
|
+
|
|
|
|
256
|
+ $url_arr = explode('/', $vs['url_complete']);
|
|
|
|
257
|
+ $target_arr = explode('/', $new_source);
|
|
|
|
258
|
+ foreach ($css_source as $vcs) {
|
|
|
|
259
|
+ $vcs_arr = parse_url($vcs);
|
|
|
|
260
|
+ if (isset($vcs_arr['domain'])) {
|
|
|
|
261
|
+ //不是相对路径,不下载
|
|
|
|
262
|
+ continue;
|
|
|
|
263
|
+ }
|
|
|
|
264
|
+
|
|
|
|
265
|
+ $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
|
|
|
|
266
|
+ if ($source_info) {
|
|
|
|
267
|
+ //已存在,不下载
|
|
|
|
268
|
+ continue;
|
|
|
|
269
|
+ }
|
|
|
|
270
|
+
|
|
|
|
271
|
+ $url_arr[count($url_arr) - 1] = $vcs;
|
|
|
|
272
|
+ $url_css_complete = implode('/', $url_arr);
|
|
|
|
273
|
+ $target_arr[count($target_arr) - 1] = $vcs;
|
|
|
|
274
|
+ $path = implode('/', $target_arr);
|
|
|
|
275
|
+
|
|
|
|
276
|
+ $new_source_css = CosService::uploadRemote($project_id, 'source', $url_css_complete, $path);
|
|
|
|
277
|
+ if ($new_source_css) {
|
|
|
|
278
|
+ CollectSource::insert([
|
|
|
|
279
|
+ 'project_id' => $project_id,
|
|
|
|
280
|
+ 'origin' => $vcs,
|
|
|
|
281
|
+ 'target' => $new_source_css,
|
|
|
|
282
|
+ 'created_at' => date('Y-m-d H:i:s'),
|
|
|
|
283
|
+ 'updated_at' => date('Y-m-d H:i:s'),
|
|
|
|
284
|
+ ]);
|
|
|
|
285
|
+ }
|
|
|
|
286
|
+ }
|
|
|
|
287
|
+ }
|
|
247
|
}
|
288
|
}
|
|
248
|
} else {
|
289
|
} else {
|
|
249
|
$html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
|
290
|
$html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
|