正在显示 1 个修改的文件，包含 10 行增加和 2 行删除
| @@ -41,7 +41,7 @@ class HtmlCollect extends Command | @@ -41,7 +41,7 @@ class HtmlCollect extends Command | ||
| 41 | public function handle() | 41 | public function handle() |
| 42 | { | 42 | { |
| 43 | while (true) { | 43 | while (true) { |
| 44 | - $this->start_collect(); | 44 | + $this->start_collect(); |
| 45 | } | 45 | } |
| 46 | } | 46 | } |
| 47 | 47 | ||
| @@ -268,6 +268,14 @@ class HtmlCollect extends Command | @@ -268,6 +268,14 @@ class HtmlCollect extends Command | ||
| 268 | $check_vc_b && $source[] = $check_vc_b; | 268 | $check_vc_b && $source[] = $check_vc_b; |
| 269 | } | 269 | } |
| 270 | 270 | ||
| 271 | + //a标签下载资源 | ||
| 272 | + preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a); | ||
| 273 | + $down = $result_a[2] ?? []; | ||
| 274 | + foreach ($down as $vd) { | ||
| 275 | + $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url); | ||
| 276 | + $check_vd && $source[] = $check_vd; | ||
| 277 | + } | ||
| 278 | + | ||
| 271 | return $source; | 279 | return $source; |
| 272 | } | 280 | } |
| 273 | 281 | ||
| @@ -286,7 +294,7 @@ class HtmlCollect extends Command | @@ -286,7 +294,7 @@ class HtmlCollect extends Command | ||
| 286 | (empty($host) || $host == $web_url_domain || $host == $home_url) | 294 | (empty($host) || $host == $web_url_domain || $host == $home_url) |
| 287 | && $path | 295 | && $path |
| 288 | && (strpos($path, '.') !== false) | 296 | && (strpos($path, '.') !== false) |
| 289 | - && (end($path_arr) != 'html') | 297 | + && (!in_array(end($path_arr), ['html', 'com', 'xml'])) |
| 290 | ) { | 298 | ) { |
| 291 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); | 299 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); |
| 292 | if (!$source) { | 300 | if (!$source) { |
-
请 注册 或 登录 后发表评论