Merge branch 'master' of http://47.244.231.31:8099/zhl/globalso-v6
正在显示 2 个修改的文件，包含 18 行增加和 4 行删除
| @@ -79,7 +79,13 @@ class ProjectImport extends Command | @@ -79,7 +79,13 @@ class ProjectImport extends Command | ||
| 79 | //读取csv文件 | 79 | //读取csv文件 |
| 80 | $line_of_text = []; | 80 | $line_of_text = []; |
| 81 | try { | 81 | try { |
| 82 | - $file_handle = fopen($task->file_url, 'r'); | 82 | + $opts = [ |
| 83 | + 'http' => [ | ||
| 84 | + 'method' => 'GET', | ||
| 85 | + 'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246' | ||
| 86 | + ] | ||
| 87 | + ]; | ||
| 88 | + $file_handle = fopen($task->file_url, 'r', null, stream_context_create($opts)); | ||
| 83 | while (!feof($file_handle)) { | 89 | while (!feof($file_handle)) { |
| 84 | $line_of_text[] = fgetcsv($file_handle, 0, ','); | 90 | $line_of_text[] = fgetcsv($file_handle, 0, ','); |
| 85 | } | 91 | } |
| @@ -183,7 +189,7 @@ class ProjectImport extends Command | @@ -183,7 +189,7 @@ class ProjectImport extends Command | ||
| 183 | protected function get_code_type($file) | 189 | protected function get_code_type($file) |
| 184 | { | 190 | { |
| 185 | $list = array('GBK', 'UTF-8'); | 191 | $list = array('GBK', 'UTF-8'); |
| 186 | - $str = curl_c($file,false); | 192 | + $str = curl_c($file, false); |
| 187 | foreach ($list as $item) { | 193 | foreach ($list as $item) { |
| 188 | $tmp = mb_convert_encoding($str, $item, $item); | 194 | $tmp = mb_convert_encoding($str, $item, $item); |
| 189 | if (md5($tmp) == md5($str)) { | 195 | if (md5($tmp) == md5($str)) { |
| @@ -41,7 +41,7 @@ class HtmlCollect extends Command | @@ -41,7 +41,7 @@ class HtmlCollect extends Command | ||
| 41 | public function handle() | 41 | public function handle() |
| 42 | { | 42 | { |
| 43 | while (true) { | 43 | while (true) { |
| 44 | - $this->start_collect(); | 44 | + $this->start_collect(); |
| 45 | } | 45 | } |
| 46 | } | 46 | } |
| 47 | 47 | ||
| @@ -268,6 +268,14 @@ class HtmlCollect extends Command | @@ -268,6 +268,14 @@ class HtmlCollect extends Command | ||
| 268 | $check_vc_b && $source[] = $check_vc_b; | 268 | $check_vc_b && $source[] = $check_vc_b; |
| 269 | } | 269 | } |
| 270 | 270 | ||
| 271 | + //a标签下载资源 | ||
| 272 | + preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a); | ||
| 273 | + $down = $result_a[2] ?? []; | ||
| 274 | + foreach ($down as $vd) { | ||
| 275 | + $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url); | ||
| 276 | + $check_vd && $source[] = $check_vd; | ||
| 277 | + } | ||
| 278 | + | ||
| 271 | return $source; | 279 | return $source; |
| 272 | } | 280 | } |
| 273 | 281 | ||
| @@ -286,7 +294,7 @@ class HtmlCollect extends Command | @@ -286,7 +294,7 @@ class HtmlCollect extends Command | ||
| 286 | (empty($host) || $host == $web_url_domain || $host == $home_url) | 294 | (empty($host) || $host == $web_url_domain || $host == $home_url) |
| 287 | && $path | 295 | && $path |
| 288 | && (strpos($path, '.') !== false) | 296 | && (strpos($path, '.') !== false) |
| 289 | - && (end($path_arr) != 'html') | 297 | + && (!in_array(end($path_arr), ['html', 'com', 'xml'])) |
| 290 | ) { | 298 | ) { |
| 291 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); | 299 | $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); |
| 292 | if (!$source) { | 300 | if (!$source) { |
-
请注册或登录后发表评论