作者 lyh
... ... @@ -79,7 +79,13 @@ class ProjectImport extends Command
//读取csv文件
$line_of_text = [];
try {
$file_handle = fopen($task->file_url, 'r');
$opts = [
'http' => [
'method' => 'GET',
'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'
]
];
$file_handle = fopen($task->file_url, 'r', null, stream_context_create($opts));
while (!feof($file_handle)) {
$line_of_text[] = fgetcsv($file_handle, 0, ',');
}
... ... @@ -183,7 +189,7 @@ class ProjectImport extends Command
protected function get_code_type($file)
{
$list = array('GBK', 'UTF-8');
$str = curl_c($file,false);
$str = curl_c($file, false);
foreach ($list as $item) {
$tmp = mb_convert_encoding($str, $item, $item);
if (md5($tmp) == md5($str)) {
... ...
... ... @@ -41,7 +41,7 @@ class HtmlCollect extends Command
/**
 * Runs the collection loop: calls start_collect() over and over inside
 * a `while (true)` loop.
 *
 * The loop contains no break or return, so control only leaves this
 * method if start_collect() throws or the process is stopped.
 *
 * NOTE(review): presumably this is the console-command entry point (the
 * hunk header shows the class extends Command) — confirm.
 */
public function handle()
{
while (true) {
// NOTE(review): the two identical calls below look like the removed and
// added lines of a whitespace-only diff hunk shown without +/- markers,
// not two intentional invocations per pass — confirm against the real file.
$this->start_collect();
$this->start_collect();
}
}
... ... @@ -268,6 +268,14 @@ class HtmlCollect extends Command
$check_vc_b && $source[] = $check_vc_b;
}
//a标签下载资源
preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a);
$down = $result_a[2] ?? [];
foreach ($down as $vd) {
$check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url);
$check_vd && $source[] = $check_vd;
}
return $source;
}
... ... @@ -286,7 +294,7 @@ class HtmlCollect extends Command
(empty($host) || $host == $web_url_domain || $host == $home_url)
&& $path
&& (strpos($path, '.') !== false)
&& (end($path_arr) != 'html')
&& (!in_array(end($path_arr), ['html', 'com', 'xml']))
) {
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
... ...