作者 刘锟

合并分支 'akun' 到 'master'

Akun



查看合并请求 !418
... ... @@ -780,6 +780,26 @@ class ProjectUpdate extends Command
$image = $item['images'] ?? '';
}
$new_img = $this->source_download($image, $project_id, $domain, $web_url_domain, $home_url);
//描述
if (isset($item['description'])) {
//匹配描述资源
$source_list = $this->html_preg($item['description'], $project_id, $domain, $web_url_domain, $home_url);
if ($source_list) {
foreach ($source_list as $vs) {
if ($vs['download']) {
//需要下载资源
$down_url = CosService::uploadRemote($project_id, 'image_product_category', $vs['url_complete']);
if ($down_url) {
$item['description'] = str_replace($vs['url'], $down_url, $item['description']);
}
} else {
//已经下载过资源
$item['description'] = str_replace($vs['url'], $vs['url_complete'], $item['description']);
}
break;
}
}
}
$parent_id = $model->addReturnId([
'project_id' => $project_id,
'title' => $item['name'],
... ... @@ -978,4 +998,101 @@ class ProjectUpdate extends Command
return $key . $i;
}
}
/**
 * Scan an HTML fragment for resource URLs accepted by url_check().
 *
 * The attributes <img src>, <source src>, <video src> and <a href> are examined
 * in that order. Every captured attribute value is passed to url_check(); each
 * non-false result is appended to the returned list unless an equal entry has
 * already been collected.
 *
 * @param string $html            HTML fragment to scan; a falsy value yields [].
 * @param mixed  $project_id      Forwarded unchanged to url_check().
 * @param mixed  $domain          Forwarded unchanged to url_check().
 * @param mixed  $web_url_domain  Forwarded unchanged to url_check().
 * @param mixed  $home_url        Forwarded unchanged to url_check().
 *
 * @return array List of descriptors exactly as returned by url_check().
 */
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
    if (!$html) {
        return [];
    }

    // One pattern per tag/attribute pair; capture group 2 holds the quoted value.
    $patterns = [
        // image
        '/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        // video (<source> children and <video> itself)
        '/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        '/<video\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        // downloadable files linked through <a>
        '/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
    ];

    $found = [];
    foreach ($patterns as $pattern) {
        // Start from a clean slate so a failed match never reuses earlier captures.
        $matches = [];
        preg_match_all($pattern, $html, $matches);

        foreach ($matches[2] ?? [] as $candidate) {
            $checked = $this->url_check($candidate, $project_id, $domain, $web_url_domain, $home_url);
            if (!$checked || in_array($checked, $found)) {
                continue;
            }
            $found[] = $checked;
        }
    }

    return $found;
}
/**
 * Decide whether a URL found in scraped HTML is a same-site static resource
 * and, if so, whether it still needs to be downloaded.
 *
 * A URL qualifies when all of the following hold:
 *  - its scheme is absent, "http" or "https" (compared case-insensitively);
 *  - its host is absent or occurs inside $web_url_domain or $home_url
 *    (compared case-insensitively);
 *  - its path starts with "/" and contains a ".";
 *  - the text after the last "." of the path does not contain "html", "php",
 *    "com" or "xml" (compared case-insensitively).
 *
 * @param string $url             Raw attribute value; surrounding whitespace and "&quot;" are removed.
 * @param mixed  $project_id      Project id used for the CollectSource lookup.
 * @param string $domain          Host used to build the absolute download URL.
 * @param string $web_url_domain  String that is searched for the URL's host.
 * @param string $home_url        String that is searched for the URL's host.
 *
 * @return array|false false when the URL does not qualify; otherwise an array with
 *                     'download' (bool), 'url' (the cleaned input URL) and
 *                     'url_complete' (absolute URL built from $domain when no
 *                     CollectSource record exists, otherwise that record's 'target').
 */
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
    $url = trim($url);
    if (!$url) {
        return false;
    }

    // Drop HTML-encoded quotes that ended up inside the attribute value.
    $url = str_replace('&quot;', '', $url);

    $parts = parse_url($url);
    if ($parts === false) {
        // Seriously malformed URL: nothing usable to inspect.
        return false;
    }

    $scheme = $parts['scheme'] ?? '';
    $host = $parts['host'] ?? '';
    $path = $parts['path'] ?? '';
    $query = $parts['query'] ?? '';

    // Schemes are case-insensitive, so "HTTPS" must be treated like "https".
    $scheme_lower = strtolower($scheme);
    if (!(empty($scheme) || $scheme_lower == 'https' || $scheme_lower == 'http')) {
        return false;
    }

    // Host names are case-insensitive as well; only same-site hosts (or none) pass.
    $same_site = empty($host)
        || (stripos($web_url_domain, $host) !== false)
        || (stripos($home_url, $host) !== false);
    if (!$same_site) {
        return false;
    }

    // Only absolute paths that carry some kind of file extension are candidates.
    if (!$path || (substr($path, 0, 1) != '/') || (strpos($path, '.') === false)) {
        return false;
    }

    // Page-like endings are not downloadable resources; compare in lower case
    // so that ".HTML" or ".PHP" are filtered out too.
    $segments = explode('.', $path);
    $extension = strtolower(end($segments));
    foreach (['html', 'php', 'com', 'xml'] as $blocked) {
        if (strpos($extension, $blocked) !== false) {
            return false;
        }
    }

    // The lookup uses the cleaned URL exactly as it appears in the HTML.
    $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
    if (!$source) {
        // No record yet: the resource has to be downloaded from $domain.
        return [
            'download' => true,
            'url' => $url,
            'url_complete' => ($scheme ?: 'https') . '://' . $domain . $path . ($query ? '?' . $query : '')
        ];
    }

    // A record exists: reuse its stored target instead of downloading again.
    return [
        'download' => false,
        'url' => $url,
        'url_complete' => $source['target']
    ];
}
}
... ...