作者 刘锟

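update: fetch pages and CSS files through a stream context that sends a browser User-Agent header and disables SSL peer verification; pass the context options ($opts) into upload_source() in both HtmlCollect and HtmlLanguageCollect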

... ... @@ -94,11 +94,20 @@ class HtmlCollect extends Command
//采集html页面,下载资源到本地并替换
try {
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
$opts = [
'http' => [
'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
]
];
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
$html = $this->upload_source($html, $source_list, $project_id, $opts);
}
} catch (\Exception $e) {
$collect_info->status = CollectTask::STATUS_FAIL;
... ... @@ -271,7 +280,7 @@ class HtmlCollect extends Command
}
//下载并替换资源
protected function upload_source($html, $source, $project_id)
protected function upload_source($html, $source, $project_id, $opts)
{
foreach ($source as $vs) {
... ... @@ -289,7 +298,7 @@ class HtmlCollect extends Command
if (substr($new_source, -3, 3) == 'css') {
// 下载css文件中的资源
$css_html = file_get_contents($vs['url_complete']);
$css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
$css_source = $result_css_source[1] ?? [];
... ...
... ... @@ -93,11 +93,20 @@ class HtmlLanguageCollect extends Command
//采集html页面,下载资源到本地并替换
try {
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
$opts = [
'http' => [
'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
]
];
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
$html = $this->upload_source($html, $source_list, $project_id, $opts);
}
} catch (\Exception $e) {
$collect_info->status = CollectTask::STATUS_FAIL;
... ... @@ -270,7 +279,7 @@ class HtmlLanguageCollect extends Command
}
//下载并替换资源
protected function upload_source($html, $source, $project_id)
protected function upload_source($html, $source, $project_id, $opts)
{
foreach ($source as $vs) {
... ... @@ -288,7 +297,7 @@ class HtmlLanguageCollect extends Command
if (substr($new_source, -3, 3) == 'css') {
// 下载css文件中的资源
$css_html = file_get_contents($vs['url_complete']);
$css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
$css_source = $result_css_source[1] ?? [];
... ...