|
...
|
...
|
@@ -79,7 +79,7 @@ class HtmlCollect extends Command |
|
|
|
$web_url_domain = $collect_info->domain;
|
|
|
|
$home_url = $collect_info->domain;
|
|
|
|
$url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
|
|
|
|
$data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
|
|
|
|
$data_config = curl_c($url_web_config);
|
|
|
|
if ($data_config) {
|
|
|
|
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
|
|
|
|
if (isset($web_url_arr['host'])) {
|
|
...
|
...
|
@@ -94,20 +94,11 @@ class HtmlCollect extends Command |
|
|
|
|
|
|
|
//采集html页面,下载资源到本地并替换
|
|
|
|
try {
|
|
|
|
$opts = [
|
|
|
|
'http' => [
|
|
|
|
'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
|
|
|
|
],
|
|
|
|
'ssl' => [
|
|
|
|
'verify_peer' => false,
|
|
|
|
'verify_peer_name' => false,
|
|
|
|
]
|
|
|
|
];
|
|
|
|
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
|
|
|
|
$html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
|
|
|
|
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
|
|
|
|
|
|
|
|
if ($source_list) {
|
|
|
|
$html = $this->upload_source($html, $source_list, $project_id, $opts);
|
|
|
|
$html = $this->upload_source($html, $source_list, $project_id);
|
|
|
|
}
|
|
|
|
} catch (\Exception $e) {
|
|
|
|
$collect_info->status = CollectTask::STATUS_FAIL;
|
|
...
|
...
|
@@ -280,7 +271,7 @@ class HtmlCollect extends Command |
|
|
|
}
|
|
|
|
|
|
|
|
//下载并替换资源
|
|
|
|
protected function upload_source($html, $source, $project_id, $opts)
|
|
|
|
protected function upload_source($html, $source, $project_id)
|
|
|
|
{
|
|
|
|
foreach ($source as $vs) {
|
|
|
|
|
|
...
|
...
|
@@ -298,7 +289,7 @@ class HtmlCollect extends Command |
|
|
|
|
|
|
|
if (substr($new_source, -3, 3) == 'css') {
|
|
|
|
// 下载css文件中的资源
|
|
|
|
$css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
|
|
|
|
$css_html = curl_c($vs['url_complete'], false);
|
|
|
|
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
|
|
|
|
$css_source = $result_css_source[1] ?? [];
|
|
|
|
|
...
|
...
|
|