作者 刘锟

update

@@ -94,11 +94,20 @@ class HtmlCollect extends Command @@ -94,11 +94,20 @@ class HtmlCollect extends Command
94 94
95 //采集html页面,下载资源到本地并替换 95 //采集html页面,下载资源到本地并替换
96 try { 96 try {
97 - $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); 97 + $opts = [
  98 + 'http' => [
  99 + 'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
  100 + ],
  101 + 'ssl' => [
  102 + 'verify_peer' => false,
  103 + 'verify_peer_name' => false,
  104 + ]
  105 + ];
  106 + $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
98 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); 107 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
99 108
100 if ($source_list) { 109 if ($source_list) {
101 - $html = $this->upload_source($html, $source_list, $project_id); 110 + $html = $this->upload_source($html, $source_list, $project_id, $opts);
102 } 111 }
103 } catch (\Exception $e) { 112 } catch (\Exception $e) {
104 $collect_info->status = CollectTask::STATUS_FAIL; 113 $collect_info->status = CollectTask::STATUS_FAIL;
@@ -271,7 +280,7 @@ class HtmlCollect extends Command @@ -271,7 +280,7 @@ class HtmlCollect extends Command
271 } 280 }
272 281
273 //下载并替换资源 282 //下载并替换资源
274 - protected function upload_source($html, $source, $project_id) 283 + protected function upload_source($html, $source, $project_id, $opts)
275 { 284 {
276 foreach ($source as $vs) { 285 foreach ($source as $vs) {
277 286
@@ -289,7 +298,7 @@ class HtmlCollect extends Command @@ -289,7 +298,7 @@ class HtmlCollect extends Command
289 298
290 if (substr($new_source, -3, 3) == 'css') { 299 if (substr($new_source, -3, 3) == 'css') {
291 // 下载css文件中的资源 300 // 下载css文件中的资源
292 - $css_html = file_get_contents($vs['url_complete']); 301 + $css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
293 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source); 302 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
294 $css_source = $result_css_source[1] ?? []; 303 $css_source = $result_css_source[1] ?? [];
295 304
@@ -93,11 +93,20 @@ class HtmlLanguageCollect extends Command @@ -93,11 +93,20 @@ class HtmlLanguageCollect extends Command
93 93
94 //采集html页面,下载资源到本地并替换 94 //采集html页面,下载资源到本地并替换
95 try { 95 try {
96 - $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route); 96 + $opts = [
  97 + 'http' => [
  98 + 'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
  99 + ],
  100 + 'ssl' => [
  101 + 'verify_peer' => false,
  102 + 'verify_peer_name' => false,
  103 + ]
  104 + ];
  105 + $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
97 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); 106 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
98 107
99 if ($source_list) { 108 if ($source_list) {
100 - $html = $this->upload_source($html, $source_list, $project_id); 109 + $html = $this->upload_source($html, $source_list, $project_id, $opts);
101 } 110 }
102 } catch (\Exception $e) { 111 } catch (\Exception $e) {
103 $collect_info->status = CollectTask::STATUS_FAIL; 112 $collect_info->status = CollectTask::STATUS_FAIL;
@@ -270,7 +279,7 @@ class HtmlLanguageCollect extends Command @@ -270,7 +279,7 @@ class HtmlLanguageCollect extends Command
270 } 279 }
271 280
272 //下载并替换资源 281 //下载并替换资源
273 - protected function upload_source($html, $source, $project_id) 282 + protected function upload_source($html, $source, $project_id, $opts)
274 { 283 {
275 foreach ($source as $vs) { 284 foreach ($source as $vs) {
276 285
@@ -288,7 +297,7 @@ class HtmlLanguageCollect extends Command @@ -288,7 +297,7 @@ class HtmlLanguageCollect extends Command
288 297
289 if (substr($new_source, -3, 3) == 'css') { 298 if (substr($new_source, -3, 3) == 'css') {
290 // 下载css文件中的资源 299 // 下载css文件中的资源
291 - $css_html = file_get_contents($vs['url_complete']); 300 + $css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
292 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source); 301 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
293 $css_source = $result_css_source[1] ?? []; 302 $css_source = $result_css_source[1] ?? [];
294 303