作者 刘锟

update

@@ -63,7 +63,7 @@ class HtmlLanguageCollect extends Command @@ -63,7 +63,7 @@ class HtmlLanguageCollect extends Command
63 //设置数据库 63 //设置数据库
64 $project = ProjectServer::useProject($project_id); 64 $project = ProjectServer::useProject($project_id);
65 if ($project) { 65 if ($project) {
66 - $collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '!=', '')->first(); 66 + $collect_info = CollectTask::select(['id', 'domain', 'route', 'language'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '!=', '')->first();
67 67
68 if (!$collect_info) { 68 if (!$collect_info) {
69 sleep(2); 69 sleep(2);
@@ -75,10 +75,18 @@ class HtmlLanguageCollect extends Command @@ -75,10 +75,18 @@ class HtmlLanguageCollect extends Command
75 $collect_info->status = CollectTask::STATUS_ING; 75 $collect_info->status = CollectTask::STATUS_ING;
76 $collect_info->save(); 76 $collect_info->save();
77 77
78 - $web_url_domain = $collect_info->domain;  
79 - $home_url = $collect_info->domain;  
80 - $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';  
81 - $data_config = http_get($url_web_config, ['charset' => 'UTF-8']); 78 + //获取英文站域名
  79 + $domain = $collect_info->domain;
  80 + if (strpos($domain, '/') !== false) {
  81 + $domain = substr($domain, 0, strpos($domain, '/'));
  82 + } else {
  83 + $domain = str_replace($collect_info->language, 'www', $domain);
  84 + }
  85 +
  86 + $web_url_domain = $domain;
  87 + $home_url = $domain;
  88 + $url_web_config = 'https://' . $domain . '/wp-content/cache/user_config.text';
  89 + $data_config = curl_c($url_web_config);
82 if ($data_config) { 90 if ($data_config) {
83 $web_url_arr = parse_url($data_config['web_url_domain'] ?? ''); 91 $web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
84 if (isset($web_url_arr['host'])) { 92 if (isset($web_url_arr['host'])) {
@@ -93,20 +101,11 @@ class HtmlLanguageCollect extends Command @@ -93,20 +101,11 @@ class HtmlLanguageCollect extends Command
93 101
94 //采集html页面,下载资源到本地并替换 102 //采集html页面,下载资源到本地并替换
95 try { 103 try {
96 - $opts = [  
97 - 'http' => [  
98 - 'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'  
99 - ],  
100 - 'ssl' => [  
101 - 'verify_peer' => false,  
102 - 'verify_peer_name' => false,  
103 - ]  
104 - ];  
105 - $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts)); 104 + $html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
106 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); 105 $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
107 106
108 if ($source_list) { 107 if ($source_list) {
109 - $html = $this->upload_source($html, $source_list, $project_id, $opts); 108 + $html = $this->upload_source($html, $source_list, $project_id);
110 } 109 }
111 } catch (\Exception $e) { 110 } catch (\Exception $e) {
112 $collect_info->status = CollectTask::STATUS_FAIL; 111 $collect_info->status = CollectTask::STATUS_FAIL;
@@ -279,7 +278,7 @@ class HtmlLanguageCollect extends Command @@ -279,7 +278,7 @@ class HtmlLanguageCollect extends Command
279 } 278 }
280 279
281 //下载并替换资源 280 //下载并替换资源
282 - protected function upload_source($html, $source, $project_id, $opts) 281 + protected function upload_source($html, $source, $project_id)
283 { 282 {
284 foreach ($source as $vs) { 283 foreach ($source as $vs) {
285 284
@@ -297,7 +296,7 @@ class HtmlLanguageCollect extends Command @@ -297,7 +296,7 @@ class HtmlLanguageCollect extends Command
297 296
298 if (substr($new_source, -3, 3) == 'css') { 297 if (substr($new_source, -3, 3) == 'css') {
299 // 下载css文件中的资源 298 // 下载css文件中的资源
300 - $css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts)); 299 + $css_html = curl_c($vs['url_complete'], false);
301 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source); 300 preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
302 $css_source = $result_css_source[1] ?? []; 301 $css_source = $result_css_source[1] ?? [];
303 302
@@ -111,11 +111,12 @@ if (!function_exists('http_get')) { @@ -111,11 +111,12 @@ if (!function_exists('http_get')) {
111 if (!function_exists('curl_c')) { 111 if (!function_exists('curl_c')) {
112 /** 112 /**
113 * @param $url 113 * @param $url
  114 + * @param $is_array
114 * @return [] 115 * @return []
115 * @author Akun 116 * @author Akun
116 * @date 2023/11/22 11:33 117 * @date 2023/11/22 11:33
117 */ 118 */
118 - function curl_c($url){ 119 + function curl_c($url,$is_array=true){
119 $header = array( 120 $header = array(
120 'Expect:', 121 'Expect:',
121 'Content-Type: application/json; charset=utf-8' 122 'Content-Type: application/json; charset=utf-8'
@@ -130,11 +131,12 @@ if (!function_exists('curl_c')) { @@ -130,11 +131,12 @@ if (!function_exists('curl_c')) {
130 curl_setopt($ch, CURLOPT_TIMEOUT, 120); 131 curl_setopt($ch, CURLOPT_TIMEOUT, 120);
131 curl_setopt($ch, CURLOPT_MAXREDIRS, 10); 132 curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
132 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); 133 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
  134 + curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
133 curl_setopt($ch, CURLOPT_SSLVERSION, 'all'); 135 curl_setopt($ch, CURLOPT_SSLVERSION, 'all');
134 curl_setopt($ch, CURLOPT_HTTPHEADER, $header); 136 curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
135 $content = curl_exec($ch); 137 $content = curl_exec($ch);
136 curl_close($ch); 138 curl_close($ch);
137 - return json_decode($content, true); 139 + return $is_array ? json_decode($content, true) : $content;
138 } 140 }
139 } 141 }
140 142