|
@@ -63,7 +63,7 @@ class HtmlLanguageCollect extends Command |
|
@@ -63,7 +63,7 @@ class HtmlLanguageCollect extends Command |
|
63
|
//设置数据库
|
63
|
//设置数据库
|
|
64
|
$project = ProjectServer::useProject($project_id);
|
64
|
$project = ProjectServer::useProject($project_id);
|
|
65
|
if ($project) {
|
65
|
if ($project) {
|
|
66
|
- $collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '!=', '')->first();
|
66
|
+ $collect_info = CollectTask::select(['id', 'domain', 'route', 'language'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '!=', '')->first();
|
|
67
|
|
67
|
|
|
68
|
if (!$collect_info) {
|
68
|
if (!$collect_info) {
|
|
69
|
sleep(2);
|
69
|
sleep(2);
|
|
@@ -75,10 +75,18 @@ class HtmlLanguageCollect extends Command |
|
@@ -75,10 +75,18 @@ class HtmlLanguageCollect extends Command |
|
75
|
$collect_info->status = CollectTask::STATUS_ING;
|
75
|
$collect_info->status = CollectTask::STATUS_ING;
|
|
76
|
$collect_info->save();
|
76
|
$collect_info->save();
|
|
77
|
|
77
|
|
|
78
|
- $web_url_domain = $collect_info->domain;
|
|
|
|
79
|
- $home_url = $collect_info->domain;
|
|
|
|
80
|
- $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
|
|
|
|
81
|
- $data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
|
78
|
+ //获取英文站域名
|
|
|
|
79
|
+ $domain = $collect_info->domain;
|
|
|
|
80
|
+ if (strpos($domain, '/') !== false) {
|
|
|
|
81
|
+ $domain = substr($domain, 0, strpos($domain, '/'));
|
|
|
|
82
|
+ } else {
|
|
|
|
83
|
+ $domain = str_replace($collect_info->language, 'www', $domain);
|
|
|
|
84
|
+ }
|
|
|
|
85
|
+
|
|
|
|
86
|
+ $web_url_domain = $domain;
|
|
|
|
87
|
+ $home_url = $domain;
|
|
|
|
88
|
+ $url_web_config = 'https://' . $domain . '/wp-content/cache/user_config.text';
|
|
|
|
89
|
+ $data_config = curl_c($url_web_config);
|
|
82
|
if ($data_config) {
|
90
|
if ($data_config) {
|
|
83
|
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
|
91
|
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
|
|
84
|
if (isset($web_url_arr['host'])) {
|
92
|
if (isset($web_url_arr['host'])) {
|
|
@@ -93,20 +101,11 @@ class HtmlLanguageCollect extends Command |
|
@@ -93,20 +101,11 @@ class HtmlLanguageCollect extends Command |
|
93
|
|
101
|
|
|
94
|
//采集html页面,下载资源到本地并替换
|
102
|
//采集html页面,下载资源到本地并替换
|
|
95
|
try {
|
103
|
try {
|
|
96
|
- $opts = [
|
|
|
|
97
|
- 'http' => [
|
|
|
|
98
|
- 'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
|
|
|
|
99
|
- ],
|
|
|
|
100
|
- 'ssl' => [
|
|
|
|
101
|
- 'verify_peer' => false,
|
|
|
|
102
|
- 'verify_peer_name' => false,
|
|
|
|
103
|
- ]
|
|
|
|
104
|
- ];
|
|
|
|
105
|
- $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route, false, stream_context_create($opts));
|
104
|
+ $html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
|
|
106
|
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
|
105
|
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
|
|
107
|
|
106
|
|
|
108
|
if ($source_list) {
|
107
|
if ($source_list) {
|
|
109
|
- $html = $this->upload_source($html, $source_list, $project_id, $opts);
|
108
|
+ $html = $this->upload_source($html, $source_list, $project_id);
|
|
110
|
}
|
109
|
}
|
|
111
|
} catch (\Exception $e) {
|
110
|
} catch (\Exception $e) {
|
|
112
|
$collect_info->status = CollectTask::STATUS_FAIL;
|
111
|
$collect_info->status = CollectTask::STATUS_FAIL;
|
|
@@ -279,7 +278,7 @@ class HtmlLanguageCollect extends Command |
|
@@ -279,7 +278,7 @@ class HtmlLanguageCollect extends Command |
|
279
|
}
|
278
|
}
|
|
280
|
|
279
|
|
|
281
|
//下载并替换资源
|
280
|
//下载并替换资源
|
|
282
|
- protected function upload_source($html, $source, $project_id, $opts)
|
281
|
+ protected function upload_source($html, $source, $project_id)
|
|
283
|
{
|
282
|
{
|
|
284
|
foreach ($source as $vs) {
|
283
|
foreach ($source as $vs) {
|
|
285
|
|
284
|
|
|
@@ -297,7 +296,7 @@ class HtmlLanguageCollect extends Command |
|
@@ -297,7 +296,7 @@ class HtmlLanguageCollect extends Command |
|
297
|
|
296
|
|
|
298
|
if (substr($new_source, -3, 3) == 'css') {
|
297
|
if (substr($new_source, -3, 3) == 'css') {
|
|
299
|
// 下载css文件中的资源
|
298
|
// 下载css文件中的资源
|
|
300
|
- $css_html = file_get_contents($vs['url_complete'], false, stream_context_create($opts));
|
299
|
+ $css_html = curl_c($vs['url_complete'], false);
|
|
301
|
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
|
300
|
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
|
|
302
|
$css_source = $result_css_source[1] ?? [];
|
301
|
$css_source = $result_css_source[1] ?? [];
|
|
303
|
|
302
|
|