|
@@ -2,11 +2,17 @@ |
|
@@ -2,11 +2,17 @@ |
|
2
|
|
2
|
|
|
3
|
namespace App\Console\Commands\Update;
|
3
|
namespace App\Console\Commands\Update;
|
|
4
|
|
4
|
|
|
|
|
5
|
+use App\Helper\Arr;
|
|
|
|
6
|
+use App\Models\Blog\Blog;
|
|
5
|
use App\Models\Collect\CollectSource;
|
7
|
use App\Models\Collect\CollectSource;
|
|
6
|
use App\Models\Collect\CollectTask;
|
8
|
use App\Models\Collect\CollectTask;
|
|
7
|
use App\Models\Com\UpdateLog;
|
9
|
use App\Models\Com\UpdateLog;
|
|
8
|
use App\Models\Com\UpdateOldInfo;
|
10
|
use App\Models\Com\UpdateOldInfo;
|
|
|
|
11
|
+use App\Models\CustomModule\CustomModuleContent;
|
|
|
|
12
|
+use App\Models\News\News;
|
|
|
|
13
|
+use App\Models\Product\Product;
|
|
9
|
use App\Models\RouteMap\RouteMap;
|
14
|
use App\Models\RouteMap\RouteMap;
|
|
|
|
15
|
+use App\Models\Template\BCustomTemplate;
|
|
10
|
use App\Services\CosService;
|
16
|
use App\Services\CosService;
|
|
11
|
use App\Services\ProjectServer;
|
17
|
use App\Services\ProjectServer;
|
|
12
|
use Illuminate\Console\Command;
|
18
|
use Illuminate\Console\Command;
|
|
@@ -65,7 +71,7 @@ class HtmlCollect extends Command |
|
@@ -65,7 +71,7 @@ class HtmlCollect extends Command |
|
65
|
//设置数据库
|
71
|
//设置数据库
|
|
66
|
$project = ProjectServer::useProject($project_id);
|
72
|
$project = ProjectServer::useProject($project_id);
|
|
67
|
if ($project) {
|
73
|
if ($project) {
|
|
68
|
- $collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first();
|
74
|
+ $collect_info = CollectTask::select(['id', 'domain', 'route', 'source', 'source_id'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first();
|
|
69
|
|
75
|
|
|
70
|
if (!$collect_info) {
|
76
|
if (!$collect_info) {
|
|
71
|
sleep(2);
|
77
|
sleep(2);
|
|
@@ -101,6 +107,9 @@ class HtmlCollect extends Command |
|
@@ -101,6 +107,9 @@ class HtmlCollect extends Command |
|
101
|
$new_html = str_replace($v64, '', $new_html);
|
107
|
$new_html = str_replace($v64, '', $new_html);
|
|
102
|
}
|
108
|
}
|
|
103
|
|
109
|
|
|
|
|
110
|
+ //提取页面tdk
|
|
|
|
111
|
+ $this->get_site_meta($new_html, $collect_info);
|
|
|
|
112
|
+
|
|
104
|
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
|
113
|
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
|
|
105
|
|
114
|
|
|
106
|
if ($source_list) {
|
115
|
if ($source_list) {
|
|
@@ -188,6 +197,73 @@ class HtmlCollect extends Command |
|
@@ -188,6 +197,73 @@ class HtmlCollect extends Command |
|
188
|
return $task_id;
|
197
|
return $task_id;
|
|
189
|
}
|
198
|
}
|
|
190
|
|
199
|
|
|
|
|
200
|
/**
 * Extract the page's title / keywords / description (TDK) from the collected
 * HTML and write them to the record referenced by the collect task.
 *
 * Nothing is written when none of the three tags can be found. When at least
 * one is found, the fields that were not found are written as empty strings.
 *
 * @param string $html         Collected page HTML.
 * @param object $collect_info Collect task row; its `source` and `source_id` attributes are read.
 * @return void
 */
private function get_site_meta($html, $collect_info)
{
    $meta = [];

    if (!empty($html)) {
        // Title: case-insensitive, and tolerate attributes on the <title> tag.
        if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $matches)) {
            // mb_substr cuts on character boundaries; plain substr() could split a
            // multibyte character and leave an invalid UTF-8 string.
            $meta['title'] = mb_substr(trim($matches[1]), 0, 70, 'UTF-8');
        }

        // Meta tags: result key => tag name and maximum stored length (in characters).
        // NOTE(review): the limits presumably mirror the DB column sizes - confirm.
        $meta_tags = [
            'keyword'     => ['name' => 'keywords', 'limit' => 200],
            'description' => ['name' => 'description', 'limit' => 255],
        ];

        foreach ($meta_tags as $key => $tag) {
            // \1 and \2 force the closing quote to be the same character as the opening
            // one, so an apostrophe inside a double-quoted content value is preserved.
            $pattern = '/<meta\s+[^>]*?name=([\'"])' . $tag['name'] . '\1\s+[^>]*?content=([\'"])(.*?)\2/is';
            if (preg_match($pattern, $html, $matches)) {
                $meta[$key] = mb_substr(trim($matches[3]), 0, $tag['limit'], 'UTF-8');
            }
        }
    }

    if (empty($meta)) {
        return;
    }

    $id = $collect_info->source_id;
    $title = $meta['title'] ?? '';
    $keyword = $meta['keyword'] ?? '';
    $description = $meta['description'] ?? '';

    // Column layout shared by news, blog and custom module content.
    $seo_fields = [
        'seo_title'       => $title,
        'seo_keywords'    => $keyword,
        'seo_description' => $description,
    ];

    switch ($collect_info->source) {
        case RouteMap::SOURCE_PRODUCT:
            // Products keep all three values in the single `seo_mate` column.
            // NOTE(review): Arr::a2s presumably serialises the array to a string - confirm.
            $seo_mate = [
                'title'       => $title,
                'keyword'     => $keyword,
                'description' => $description,
            ];
            Product::where('id', $id)->update(['seo_mate' => Arr::a2s($seo_mate)]);
            break;
        case RouteMap::SOURCE_NEWS:
            News::where('id', $id)->update($seo_fields);
            break;
        case RouteMap::SOURCE_BLOG:
            Blog::where('id', $id)->update($seo_fields);
            break;
        case RouteMap::SOURCE_PAGE:
            BCustomTemplate::where('id', $id)->update([
                'title'       => $title,
                'keywords'    => $keyword,
                'description' => $description,
            ]);
            break;
        default:
            // Every other source value is written to custom module content.
            CustomModuleContent::where('id', $id)->update($seo_fields);
            break;
    }
}
|
|
|
|
266
|
+
|
|
191
|
//正则匹配html资源
|
267
|
//正则匹配html资源
|
|
192
|
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
|
268
|
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
|
|
193
|
{
|
269
|
{
|