|
...
|
...
|
@@ -2,11 +2,17 @@ |
|
|
|
|
|
|
|
namespace App\Console\Commands\Update;
|
|
|
|
|
|
|
|
use App\Helper\Arr;
|
|
|
|
use App\Models\Blog\Blog;
|
|
|
|
use App\Models\Collect\CollectSource;
|
|
|
|
use App\Models\Collect\CollectTask;
|
|
|
|
use App\Models\Com\UpdateLog;
|
|
|
|
use App\Models\Com\UpdateOldInfo;
|
|
|
|
use App\Models\CustomModule\CustomModuleContent;
|
|
|
|
use App\Models\News\News;
|
|
|
|
use App\Models\Product\Product;
|
|
|
|
use App\Models\RouteMap\RouteMap;
|
|
|
|
use App\Models\Template\BCustomTemplate;
|
|
|
|
use App\Services\CosService;
|
|
|
|
use App\Services\ProjectServer;
|
|
|
|
use Illuminate\Console\Command;
|
|
...
|
...
|
@@ -65,7 +71,7 @@ class HtmlCollect extends Command |
|
|
|
//设置数据库
|
|
|
|
$project = ProjectServer::useProject($project_id);
|
|
|
|
if ($project) {
|
|
|
|
$collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first();
|
|
|
|
$collect_info = CollectTask::select(['id', 'domain', 'route', 'source', 'source_id'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first();
|
|
|
|
|
|
|
|
if (!$collect_info) {
|
|
|
|
sleep(2);
|
|
...
|
...
|
@@ -101,6 +107,9 @@ class HtmlCollect extends Command |
|
|
|
$new_html = str_replace($v64, '', $new_html);
|
|
|
|
}
|
|
|
|
|
|
|
|
//提取页面tdk
|
|
|
|
$this->get_site_meta($new_html, $collect_info);
|
|
|
|
|
|
|
|
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
|
|
|
|
|
|
|
|
if ($source_list) {
|
|
...
|
...
|
@@ -188,6 +197,73 @@ class HtmlCollect extends Command |
|
|
|
return $task_id;
|
|
|
|
}
|
|
|
|
|
|
|
|
//获取META信息
|
|
|
|
/**
 * Extract the page title, meta keywords and meta description from the
 * collected HTML and write them to the SEO fields of the record that the
 * collect task points at.
 *
 * Nothing is written when no non-blank value could be extracted or when the
 * task carries no source_id.
 *
 * @param string $html         HTML of the collected page.
 * @param object $collect_info Collect task row; its `source` and `source_id` properties are read.
 * @return void
 */
private function get_site_meta($html, $collect_info)
{
    $meta = [];
    if (!empty($html)) {
        // [^>]* tolerates attributes on the tag and /i tolerates <TITLE>.
        if (preg_match('/<title\b[^>]*>([\w\W]*?)<\/title>/i', $html, $matches)) {
            $meta['title'] = $this->clip_meta_text($matches[1], 70);
        }
        $meta['keyword'] = $this->clip_meta_text($this->find_meta_tag_content($html, 'keywords'), 200);
        $meta['description'] = $this->clip_meta_text($this->find_meta_tag_content($html, 'description'), 255);

        // Drop blank values so a page without usable meta data does not trigger an update.
        $meta = array_filter($meta, static function ($value) {
            return $value !== '';
        });
    }

    if (empty($meta)) {
        return;
    }

    $id = $collect_info->source_id;
    if (empty($id)) {
        // No target record to update.
        return;
    }

    // Fields that were not found are written as empty strings.
    // NOTE(review): this overwrites any existing value of a field that was not found on the page — confirm that is intended.
    $title = $meta['title'] ?? '';
    $keyword = $meta['keyword'] ?? '';
    $description = $meta['description'] ?? '';

    // Column layout shared by News, Blog and custom module content.
    $seo_columns = [
        'seo_title' => $title,
        'seo_keywords' => $keyword,
        'seo_description' => $description,
    ];

    switch ($collect_info->source) {
        case RouteMap::SOURCE_PRODUCT:
            // Products keep all three values in one column, encoded with Arr::a2s().
            $seo_mate = [
                'title' => $title,
                'keyword' => $keyword,
                'description' => $description,
            ];
            Product::where('id', $id)->update(['seo_mate' => Arr::a2s($seo_mate)]);
            break;
        case RouteMap::SOURCE_NEWS:
            News::where('id', $id)->update($seo_columns);
            break;
        case RouteMap::SOURCE_BLOG:
            Blog::where('id', $id)->update($seo_columns);
            break;
        case RouteMap::SOURCE_PAGE:
            BCustomTemplate::where('id', $id)->update([
                'title' => $title,
                'keywords' => $keyword,
                'description' => $description,
            ]);
            break;
        default:
            // NOTE(review): every other source value, including an empty one, is treated as
            // custom module content — confirm no other source type can reach this branch.
            CustomModuleContent::where('id', $id)->update($seo_columns);
    }
}

/**
 * Return the content attribute of the first <meta> tag whose name attribute
 * equals $name (case-insensitive), or null when there is no such tag.
 *
 * Attribute order inside the tag does not matter, and the value ends at the
 * same quote character that opened it, so apostrophes, double quotes of the
 * other kind and "|" inside the value are preserved.
 *
 * @param string $html HTML to search.
 * @param string $name Value of the name attribute to look for, e.g. "keywords".
 * @return string|null
 */
private function find_meta_tag_content($html, $name)
{
    // Quoted attribute values are consumed as a unit so a ">" inside them does not end the tag.
    // Possessive quantifiers keep the match from backtracking on malformed markup.
    if (!preg_match_all('/<meta\b(?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+>/i', $html, $tags)) {
        return null;
    }

    // The lookbehind rejects attributes such as data-name / data-content.
    $name_pattern = '/(?<![\w-])name\s*=\s*(["\'])\s*' . preg_quote($name, '/') . '\s*\1/i';
    $content_pattern = '/(?<![\w-])content\s*=\s*(["\'])([\w\W]*?)\1/i';

    foreach ($tags[0] as $tag) {
        if (preg_match($name_pattern, $tag) && preg_match($content_pattern, $tag, $found)) {
            return $found[2];
        }
    }

    return null;
}

/**
 * Trim $text and cut it to at most $limit characters.
 *
 * mb_substr() counts characters rather than bytes, so a multibyte character
 * is never split in half. The text is treated as UTF-8.
 * NOTE(review): assumes the collected HTML is UTF-8 — confirm against the collector.
 *
 * @param string|null $text  Raw extracted text; null is treated as an empty string.
 * @param int         $limit Maximum number of characters to keep.
 * @return string
 */
private function clip_meta_text($text, $limit)
{
    return mb_substr(trim((string)$text), 0, $limit, 'UTF-8');
}
|
|
|
|
|
|
|
|
//正则匹配html资源
|
|
|
|
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
|
|
|
|
{
|
...
|
...
|
|