作者 lyh

Merge branch 'master' of http://47.244.231.31:8099/zhl/globalso-v6 into develop

<?php
namespace App\Console\Commands\Update;
use App\Models\Collect\CollectSource;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Redis;
/**
* 4.0,5.0升级到6.0,页面采集
* Class ProjectImport
* @package App\Console\Commands
* @author Akun
* @date 2023/11/10 16:04
*/
class HtmlCollect extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'project_html_collect';
/**
* The console command description.
*
* @var string
*/
protected $description = '执行项目html页面采集';
/**
 * Command entry point: runs one collection pass via start_update().
 *
 * The endless-loop wrapper is commented out, so one invocation performs a single pass.
 */
public function handle()
{
// while (true) {
$this->start_update();
// }
}
/**
 * Run one collection pass: take a task, download its page, localise the
 * page's static resources and store the resulting HTML on the task row.
 *
 * Task ids have the form "<project_id>_<collect_task_id>".
 *
 * @return bool Always true.
 */
protected function start_update()
{
    // Take the next task from the queue. A hard-coded debug id here would make
    // the two "nothing to do" branches below unreachable.
    $task_id = $this->get_task();
    if ($task_id === false) {
        // Nothing left to collect for any project.
        sleep(60);
        return true;
    }
    if ($task_id === 0) {
        // The current project has no pending pages.
        sleep(2);
        return true;
    }

    $task_arr = explode('_', (string)$task_id);
    if (count($task_arr) < 2) {
        // Malformed queue entry: skip it instead of reading a missing offset.
        sleep(2);
        return true;
    }
    $project_id = $task_arr[0];
    $collect_id = $task_arr[1];

    try {
        // Switch the custom connection to the project's database.
        $project = ProjectServer::useProject($project_id);
        if ($project) {
            $collect_info = CollectTask::select(['id', 'domain', 'route'])
                ->where('id', $collect_id)
                ->where('status', CollectTask::STATUS_UN)
                ->first();

            if ($collect_info) {
                echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', collect start' . PHP_EOL;

                $collect_info->status = CollectTask::STATUS_ING;
                $collect_info->save();

                // Download the page, then upload its resources and rewrite the links.
                $collected = false;
                try {
                    $html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
                    if ($html === false) {
                        // Without an error-to-exception handler a failed request only
                        // returns false; do not store that as a finished page.
                        throw new \RuntimeException('failed to fetch page');
                    }
                    $source_list = $this->html_preg($html, $project_id, $collect_info->domain);
                    if ($source_list) {
                        $html = $this->upload_source($html, $source_list, $project_id);
                    }
                    $collected = true;
                } catch (\Exception $e) {
                    // The task stays in STATUS_ING, as before; there is no failure status.
                    echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL;
                }

                if ($collected) {
                    $collect_info->html = $html;
                    $collect_info->status = CollectTask::STATUS_COM;
                    $collect_info->save();
                    echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', task_type: ' . $collect_id . ', collect end' . PHP_EOL;
                }
            }
        }
    } finally {
        // Release the project connection on every path, including early exits and errors.
        DB::disconnect('custom_mysql');
    }

    sleep(2);
    return true;
}
//获取任务
/**
 * Return the next collection task id, refilling the Redis queue when it is empty.
 *
 * @return string|int|false "<project_id>_<collect_task_id>" for a task,
 *                          0 when the selected project has no pending pages,
 *                          false when there is nothing to collect.
 */
protected function get_task()
{
    $key = 'console_html_collect_task';

    $task_id = Redis::rpop($key);
    if ($task_id) {
        return $task_id;
    }

    // Queue is empty: pick the first imported project whose pages are not collected yet.
    $update_log = UpdateLog::where('status', UpdateLog::STATUS_COM)
        ->where('collect_status', UpdateLog::COLLECT_STATUS_UN)
        ->orderBy('project_id', 'asc')
        ->first();
    if (!$update_log) {
        return false;
    }

    $complete = false;

    // Switch the custom connection to the project's database.
    $project = ProjectServer::useProject($update_log->project_id);
    if ($project) {
        $collect_list = CollectTask::select(['id', 'project_id'])
            ->where('project_id', $update_log->project_id)
            ->where('status', CollectTask::STATUS_UN)
            ->limit(50)
            ->get();

        if ($collect_list->isEmpty()) {
            $complete = true;
        } else {
            foreach ($collect_list as $collect) {
                Redis::lpush($key, $collect->project_id . '_' . $collect->id);
            }
        }
    }

    // Release the project connection.
    DB::disconnect('custom_mysql');

    if ($complete) {
        // Persist the flag; without save() the same log row is selected again on every call.
        $update_log->collect_status = UpdateLog::COLLECT_STATUS_COM;
        $update_log->save();
        return 0;
    }

    // Redis clients report an empty list as null or false; normalise to the
    // documented false sentinel so the caller's strict comparison matches.
    $task_id = Redis::rpop($key);
    return $task_id ?: false;
}
//正则匹配html资源
/**
 * Extract the resource URLs referenced by a page (images, scripts, media
 * sources and link tags) and classify each one with url_check().
 *
 * Each distinct URL is reported once, in order of first appearance, so that a
 * resource used several times on the page is looked up and uploaded only once.
 *
 * @param string|false $html       Page markup.
 * @param mixed        $project_id Project the page belongs to.
 * @param string       $domain     Domain of the page, used for host-less URLs.
 * @return array List of url_check() results (entries rejected by url_check() are omitted).
 */
protected function html_preg($html, $project_id, $domain)
{
    $source = [];
    if (!$html) {
        return $source;
    }

    // One pattern per tag kind; capture group 2 holds the attribute value.
    $patterns = [
        // images
        '/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        // js
        '/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        // video
        '/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
        // css
        '/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i',
    ];

    $seen = [];
    foreach ($patterns as $pattern) {
        preg_match_all($pattern, $html, $matches);
        foreach ($matches[2] ?? [] as $url) {
            if (isset($seen[$url])) {
                continue;
            }
            $seen[$url] = true;

            $checked = $this->url_check($url, $project_id, $domain);
            if ($checked) {
                $source[] = $checked;
            }
        }
    }

    return $source;
}
//判断资源是否需要下载
/**
 * Decide whether a resource URL has to be downloaded.
 *
 * URLs are skipped when they are empty, use a non-http(s) scheme, point at a
 * ".globalso." / ".goodao." host, or have no path containing a dot.
 *
 * @param string $url        URL exactly as it appears in the page.
 * @param mixed  $project_id Project the page belongs to.
 * @param string $domain     Fallback host for URLs that carry none.
 * @return array|false ['download' => bool, 'url' => string, 'url_complete' => string],
 *                     or false when the URL should be left untouched. For known
 *                     resources 'url_complete' is the stored target.
 */
protected function url_check($url, $project_id, $domain)
{
    if (!$url) {
        return false;
    }

    $arr = parse_url($url);
    $scheme = $arr['scheme'] ?? '';
    $host = $arr['host'] ?? '';
    $path = $arr['path'] ?? '';

    // data:, javascript:, mailto: and similar cannot be fetched over http.
    if ($scheme !== '' && !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return false;
    }

    if (strpos($host, '.globalso.') !== false
        || strpos($host, '.goodao.') !== false
        || !$path
        || strpos($path, '.') === false) {
        return false;
    }

    $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
    if ($source) {
        // Already uploaded earlier: reuse the stored target.
        return [
            'download' => false,
            'url' => $url,
            'url_complete' => $source['target']
        ];
    }

    // A relative reference such as "img/a.png" has no leading slash; add one so
    // it is not glued onto the host name. Best effort: it is resolved against
    // the site root because the page's own path is not available here.
    if ($path[0] !== '/') {
        $path = '/' . $path;
    }

    return [
        'download' => true,
        'url' => $url,
        'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path
    ];
}
//下载并替换资源
/**
 * Upload the resources that still need downloading and rewrite every
 * resource URL in the markup to its hosted location.
 *
 * @param string $html       Page markup.
 * @param array  $source     Entries produced by url_check().
 * @param mixed  $project_id Project the page belongs to.
 * @return string Markup with the resource URLs replaced.
 */
protected function upload_source($html, $source, $project_id)
{
    foreach ($source as $item) {
        if (!$item['download']) {
            // Known resource: point the markup at the stored target.
            $html = str_replace($item['url'], getImageUrl($item['url_complete']), $html);
            continue;
        }

        $uploaded = CosService::uploadRemote($project_id, 'source', $item['url_complete']);
        if (!$uploaded) {
            // Upload produced nothing: leave the original URL in place.
            continue;
        }

        // Remember the origin -> target mapping for later pages.
        $record = [
            'project_id' => $project_id,
            'origin' => $item['url'],
            'target' => $uploaded,
            'created_at' => date('Y-m-d H:i:s'),
            'updated_at' => date('Y-m-d H:i:s'),
        ];
        CollectSource::insert($record);

        $html = str_replace($item['url'], getImageUrl($uploaded), $html);
    }

    return $html;
}
}
... ...
... ... @@ -4,8 +4,8 @@ namespace App\Console\Commands\Update;
use App\Helper\Arr;
use App\Http\Logic\Bside\Product\CategoryLogic;
use App\Http\Logic\Bside\Product\KeywordLogic;
use App\Models\Blog\Blog;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Models\News\News;
use App\Models\Product\Category;
... ... @@ -77,7 +77,7 @@ class ProjectUpdate extends Command
$task->save();
//设置数据库
$project = ProjectServer::useProject($task->project_id);
$project = ProjectServer::useProject($project_id);
if ($project) {
if ($api_type == 'category') {
//分类
... ... @@ -86,48 +86,6 @@ class ProjectUpdate extends Command
if (isset($data['code']) && $data['code'] == 200) {
$items = $data['data'] ?? [];
$this->category_insert($project_id, $items, 0);
// $model = new Category();
// foreach ($items as $item) {
// $parent = $model->read(['pid' => 0, 'title' => $item['name']], 'id');
// if (!$parent) {
// try {
// $parent_id = $model->addReturnId([
// 'project_id' => $project_id,
// 'title' => $item['name'],
// 'pid' => 0,
// 'keywords' => $item['keywords'],
// 'describe' => $item['description']
// ]);
// $route = RouteMap::setRoute($item['url'] ? $this->get_url_route($item['url']) : $item['name'], RouteMap::SOURCE_PRODUCT_CATE, $parent_id, $project_id);
// $model->edit(['route' => $route], ['id' => $parent_id]);
// } catch (\Exception $e) {
// echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
// continue;
// }
// } else {
// $parent_id = $parent['id'];
// }
//
// foreach ($item['children'] as $child) {
// $child_info = $model->read(['pid' => $parent_id, 'title' => $child['name']]);
// if (!$child_info) {
// try {
// $child_id = $model->addReturnId([
// 'project_id' => $project_id,
// 'title' => $child['name'],
// 'pid' => $parent_id,
// 'keywords' => $child['keywords'],
// 'describe' => $child['description']
// ]);
// $route = RouteMap::setRoute($child['url'] ? $this->get_url_route($child['url']) : $child['name'], RouteMap::SOURCE_PRODUCT_CATE, $child_id, $project_id);
// $model->edit(['route' => $route], ['id' => $child_id]);
// } catch (\Exception $e) {
// echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
// continue;
// }
// }
// }
// }
} else {
return true;
}
... ... @@ -164,7 +122,11 @@ class ProjectUpdate extends Command
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
continue;
}
} else {
$id = $keyword['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id);
}
}
}
... ... @@ -279,7 +241,11 @@ class ProjectUpdate extends Command
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
continue;
}
} else {
$id = $product['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id);
}
}
}
... ... @@ -329,7 +295,11 @@ class ProjectUpdate extends Command
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
continue;
}
} else {
$id = $news['id'];
}
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id);
}
}
}
... ... @@ -373,7 +343,11 @@ class ProjectUpdate extends Command
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
continue;
}
} else {
$id = $custom['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id);
}
}
}
... ... @@ -423,6 +397,7 @@ class ProjectUpdate extends Command
return $arr[count($arr) - 2];
}
//多级分类入库
protected function category_insert($project_id, $items, $pid = 0)
{
$model = new Category();
... ...
<?php
namespace App\Http\Controllers\Aside;
use App\Http\Logic\Aside\CollectLogic;
/**
* 提供给AICC采集
* Class CollectController
* @package App\Http\Controllers\Aside
* @author zbj
* @date 2023/11/10
*/
class CollectController extends BaseController
{
    /**
     * Return the data selected by CollectLogic for the current request,
     * wrapped in the controller's success response.
     *
     * @author zbj
     * @date 2023/11/10
     */
    public function index(CollectLogic $collectLogic)
    {
        return $this->success($collectLogic->collect_data());
    }
}
... ...
... ... @@ -801,7 +801,7 @@ class ProjectController extends BaseController
'project_id.required' => 'project_id不能为空',
]);
$token = $logic->getAiccToken($this->map);
$token = $logic->getSiteToken($this->map);
$this->response('success',Code::SUCCESS,['site_token' => $token]);
... ...
<?php
namespace App\Http\Logic\Aside;
use App\Helper\Arr;
use App\Http\Logic\Logic;
use App\Models\Blog\Blog;
use App\Models\Domain\DomainInfo;
use App\Models\News\News;
use App\Models\Product\Keyword;
use App\Models\Product\Product;
use App\Models\Project\Project;
use App\Services\ProjectServer;
/**
* Class CollectLogic
* @package App\Http\Logic\Aside
* @author zbj
* @date 2023/11/10
*/
class CollectLogic extends Logic
{
protected $project;
protected $domain;
protected $type;
protected $page_size = 100;
public function __construct()
{
// Validate the request immediately; checkAuth() fills $project, $domain and $type.
$this->checkAuth();
}
/**
* 校验权限
* @throws \App\Exceptions\AsideGlobalException
* @throws \App\Exceptions\BsideGlobalException
* @author zbj
* @date 2023/11/10
*/
public function checkAuth()
{
    $req = request();

    // The caller identifies itself with the project's site token header.
    $token = $req->header('site-token');
    if (!$token) {
        $this->fail('参数异常');
    }

    $this->project = Project::where('site_token', $token)->first();
    if (!$this->project) {
        $this->fail('授权码无效');
    }

    // The requested domain must be registered for that project.
    $requested_domain = $req->input('domain');
    $matched = DomainInfo::where('project_id', $this->project->id)
        ->where('domain', $requested_domain)
        ->first();
    if (!$matched) {
        $this->fail('域名不匹配');
    }

    // Base URL used to build absolute links, and the kind of data requested.
    $this->domain = 'https://' . $matched['domain'] . '/';
    $this->type = $req->input('type', '');
}
/**
 * Return the data set selected by the request's "type" parameter.
 *
 * The type comes straight from the request, so it is checked against the
 * list of supported collectors before it is used as a method name. Without
 * that check a caller could invoke any method of this object, including
 * collect_data() itself.
 *
 * @return array Paginated list for a supported type, an empty array otherwise.
 */
public function collect_data()
{
    ProjectServer::useProject($this->project->id);

    $action = $this->type;
    // Extend this list when a new collector method is added.
    if (!in_array($action, ['product', 'news', 'blog'], true)) {
        return [];
    }

    return $this->$action();
}
public function __call($name, $param)
{
// Calls to methods this class does not define return an empty array rather than raising an error.
return [];
}
/**
 * List published products in the export format.
 *
 * Keywords come from the product's linked keyword titles; when there are
 * none, the keyword stored in the product's SEO meta is used instead.
 *
 * @return array Result of getList() with 'list' replaced by the export rows.
 */
public function product()
{
    $this->model = new Product();
    $where[] = ['status' => Product::STATUS_ON];
    $sort = ['sort' => 'desc'];
    $columns = ['title', 'content', 'gallery', 'seo_mate', 'intro', 'route', 'keyword_id'];
    $list = self::getList($where, $sort, $columns, $this->page_size);

    $data = [];
    foreach ($list['list'] as $item) {
        // Reset per product: otherwise the variable is undefined for the first
        // product without keyword ids and carries over from the previous one.
        $keyword = '';
        if ($item['keyword_id']) {
            $titles = Keyword::whereIn('id', $item['keyword_id'])->pluck('title')->toArray();
            if ($titles) {
                $keyword = implode(',', $titles);
            }
        }
        // No keyword titles: fall back to the SEO keyword.
        $keyword = $keyword ?: ($item['seo_mate']['keyword'] ?? '');

        $data[] = [
            'title' => $item['title'],
            'url' => $this->domain . $item['route'],
            'keywords' => $keyword,
            'description' => strip_tags($item['intro'] ?: ''),
            'content' => strip_tags($item['content'] ?: ''),
            'img' => array_column($item['gallery'] ?: [], 'url')
        ];
    }

    $list['list'] = $data;
    return $list;
}
/**
 * List news entries with status News::STATUS_ONE in the export format.
 *
 * @return array Result of getList() with 'list' replaced by the export rows.
 */
public function news()
{
    $this->model = new News();

    $where = [['status' => News::STATUS_ONE]];
    $list = self::getList(
        $where,
        ['sort' => 'desc'],
        ['name', 'text', 'image', 'seo_keywords', 'remark', 'url'],
        $this->page_size
    );

    $rows = [];
    foreach ($list['list'] as $entry) {
        $row = [];
        $row['title'] = $entry['name'];
        $row['url'] = $this->domain . $entry['url'];
        $row['keywords'] = $entry['seo_keywords'];
        // Description and content are exported as plain text.
        $row['description'] = strip_tags($entry['remark'] ?: '');
        $row['content'] = strip_tags($entry['text'] ?: '');
        $row['img'] = $entry['image'] ?: '';
        $rows[] = $row;
    }

    $list['list'] = $rows;
    return $list;
}
/**
 * List blog entries with status Blog::STATUS_ONE in the export format.
 *
 * @return array Result of getList() with 'list' replaced by the export rows.
 */
public function blog()
{
    $this->model = new Blog();

    $where = [['status' => Blog::STATUS_ONE]];
    $list = self::getList(
        $where,
        ['sort' => 'desc'],
        ['name', 'text', 'image', 'seo_keywords', 'remark', 'url'],
        $this->page_size
    );

    $rows = [];
    foreach ($list['list'] as $post) {
        $row = [];
        $row['title'] = $post['name'];
        $row['url'] = $this->domain . $post['url'];
        $row['keywords'] = $post['seo_keywords'];
        // Description and content are exported as plain text.
        $row['description'] = strip_tags($post['remark'] ?: '');
        $row['content'] = strip_tags($post['text'] ?: '');
        $row['img'] = $post['image'] ?: '';
        $rows[] = $row;
    }

    $list['list'] = $rows;
    return $list;
}
}
... ...
... ... @@ -651,6 +651,7 @@ class ProjectLogic extends BaseLogic
$query->select('*')->from("{$name}");
}
);
if (Schema::connection('custom_mysql')->hasColumn($table, 'project_id')) {
DB::connection('custom_mysql')->table($table)->update(['project_id' => $news_project_id]);
}
... ... @@ -659,16 +660,16 @@ class ProjectLogic extends BaseLogic
}
/**
* 获取AICC采集数据接口token
* 对外接口token
* @param $data
* @return string
* @author zbj
* @date 2023/11/10
*/
public function getAiccToken($data){
public function getSiteToken($data){
$project = $this->getCacheInfo($data['project_id']);
if(empty($project['site_token']) || !empty($data['refresh'])){
$token = strtolower(Str::random() . base64_encode("globalso_v6"));
$token = strtolower(base64_encode("6.0") . md5('project_' . $data['project_id'] . '_' . time()));
$project->site_token = $token;
$project->save();
}
... ...
... ... @@ -69,8 +69,6 @@ class CustomTemplateLogic extends BaseLogic
}else{
if($this->param['url'] == $this->model::NOT_FOUND_PAGE_URL){
$this->fail('404页面已存在');
}else{
$this->param['url'] = $this->param['url'].'-tag';
}
$this->param['project_id'] = $this->user['project_id'];
$id = $this->model->addReturnId($this->param);
... ...
<?php
namespace App\Models\Collect;
use App\Models\Base;
/**
 * Model for the gl_collect_source table on the custom_mysql connection.
 */
class CollectSource extends Base
{
// Table backing this model
protected $table = 'gl_collect_source';
// Database connection used by this model
protected $connection = 'custom_mysql';
}
... ...
<?php
namespace App\Models\Collect;
use App\Models\Base;
/**
 * Model for the gl_collect_task table on the custom_mysql connection.
 * Each row describes one page (domain + route) to be collected.
 */
class CollectTask extends Base
{
    // Table backing this model
    protected $table = 'gl_collect_task';
    // Database connection used by this model
    protected $connection = 'custom_mysql';

    // Task status values
    const STATUS_UN = 0;
    const STATUS_ING = 1;
    const STATUS_COM = 2;

    /**
     * Register a page for collection unless an identical task already exists.
     *
     * Nothing is stored when the URL is empty, cannot be parsed, or has no host.
     *
     * @param string $url        Absolute URL of the page.
     * @param mixed  $project_id Project the page belongs to.
     * @param mixed  $source     Kind of record the page belongs to.
     * @param mixed  $source_id  Id of that record.
     * @return void
     */
    public static function _insert($url, $project_id, $source, $source_id)
    {
        if (!$url) {
            return;
        }

        // parse_url() returns false for malformed input and omits components
        // that are absent, so both cases are checked before reading offsets.
        $url_arr = parse_url($url);
        if (!is_array($url_arr) || empty($url_arr['host'])) {
            return;
        }

        $data = [
            'project_id' => $project_id,
            'source' => $source,
            'source_id' => $source_id,
            'domain' => $url_arr['host'],
            // A bare "https://host" URL has no path component.
            'route' => $url_arr['path'] ?? ''
        ];

        $task = self::where($data)->first();
        if (!$task) {
            $data['created_at'] = $data['updated_at'] = date('Y-m-d H:i:s');
            self::insert($data);
        }
    }
}
... ...
... ... @@ -13,6 +13,9 @@ class UpdateLog extends Model
const STATUS_ING = 1;//导入中
const STATUS_COM = 2;//导入完成
const COLLECT_STATUS_UN = 0;//未开始
const COLLECT_STATUS_COM = 1;//采集完成
/**
* 创建更新日志
* @param $project_id
... ... @@ -30,6 +33,7 @@ class UpdateLog extends Model
$log->api_type = $type;
$log->api_url = $url;
$log->sort = $type == 'category' ? 0 :1;
$log->collect_status = in_array($type, ['website_info', 'category']) ? 1 : 0;
return $log->save();
}
return true;
... ...
... ... @@ -340,6 +340,8 @@ Route::group([], function () {
// 提供模板 提单后台查看
Route::any('get_template_list', [Aside\Template\ATemplateController::class, 'getTemplateList'])->name('admin.get_template_list');
Route::any('get_template_detail', [Aside\Template\ATemplateController::class, 'getTemplateDetail'])->name('admin.get_template_detail');
Route::any('/collect', [Aside\CollectController::class, 'index'])->name('admin.collect');
});
... ...