作者 lyh

Merge branch 'master' of http://47.244.231.31:8099/zhl/globalso-v6 into develop

... ... @@ -75,10 +75,27 @@ class HtmlCollect extends Command
$collect_info->status = CollectTask::STATUS_ING;
$collect_info->save();
//获取站点正式和测试域名
$web_url_domain = $collect_info->domain;
$home_url = $collect_info->domain;
$url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
$data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
if ($data_config) {
$web_url_arr = parse_url($data_config['web_url_domain']);
if (isset($web_url_arr['host'])) {
$web_url_domain = $web_url_arr['host'];
}
$home_url_arr = parse_url($data_config['home_url']);
if (isset($home_url_arr['host'])) {
$home_url = $home_url_arr['host'];
}
}
//采集html页面,下载资源到本地并替换
try {
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
$source_list = $this->html_preg($html, $project_id, $collect_info->domain);
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
... ... @@ -164,7 +181,7 @@ class HtmlCollect extends Command
}
//正则匹配html资源
protected function html_preg($html, $project_id, $domain)
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
$source = [];
... ... @@ -176,7 +193,7 @@ class HtmlCollect extends Command
preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
$img = $result_img[2] ?? [];
foreach ($img as $vi) {
$check_vi = $this->url_check($vi, $project_id, $domain);
$check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
$check_vi && $source[] = $check_vi;
}
... ... @@ -184,7 +201,7 @@ class HtmlCollect extends Command
preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js);
$js = $result_js[2] ?? [];
foreach ($js as $vj) {
$check_vj = $this->url_check($vj, $project_id, $domain);
$check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url);
$check_vj && $source[] = $check_vj;
}
... ... @@ -192,7 +209,7 @@ class HtmlCollect extends Command
preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
$video = $result_video[2] ?? [];
foreach ($video as $vv) {
$check_vv = $this->url_check($vv, $project_id, $domain);
$check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
$check_vv && $source[] = $check_vv;
}
... ... @@ -200,7 +217,7 @@ class HtmlCollect extends Command
preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css);
$css = $result_css[2] ?? [];
foreach ($css as $vc) {
$check_vc = $this->url_check($vc, $project_id, $domain);
$check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url);
$check_vc && $source[] = $check_vc;
}
... ... @@ -208,7 +225,7 @@ class HtmlCollect extends Command
preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b);
$css_b = $result_css_b[1] ?? [];
foreach ($css_b as $vc_b) {
$check_vc_b = $this->url_check($vc_b, $project_id, $domain);
$check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url);
$check_vc_b && $source[] = $check_vc_b;
}
... ... @@ -217,7 +234,7 @@ class HtmlCollect extends Command
}
//判断资源是否需要下载
protected function url_check($url, $project_id, $domain)
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
if ($url) {
$arr = parse_url($url);
... ... @@ -227,11 +244,10 @@ class HtmlCollect extends Command
$query = $arr['query'] ?? '';
if (
(strpos($host, '.globalso.') === false) &&
(strpos($host, '.goodao.') === false) &&
$path && (strpos($path, '.') !== false)
(empty($host) || $host == $web_url_domain || $host == $home_url)
&& $path
&& (strpos($path, '.') !== false)
) {
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
return [
... ... @@ -290,6 +306,9 @@ class HtmlCollect extends Command
if (!$vcs) {
continue;
}
if (strpos($vcs, '.') === false) {
continue;
}
$source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
if ($source_info) {
... ...
... ... @@ -75,10 +75,26 @@ class HtmlLanguageCollect extends Command
$collect_info->status = CollectTask::STATUS_ING;
$collect_info->save();
$web_url_domain = $collect_info->domain;
$home_url = $collect_info->domain;
$url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
$data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
if ($data_config) {
$web_url_arr = parse_url($data_config['web_url_domain']);
if (isset($web_url_arr['host'])) {
$web_url_domain = $web_url_arr['host'];
}
$home_url_arr = parse_url($data_config['home_url']);
if (isset($home_url_arr['host'])) {
$home_url = $home_url_arr['host'];
}
}
//采集html页面,下载资源到本地并替换
try {
$html = file_get_contents('https://' . $collect_info->domain . $collect_info->route);
$source_list = $this->html_preg($html, $project_id, $collect_info->domain);
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
... ... @@ -164,7 +180,7 @@ class HtmlLanguageCollect extends Command
}
//正则匹配html资源
protected function html_preg($html, $project_id, $domain)
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
$source = [];
... ... @@ -176,7 +192,7 @@ class HtmlLanguageCollect extends Command
preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img);
$img = $result_img[2] ?? [];
foreach ($img as $vi) {
$check_vi = $this->url_check($vi, $project_id, $domain);
$check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
$check_vi && $source[] = $check_vi;
}
... ... @@ -184,7 +200,7 @@ class HtmlLanguageCollect extends Command
preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js);
$js = $result_js[2] ?? [];
foreach ($js as $vj) {
$check_vj = $this->url_check($vj, $project_id, $domain);
$check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url);
$check_vj && $source[] = $check_vj;
}
... ... @@ -192,7 +208,7 @@ class HtmlLanguageCollect extends Command
preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
$video = $result_video[2] ?? [];
foreach ($video as $vv) {
$check_vv = $this->url_check($vv, $project_id, $domain);
$check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
$check_vv && $source[] = $check_vv;
}
... ... @@ -200,7 +216,7 @@ class HtmlLanguageCollect extends Command
preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css);
$css = $result_css[2] ?? [];
foreach ($css as $vc) {
$check_vc = $this->url_check($vc, $project_id, $domain);
$check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url);
$check_vc && $source[] = $check_vc;
}
... ... @@ -208,7 +224,7 @@ class HtmlLanguageCollect extends Command
preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b);
$css_b = $result_css_b[1] ?? [];
foreach ($css_b as $vc_b) {
$check_vc_b = $this->url_check($vc_b, $project_id, $domain);
$check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url);
$check_vc_b && $source[] = $check_vc_b;
}
... ... @@ -217,7 +233,7 @@ class HtmlLanguageCollect extends Command
}
//判断资源是否需要下载
protected function url_check($url, $project_id, $domain)
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
if ($url) {
$arr = parse_url($url);
... ... @@ -227,11 +243,10 @@ class HtmlLanguageCollect extends Command
$query = $arr['query'] ?? '';
if (
(strpos($host, '.globalso.') === false) &&
(strpos($host, '.goodao.') === false) &&
$path && (strpos($path, '.') !== false)
(empty($host) || $host == $web_url_domain || $host == $home_url)
&& $path
&& (strpos($path, '.') !== false)
) {
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
return [
... ... @@ -290,6 +305,9 @@ class HtmlLanguageCollect extends Command
if (!$vcs) {
continue;
}
if (strpos($vcs, '.') === false) {
continue;
}
$source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
if ($source_info) {
... ...
... ... @@ -91,6 +91,13 @@ class ProjectUpdate extends Command
if ($data_language) {
$language_list = array_column($data_language, 'short');
}
//获取所有页面
$page_list = [];
$url_page = 'https://' . $domain_arr['host'] . '/wp-content/cache/pages_list.json';
$data_page = http_get($url_page, ['charset' => 'UTF-8']);
if ($data_page) {
$page_list = array_column($data_page, 'path');
}
//设置数据库
$project = ProjectServer::useProject($project_id);
... ... @@ -142,7 +149,7 @@ class ProjectUpdate extends Command
$id = $keyword['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id, $link_type, $language_list);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id, $link_type, $language_list, $page_list);
}
}
}
... ... @@ -274,7 +281,7 @@ class ProjectUpdate extends Command
$id = $product['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id, $link_type, $language_list);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id, $link_type, $language_list, $page_list);
}
}
}
... ... @@ -328,7 +335,7 @@ class ProjectUpdate extends Command
$id = $news['id'];
}
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id, $link_type, $language_list);
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id, $link_type, $language_list, $page_list);
}
}
}
... ... @@ -376,7 +383,7 @@ class ProjectUpdate extends Command
$id = $custom['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id, $link_type, $language_list);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id, $link_type, $language_list, $page_list);
}
}
}
... ...
... ... @@ -177,10 +177,11 @@ class UpdateSeoTdk extends Command
if(!Redis::setnx($cache_key, 1)){
continue;
}
Redis::expire($cache_key, 120);
Redis::expire($cache_key, 300);
echo date('Y-m-d H:i:s') . '更新--' . $table . ': 项目id' . $project_id . ':id' . $v['id'] . PHP_EOL;
$v = DB::connection('custom_mysql')->table($table)->where('id', $v['id'])->first();
$v = (array)$v;
$data = [];
$json_field = '';
foreach ($map as $ai_key => $field) {
... ...
<?php
namespace App\Console\Commands;
use App\Helper\Arr;
use App\Helper\Common;
use App\Helper\Gpt;
use App\Helper\Translate;
use App\Models\Ai\AiCommand;
use App\Models\Mail\Mail;
use App\Models\Project\DeployOptimize;
use App\Models\Project\Project;
use App\Models\Project\ProjectUpdateTdk;
use App\Models\User\User;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Redis;
/**
 * Console command that walks every project of type Project::TYPE_TWO and
 * hands its id to ProjectUpdateTdk::add_task().
 *
 * Class UpdateSeoTdkCrontab
 * @package App\Console\Commands
 * @author zbj
 * @date 2023/10/8
 */
class UpdateSeoTdkCrontab extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'update_seo_tdk_crontab';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = '一键生成tdk';

    /**
     * Register a TDK update task for each matching project.
     *
     * The ids are loaded up front as a plain array and then passed one by one
     * to ProjectUpdateTdk::add_task(); what add_task() does with an id is
     * defined in that model and is not visible from this class.
     *
     * @return int exit status for the console runner; 0 means success
     */
    public function handle()
    {
        $project_ids = Project::where('type', Project::TYPE_TWO)->pluck('id')->toArray();

        foreach ($project_ids as $project_id) {
            ProjectUpdateTdk::add_task($project_id);
        }

        // Return an explicit integer status. The previous "@return bool"
        // contract was misleading: a boolean true would be cast to exit
        // status 1 (failure) by the console runner.
        return 0;
    }
}
... ...
... ... @@ -37,6 +37,7 @@ class Kernel extends ConsoleKernel
$schedule->command('domain_info')->dailyAt('01:00')->withoutOverlapping(1);// 更新域名|证书结束时间,每天凌晨1点执行一次
$schedule->command('last_inquiry')->dailyAt('04:00')->withoutOverlapping(1);// 最近一次询盘信息
$schedule->command('update_progress')->everyThirtyMinutes()->withoutOverlapping(1);//监控更新
$schedule->command('update_seo_tdk_crontab')->dailyAt('00:00')->withoutOverlapping(1); //更新上线项目TDK
}
/**
... ...
... ... @@ -17,7 +17,7 @@ class CollectTask extends Base
const STATUS_COM = 2;
const STATUS_FAIL = 3;
public static function _insert($url, $project_id, $source, $source_id, $link_type = 0, $language_list = [])
public static function _insert($url, $project_id, $source, $source_id, $link_type = 0, $language_list = [], $page_list = [])
{
if (!$url) {
return;
... ... @@ -35,10 +35,10 @@ class CollectTask extends Base
];
$task = self::where($where)->first();
if (!$task) {
$data = [];
$now = date('Y-m-d H:i:s');
$data = [
[
if (!$task) {
$data[] = [
'project_id' => $project_id,
'source' => $source,
'source_id' => $source_id,
... ... @@ -47,10 +47,10 @@ class CollectTask extends Base
'language' => '',
'created_at' => $now,
'updated_at' => $now,
]
];
}
if ($link_type > 0 && $language_list) {
if ($link_type > 0 && $language_list && in_array($url_arr['path'], $page_list)) {
$domain_arr = explode('.', $url_arr['host']);
foreach ($language_list as $v_lan) {
if ($link_type == 1) {
... ... @@ -77,5 +77,4 @@ class CollectTask extends Base
self::insert($data);
}
}
}
... ...
... ... @@ -104,7 +104,11 @@ class CosService
'verify_peer_name' => false,
]
];
try {
$body = file_get_contents($file_url,false,stream_context_create($opts));
}catch (\Exception $e){
return '';
}
try {
$cosClient->putObject([
... ...