作者 lyh

gx

... ... @@ -320,7 +320,7 @@ class SyncProject extends Command
$param['contract'] = Arr::a2s($param['contract']);
$param['bill'] = Arr::a2s($param['bill']);
if(!isset($param['renewal_record'])){
$param['renewal_record'] = [["amount"=> null, "remark"=> null, "expire_at"=> null]];
$param['renewal_record'] = Arr::a2s([["amount"=> null, "remark"=> null, "expire_at"=> null]]);
}
$info = $paymentModel->read(['project_id'=>$id]);
if($info !== false){
... ...
... ... @@ -5,6 +5,7 @@ namespace App\Console\Commands\Update;
use App\Models\Collect\CollectSource;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
... ... @@ -12,7 +13,7 @@ use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Redis;
/**
* 4.0,5.0升级到6.0,页面采集
* 4.0,5.0升级到6.0,主站页面采集
* Class ProjectImport
* @package App\Console\Commands
* @author Akun
... ... @@ -37,9 +38,9 @@ class HtmlCollect extends Command
public function handle()
{
// while (true) {
while (true) {
$this->start_collect();
// }
}
}
protected function start_collect()
... ... @@ -62,7 +63,7 @@ class HtmlCollect extends Command
//设置数据库
$project = ProjectServer::useProject($project_id);
if ($project) {
$collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->first();
$collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first();
if (!$collect_info) {
sleep(2);
... ... @@ -83,6 +84,9 @@ class HtmlCollect extends Command
$html = $this->upload_source($html, $source_list, $project_id);
}
} catch (\Exception $e) {
$collect_info->status = CollectTask::STATUS_FAIL;
$collect_info->save();
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL;
return true;
}
... ... @@ -114,11 +118,29 @@ class HtmlCollect extends Command
return false;
}
switch ($update_log->api_type) {
case 'page':
$source = RouteMap::SOURCE_PAGE;
break;
case 'news':
$source = RouteMap::SOURCE_NEWS;
break;
case 'blog':
$source = RouteMap::SOURCE_BLOG;
break;
case 'tag':
$source = RouteMap::SOURCE_PRODUCT_KEYWORD;
break;
default:
$source = RouteMap::SOURCE_PRODUCT;
break;
}
$complete = false;
//设置数据库
$project = ProjectServer::useProject($update_log->project_id);
if ($project) {
$collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log['project_id'])->where('status', CollectTask::STATUS_UN)->limit(50)->get();
$collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log['project_id'])->where('source', $source)->where('language', '')->where('status', CollectTask::STATUS_UN)->orderBy('id', 'asc')->limit(50)->get();
if ($collect_list->count() == 0) {
$complete = true;
... ... @@ -132,7 +154,7 @@ class HtmlCollect extends Command
DB::disconnect('custom_mysql');
if ($complete) {
$update_log->collect_status = UpdateLog::COLLECT_STATUS_COM;
$update_log->collect_status = UpdateLog::COLLECT_STATUS_MAIN;
return 0;
}
... ... @@ -203,9 +225,9 @@ class HtmlCollect extends Command
$path = $arr['path'] ?? '';
if (
(strpos($host, '.globalso.') === false) &&
(strpos($host, '.goodao.') === false) &&
$path && (strpos($path, '.') !== false)
(empty($host) || $host == $domain)
&& $path
&& (strpos($path, '.') !== false)
) {
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
... ...
<?php
namespace App\Console\Commands\Update;
use App\Models\Collect\CollectSource;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Redis;
/**
 * Upgrade from 4.0/5.0 to 6.0: page collection for the minor-language (non-default language) sites
 * Class HtmlLanguageCollect
 * @package App\Console\Commands\Update
 * @author Akun
 * @date 2023/11/20 14:04
 */
class HtmlLanguageCollect extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'project_html_language_collect';
/**
* The console command description.
*
* @var string
*/
protected $description = '执行项目html页面采集';
/**
 * Command entry point.
 *
 * Runs collection cycles back to back and never returns; pacing (sleeping)
 * is done inside start_collect().
 */
public function handle()
{
    do {
        $this->start_collect();
    } while (true);
}
/**
 * Run one collection cycle: take one queued task, download the language page,
 * localise its static resources and store the resulting html on the task row.
 *
 * The task identifier returned by get_task() has the form "{project_id}_{collect_id}".
 *
 * @return bool Always true; the caller loops regardless of the outcome.
 */
protected function start_collect()
{
    $task_id = $this->get_task();
    if ($task_id === 0) {
        // The current project has no pending language pages left.
        sleep(2);
        return true;
    }
    if (!is_string($task_id) || strpos($task_id, '_') === false) {
        // false: every project is finished. An empty queue pop (null) or a
        // malformed entry is treated the same way instead of crashing on explode().
        sleep(60);
        return true;
    }

    $task_arr = explode('_', $task_id, 2);
    $project_id = $task_arr[0];
    $collect_id = $task_arr[1];

    try {
        // Switch to the project database.
        $project = ProjectServer::useProject($project_id);
        if ($project) {
            $collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '!=', '')->first();
            if ($collect_info) {
                echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect start' . PHP_EOL;

                $collect_info->status = CollectTask::STATUS_ING;
                $collect_info->save();

                // Fetch the html page, download its resources and rewrite the references.
                try {
                    $page_url = 'https://' . $collect_info->domain . $collect_info->route;
                    $html = file_get_contents($page_url);
                    if ($html === false) {
                        // When warnings are not converted to exceptions a failed
                        // fetch only returns false; never store that as a page.
                        throw new \RuntimeException('failed to fetch ' . $page_url);
                    }

                    $source_list = $this->html_preg($html, $project_id, $collect_info->domain);
                    if ($source_list) {
                        $html = $this->upload_source($html, $source_list, $project_id);
                    }
                } catch (\Exception $e) {
                    $collect_info->status = CollectTask::STATUS_FAIL;
                    $collect_info->save();

                    echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL;
                    return true;
                }

                $collect_info->html = $html;
                $collect_info->status = CollectTask::STATUS_COM;
                $collect_info->save();

                echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect end' . PHP_EOL;
            }
        }
    } finally {
        // Close the project connection on every path, including the early
        // return above and any exception that propagates out of this method.
        DB::disconnect('custom_mysql');
    }

    sleep(2);
    return true;
}
/**
 * Get the next language-page collect task.
 *
 * Tasks are queued in a Redis list as "{project_id}_{collect_id}". When the
 * list is empty it is refilled with up to 50 pending language tasks of the
 * first update log whose main-site collection is already finished.
 *
 * @return string|int|false Task identifier; 0 when the selected update log has
 *                          no pending language task left (it is then marked as
 *                          fully collected); false when there is nothing to do.
 */
protected function get_task()
{
    $key = 'console_html_language_collect_task';

    $task_id = Redis::rpop($key);
    if ($task_id) {
        return $task_id;
    }

    $update_log = UpdateLog::where('status', UpdateLog::STATUS_COM)->where('collect_status', UpdateLog::COLLECT_STATUS_MAIN)->orderBy('project_id', 'asc')->first();
    if (!$update_log) {
        return false;
    }

    // Map the api type of the update log to the route source of its tasks.
    switch ($update_log->api_type) {
        case 'page':
            $source = RouteMap::SOURCE_PAGE;
            break;
        case 'news':
            $source = RouteMap::SOURCE_NEWS;
            break;
        case 'blog':
            $source = RouteMap::SOURCE_BLOG;
            break;
        case 'tag':
            $source = RouteMap::SOURCE_PRODUCT_KEYWORD;
            break;
        default:
            $source = RouteMap::SOURCE_PRODUCT;
            break;
    }

    $complete = false;
    // Switch to the project database.
    $project = ProjectServer::useProject($update_log->project_id);
    if ($project) {
        $collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log->project_id)->where('source', $source)->where('language', '!=', '')->where('status', CollectTask::STATUS_UN)->orderBy('id', 'asc')->limit(50)->get();
        if ($collect_list->count() == 0) {
            $complete = true;
        } else {
            foreach ($collect_list as $collect) {
                Redis::lpush($key, $collect['project_id'] . '_' . $collect['id']);
            }
        }
    }
    // Close the project database connection.
    DB::disconnect('custom_mysql');

    if ($complete) {
        $update_log->collect_status = UpdateLog::COLLECT_STATUS_COM;
        // Persist the new status. Without this the same log row is selected on
        // every call and later projects are never reached.
        $update_log->save();
        return 0;
    }

    // Normalise an empty pop (null or false, depending on the Redis client)
    // to the documented "nothing to do" value.
    $task_id = Redis::rpop($key);
    return $task_id ?: false;
}
/**
 * Extract the resource URLs referenced by an html page.
 *
 * Looks at img/script/source "src" attributes, link "href" attributes and
 * css url(...) references, and keeps those url_check() accepts.
 *
 * Each distinct URL is checked and returned once: upload_source() replaces
 * every occurrence of a URL in one pass, so returning duplicates would only
 * trigger repeated uploads and duplicate CollectSource rows.
 *
 * @param string $html       Page content
 * @param mixed  $project_id Project the page belongs to
 * @param string $domain     Domain value of the collect task
 * @return array List of url_check() results, in order of first appearance
 */
protected function html_preg($html, $project_id, $domain)
{
    $source = [];
    if (!$html) {
        return $source;
    }

    // [pattern, index of the capture group holding the URL]
    $rules = [
        //image
        ['/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
        //js
        ['/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
        //video
        ['/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
        //css
        ['/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', 2],
        //css background
        ["/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", 1],
    ];

    $seen = [];
    foreach ($rules as $rule) {
        preg_match_all($rule[0], $html, $matches);
        $urls = $matches[$rule[1]] ?? [];
        foreach ($urls as $url) {
            if (isset($seen[$url])) {
                continue;
            }
            $seen[$url] = true;

            $checked = $this->url_check($url, $project_id, $domain);
            if ($checked) {
                $source[] = $checked;
            }
        }
    }

    return $source;
}
/**
 * Decide whether a resource URL belongs to the collected site and whether it
 * still has to be downloaded.
 *
 * A URL qualifies when it is host-less or points at the site host, uses
 * http(s) or no scheme, and its path contains a dot (looks like a file).
 *
 * @param string $url        URL exactly as written in the page
 * @param mixed  $project_id Project the page belongs to
 * @param string $domain     Domain value of the collect task; for sub-directory
 *                           languages this is "host/lang"
 * @return array|false false when the URL is not to be handled; otherwise
 *                     ['download' => bool, 'url' => original URL,
 *                      'url_complete' => absolute URL to fetch, or the stored
 *                      target when the resource was downloaded before]
 */
protected function url_check($url, $project_id, $domain)
{
    if (!$url) {
        return false;
    }

    $arr = parse_url($url);
    if (!is_array($arr)) {
        // parse_url() returns false for seriously malformed URLs.
        return false;
    }
    $scheme = $arr['scheme'] ?? '';
    $host = $arr['host'] ?? '';
    $path = $arr['path'] ?? '';

    // Only web resources can be fetched; this rejects data:, javascript:, etc.
    if ($scheme !== '' && !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return false;
    }

    // Sub-directory language tasks carry "host/lang" as domain. Resources are
    // compared with, and resolved against, the bare host.
    $site_host = explode('/', (string)$domain, 2)[0];
    if ($host !== '' && strcasecmp($host, $site_host) !== 0) {
        return false;
    }

    if (!$path || strpos($path, '.') === false) {
        return false;
    }

    $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
    if ($source) {
        // Already downloaded earlier: reuse the stored target.
        return [
            'download' => false,
            'url' => $url,
            'url_complete' => $source['target']
        ];
    }

    // Best effort for paths without a leading slash: resolve them against the
    // site root so that host and path are at least separated.
    if ($path[0] !== '/') {
        $path = '/' . $path;
    }

    return [
        'download' => true,
        'url' => $url,
        'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $site_host) . $path
    ];
}
/**
 * Download the resources of a page and rewrite the page to reference the copies.
 *
 * @param string $html       Page content
 * @param array  $source     Items produced by url_check()
 * @param mixed  $project_id Project the page belongs to
 * @return string Page content with the resource URLs replaced
 */
protected function upload_source($html, $source, $project_id)
{
    foreach ($source as $vs) {
        if (!$vs['download']) {
            // Downloaded before: url_complete already holds the stored target.
            $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html);
            continue;
        }

        $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
        if (!$new_source) {
            // Upload failed: leave the original reference untouched.
            continue;
        }

        CollectSource::insert([
            'project_id' => $project_id,
            'origin' => $vs['url'],
            'target' => $new_source,
            'created_at' => date('Y-m-d H:i:s'),
            'updated_at' => date('Y-m-d H:i:s'),
        ]);
        $html = str_replace($vs['url'], getImageUrl($new_source), $html);

        if (substr($new_source, -3, 3) == 'css') {
            $this->upload_css_source($vs['url_complete'], $new_source, $project_id);
        }
    }

    return $html;
}

/**
 * Download the resources a css file references through relative url(...) paths.
 *
 * Each resource is stored next to the uploaded css file (same directory, same
 * relative path) so the references inside the css file keep resolving.
 *
 * @param string $css_url    Absolute URL of the original css file
 * @param string $css_target Storage key of the uploaded css file
 * @param mixed  $project_id Project the css file belongs to
 * @return void
 */
protected function upload_css_source($css_url, $css_target, $project_id)
{
    $css_html = file_get_contents($css_url);
    if (!$css_html) {
        return;
    }

    preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
    $css_source = $result_css_source[1] ?? [];

    $url_arr = explode('/', $css_url);
    $target_arr = explode('/', $css_target);

    foreach ($css_source as $vcs) {
        $vcs_arr = parse_url($vcs);
        if (!is_array($vcs_arr) || isset($vcs_arr['host']) || isset($vcs_arr['scheme'])) {
            // Not a relative path (absolute URL, data: URI, ...): do not download.
            // parse_url() exposes the host under 'host'; it has no 'domain' key.
            continue;
        }

        $vcs = $vcs_arr['path'] ?? '';
        if (!$vcs) {
            continue;
        }

        $source_info = CollectSource::where('project_id', $project_id)->where('origin', $vcs)->first();
        if ($source_info) {
            // Already stored: do not download again.
            continue;
        }

        // Swap the css file name for the relative path, for both the remote
        // URL and the storage key.
        $url_arr[count($url_arr) - 1] = $vcs;
        $url_css_complete = implode('/', $url_arr);
        $target_arr[count($target_arr) - 1] = $vcs;
        $path = implode('/', $target_arr);

        $new_source_css = CosService::uploadRemote($project_id, 'source', $url_css_complete, $path);
        if ($new_source_css) {
            CollectSource::insert([
                'project_id' => $project_id,
                'origin' => $vcs,
                'target' => $new_source_css,
                'created_at' => date('Y-m-d H:i:s'),
                'updated_at' => date('Y-m-d H:i:s'),
            ]);
        }
    }
}
}
... ...
... ... @@ -76,6 +76,22 @@ class ProjectUpdate extends Command
$task->status = UpdateLog::STATUS_ING;//同步中
$task->save();
$domain_arr = parse_url($api_url);
//获取网站配置
$link_type = 0;
$url_web_config = 'https://' . $domain_arr['host'] . '/wp-content/cache/user_config.text';
$data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
if ($data_config) {
$link_type = $data_config['link_type'];
}
//获取所有语种
$language_list = [];
$url_language = 'https://' . $domain_arr['host'] . '/wp-content/plugins/proofreading/json/user_language.json';
$data_language = http_get($url_language, ['charset' => 'UTF-8']);
if ($data_language) {
$language_list = array_column($data_language, 'short');
}
//设置数据库
$project = ProjectServer::useProject($project_id);
if ($project) {
... ... @@ -126,7 +142,7 @@ class ProjectUpdate extends Command
$id = $keyword['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id, $link_type, $language_list);
}
}
}
... ... @@ -258,7 +274,7 @@ class ProjectUpdate extends Command
$id = $product['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id, $link_type, $language_list);
}
}
}
... ... @@ -312,7 +328,7 @@ class ProjectUpdate extends Command
$id = $news['id'];
}
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id);
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id, $link_type, $language_list);
}
}
}
... ... @@ -360,7 +376,7 @@ class ProjectUpdate extends Command
$id = $custom['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id, $link_type, $language_list);
}
}
}
... ...
... ... @@ -67,6 +67,7 @@ class UpdateProgress extends Command
public function getUpdateProgress($project_id){
$info = DB::connection('custom_mysql')->table('gl_update_progress')->whereRaw('total_num > current_num')->first();
if(!empty($info)){
$info = (array)$info;
//超时时间
$time = date("Y-m-d H:i:s",strtotime($info['created_at']) + $info['total_num'] * 60);
if($time > date("Y-m-d H:i:s")){
... ...
... ... @@ -84,7 +84,7 @@ if (!function_exists('http_get')) {
/**
* 发送http get请求
* @param type $url
* @return type
* @return []
*/
function http_get($url, $header = [])
{
... ...
... ... @@ -215,7 +215,7 @@ class DomainInfoLogic extends BaseLogic
}
//域名是否都已经解析
if($info['domain'] && !$this->check_cname($info['domain'], $server_info)){
if(!empty($info['domain']) && !$this->check_cname($info['domain'], $server_info)){
$this->fail('域名' . $info['domain'] . '未解析至目标服务器');
}
foreach ($this->param['other_domain']??[] as $other_domain){
... ...
... ... @@ -533,7 +533,7 @@ class ProjectLogic extends BaseLogic
if($server_info && $domain_info){
//验证解析
if ($domain_info['domain'] && !DomainInfoLogic::instance()->check_cname($domain_info['domain'], $server_info)) {
if (!empty($domain_info['domain']) && !DomainInfoLogic::instance()->check_cname($domain_info['domain'], $server_info)) {
throw new AsideGlobalException(Code::SYSTEM_ERROR,'域名' . $domain_info['domain'] . '未解析至目标服务器');
}
$api_url = 'http://'.$server_info['init_domain'].'/api/createSite';
... ...
... ... @@ -14,27 +14,67 @@ class CollectTask extends Base
const STATUS_UN = 0;
const STATUS_ING = 1;
const STATUS_COM= 2;
const STATUS_COM = 2;
const STATUS_FAIL = 3;
public static function _insert($url, $project_id, $source, $source_id)
public static function _insert($url, $project_id, $source, $source_id, $link_type = 0, $language_list = [])
{
if(!$url){
if (!$url) {
return;
}
$url_arr = parse_url($url);
$where = [
'project_id' => $project_id,
'source' => $source,
'source_id' => $source_id,
'domain' => $url_arr['host'],
'route' => $url_arr['path'],
'language' => ''
];
$task = self::where($where)->first();
if (!$task) {
$now = date('Y-m-d H:i:s');
$data = [
[
'project_id' => $project_id,
'source' => $source,
'source_id' => $source_id,
'domain' => $url_arr['host'],
'route' => $url_arr['path']
'route' => $url_arr['path'],
'language' => '',
'created_at' => $now,
'updated_at' => $now,
]
];
$task = self::where($data)->first();
if(!$task){
$data['created_at'] = $data['updated_at'] = date('Y-m-d H:i:s');
if ($link_type > 0 && $language_list) {
$domain_arr = explode('.', $url_arr['host']);
foreach ($language_list as $v_lan) {
if ($link_type == 1) {
//二级域名
$domain_arr[0] = $v_lan;
$new_domain = implode('.', $domain_arr);
} else {
//二级目录
$new_domain = $url_arr['host'] . '/' . $v_lan;
}
$data[] = [
'project_id' => $project_id,
'source' => $source,
'source_id' => $source_id,
'domain' => $new_domain,
'route' => $url_arr['path'],
'language' => $v_lan,
'created_at' => $now,
'updated_at' => $now,
];
}
}
self::insert($data);
}
}
... ...
... ... @@ -14,7 +14,8 @@ class UpdateLog extends Model
const STATUS_COM = 2;//导入完成
const COLLECT_STATUS_UN = 0;//未开始
const COLLECT_STATUS_COM = 1;//采集完成
const COLLECT_STATUS_COM = 1;//全站小语种采集完成
const COLLECT_STATUS_MAIN = 2;//英语主站采集完成
/**
* 创建更新日志
... ... @@ -33,7 +34,7 @@ class UpdateLog extends Model
$log->api_type = $type;
$log->api_url = $url;
$log->sort = $type == 'category' ? 0 :1;
$log->collect_status = in_array($type, ['website_info', 'category']) ? 1 : 0;
$log->collect_status = ($type == 'category' || $type == 'website_info') ? 1 : 0;
return $log->save();
}
return true;
... ...
... ... @@ -93,11 +93,23 @@ class CosService
'secretKey' => $cos['credentials']['secretKey'],
],
]);
$opts = [
'http' => [
'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
]
];
$body = file_get_contents($file_url,false,stream_context_create($opts));
try {
$cosClient->putObject([
'Bucket' => $cos['bucket'],
'Key' => $key,
'Body' => fopen($file_url, 'r'),
'Body' => $body,
]);
return $key;
}catch (\Exception $e){
... ...