作者 赵彬吉
... ... @@ -11,7 +11,6 @@ use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Redis;
/**
... ... @@ -283,6 +282,7 @@ class HtmlCollect extends Command
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
if ($url) {
$url = str_replace('"', '', $url);
$arr = parse_url($url);
$scheme = $arr['scheme'] ?? '';
$host = $arr['host'] ?? '';
... ... @@ -344,6 +344,7 @@ class HtmlCollect extends Command
$url_arr = explode('/', $vs['url_complete']);
$target_arr = explode('/', $new_source);
foreach ($css_source as $vcs) {
$vcs = str_replace('"', '', $vcs);
$vcs_arr = parse_url($vcs);
if (isset($vcs_arr['domain'])) {
//不是相对路径,不下载
... ... @@ -358,7 +359,7 @@ class HtmlCollect extends Command
continue;
}
$path_arr = explode('.', $vcs);
if (end($path_arr) == 'html') {
if (in_array(end($path_arr), ['html', 'php', 'com', 'xml'])) {
continue;
}
... ...
... ... @@ -9,6 +9,7 @@ use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Redis;
... ... @@ -75,42 +76,29 @@ class HtmlLanguageCollect extends Command
$collect_info->status = CollectTask::STATUS_ING;
$collect_info->save();
//获取英文站域名
$domain = $collect_info->domain;
if (strpos($domain, '/') !== false) {
$domain = substr($domain, 0, strpos($domain, '/'));
} else {
$domain = str_replace($collect_info->language, 'www', $domain);
}
$web_url_domain = $domain;
$home_url = $domain;
$url_web_config = 'https://' . $domain . '/wp-content/cache/user_config.text';
$data_config = curl_c($url_web_config);
if ($data_config) {
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
if (isset($web_url_arr['host'])) {
$web_url_domain = $web_url_arr['host'];
}
$home_url_arr = parse_url($data_config['home_url'] ?? '');
if (isset($home_url_arr['host'])) {
$home_url = $home_url_arr['host'];
}
}
//获取站点正式和测试域名
$old_info = $this->getOldDomain($project_id, $collect_info->domain);
//采集html页面,下载资源到本地并替换
try {
$html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
if($html == '0'){
if ($html == '0') {
$collect_info->status = CollectTask::STATUS_FAIL;
$collect_info->save();
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL;
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL;
sleep(2);
return true;
}
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
//如果有base64图片,先替换掉,再进行资源匹配
$new_html = $html;
preg_match_all("/data:([^;]*);base64,(.*)?\"/", $new_html, $result_img);
$img_base64 = $result_img[2] ?? [];
foreach ($img_base64 as $v64) {
$new_html = str_replace($v64, '', $new_html);
}
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
... ... @@ -162,9 +150,6 @@ class HtmlLanguageCollect extends Command
case 'blog':
$source = RouteMap::SOURCE_BLOG;
break;
case 'tag':
$source = RouteMap::SOURCE_PRODUCT_KEYWORD;
break;
default:
$source = RouteMap::SOURCE_PRODUCT;
break;
... ... @@ -197,6 +182,42 @@ class HtmlLanguageCollect extends Command
return $task_id;
}
/**
 * Look up the legacy domains of a site.
 *
 * Returns an array with the keys 'web_url_domain' and 'home_url'. Both default
 * to $domain and are overridden by the host part of the matching entries in the
 * site's /wp-content/cache/user_config.text, when that file can be fetched.
 * The result is cached for one hour.
 *
 * NOTE(review): the cache key contains only the project id, so the first
 * $domain resolved for a project is reused for every other domain of that
 * project until the entry expires - confirm this is intended.
 *
 * @param mixed  $project_id project identifier, used for the cache key
 * @param string $domain     domain the configuration file is fetched from
 * @return array
 */
protected function getOldDomain($project_id, $domain)
{
    $cache_key = 'project_collect_lan_domain_' . $project_id;
    $cached = Cache::get($cache_key);
    if ($cached) {
        return $cached;
    }

    // Fall back to the given domain when the remote config has nothing usable
    $result = [
        'web_url_domain' => $domain,
        'home_url' => $domain,
    ];

    $config = curl_c('https://' . $domain . '/wp-content/cache/user_config.text');
    if ($config) {
        foreach (['web_url_domain', 'home_url'] as $field) {
            $parts = parse_url($config[$field] ?? '');
            if (isset($parts['host'])) {
                $result[$field] = $parts['host'];
            }
        }
    }

    // Cache for one hour
    Cache::add($cache_key, $result, 3600);

    return $result;
}
//正则匹配html资源
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
... ... @@ -246,6 +267,13 @@ class HtmlLanguageCollect extends Command
$check_vc_b && $source[] = $check_vc_b;
}
//a标签下载资源
preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a);
$down = $result_a[2] ?? [];
foreach ($down as $vd) {
$check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url);
$check_vd && $source[] = $check_vd;
}
return $source;
}
... ... @@ -254,6 +282,7 @@ class HtmlLanguageCollect extends Command
protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url)
{
if ($url) {
$url = str_replace('&quot;', '', $url);
$arr = parse_url($url);
$scheme = $arr['scheme'] ?? '';
$host = $arr['host'] ?? '';
... ... @@ -265,7 +294,7 @@ class HtmlLanguageCollect extends Command
(empty($host) || $host == $web_url_domain || $host == $home_url)
&& $path
&& (strpos($path, '.') !== false)
&& (end($path_arr) != 'html')
&& (!in_array(end($path_arr), ['html', 'php', 'com', 'xml']))
) {
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
... ... @@ -315,6 +344,7 @@ class HtmlLanguageCollect extends Command
$url_arr = explode('/', $vs['url_complete']);
$target_arr = explode('/', $new_source);
foreach ($css_source as $vcs) {
$vcs = str_replace('&quot;', '', $vcs);
$vcs_arr = parse_url($vcs);
if (isset($vcs_arr['domain'])) {
//不是相对路径,不下载
... ... @@ -329,7 +359,7 @@ class HtmlLanguageCollect extends Command
continue;
}
$path_arr = explode('.', $vcs);
if(end($path_arr) == 'html'){
if (in_array(end($path_arr), ['html', 'php', 'com', 'xml'])) {
continue;
}
... ...
<?php
namespace App\Console\Commands\Update;
use App\Helper\Arr;
use App\Http\Logic\Bside\Product\CategoryLogic;
use App\Models\Collect\CollectSource;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Models\Product\Category;
use App\Models\Product\Product;
use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
/**
* 4.0,5.0升级到6.0,内容同步
* Class ProjectImport
* @package App\Console\Commands
* @author Akun
* @date 2023/10/9 15:04
*/
class ProjectUpdateTemp extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'project_update_temp';
/**
* The console command description.
*
* @var string
*/
protected $description = '执行项目升级任务';
/**
 * Entry point of the console command.
 *
 * Calls start_update() in an endless loop; the command never returns on its own.
 */
public function handle()
{
    while (true) {
        $this->start_update();
        // start_update() only sleeps after a task has been processed to the end.
        // Pause between passes as well, so that an empty task list or an early
        // return does not turn this loop into a busy-wait.
        sleep(2);
    }
}
/**
 * Run one pass over the UpdateLog tasks and import their remote content.
 *
 * For every UpdateLog row whose api_type is 'category' the method marks the task
 * as in progress, reads a few configuration files from the task's host, switches
 * to the project's database, imports the data returned by the task's API URL,
 * then marks the task as complete and sleeps two seconds.
 *
 * NOTE(review): the query does not filter on the task status, so every call
 * picks up the same rows again - confirm this is intended for this temp command.
 *
 * @return true|null true when a remote API call did not answer with code 200
 *                   (the pass is aborted at that point), otherwise nothing
 */
protected function start_update()
{
$list = UpdateLog::where('api_type', 'category')->get();
foreach ($list as $task) {
$project_id = $task->project_id;
$api_type = $task->api_type;
// Strip any query string from the stored API URL
$api_url_arr = explode('?', $task->api_url);
$api_url = $api_url_arr[0];
$page_size = 20;
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update start' . PHP_EOL;
$task->status = UpdateLog::STATUS_ING;// sync in progress
$task->save();
// NOTE(review): $domain_arr['host'] is read below without a check; assumes api_url is always an absolute URL - confirm
$domain_arr = parse_url($api_url);
// Fetch the site configuration; both domains default to the API host
$link_type = 0;
$web_url_domain = $domain_arr['host'];
$home_url = $domain_arr['host'];
$url_web_config = 'https://' . $domain_arr['host'] . '/wp-content/cache/user_config.text';
// presumably curl_c() returns the decoded response, or a falsy value on failure - confirm
$data_config = curl_c($url_web_config);
if ($data_config) {
$link_type = $data_config['link_type'] ?? 0;
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
if (isset($web_url_arr['host'])) {
$web_url_domain = $web_url_arr['host'];
}
$home_url_arr = parse_url($data_config['home_url'] ?? '');
if (isset($home_url_arr['host'])) {
$home_url = $home_url_arr['host'];
}
}
// Fetch all languages (the 'short' column of the language file)
$language_list = [];
$url_language = 'https://' . $domain_arr['host'] . '/wp-content/plugins/proofreading/json/user_language.json';
$data_language = curl_c($url_language);
if ($data_language) {
$language_list = array_column($data_language, 'short');
}
// Fetch all pages (the 'path' column of the page list)
$page_list = [];
$url_page = 'https://' . $domain_arr['host'] . '/wp-content/cache/pages_list.json';
$data_page = curl_c($url_page);
if ($data_page) {
$page_list = array_column($data_page, 'path');
}
// Set up the database for this project
$project = ProjectServer::useProject($project_id);
if ($project) {
if ($api_type == 'category') {
// Product categories
$url = $api_url . '?' . http_build_query(['w' => 'category']);
$data = curl_c($url);
if (isset($data['code']) && $data['code'] == 200) {
$items = $data['data'] ?? [];
$this->category_insert($project_id, $items, 0);
} else {
// NOTE(review): returning here skips DB::disconnect() below, leaves the task in STATUS_ING and drops the remaining tasks of this pass - confirm
return true;
}
} elseif ($api_type == 'post') {
// Products
// NOTE(review): the query at the top only selects api_type 'category', so this branch looks unreachable at the moment - confirm
// First request (pagesize 0) is used only to read the total count
$url = $api_url . '?' . http_build_query(['w' => 'post', 'page' => 1, 'pagesize' => 0]);
$data = curl_c($url);
if (isset($data['code']) && $data['code'] == 200) {
$count = $data['data']['count'] ?? 0;
$total_page = ceil($count / $page_size);
for ($page = 1; $page <= $total_page; $page++) {
// $url_page / $data_page are reused here; $page_list was already extracted above
$url_page = $api_url . '?' . http_build_query(['w' => 'post', 'page' => $page, 'pagesize' => $page_size]);
$data_page = curl_c($url_page);
if (isset($data_page['code']) && $data_page['code'] == 200) {
$items = $data_page['data']['data'] ?? [];
$model = new Product();
$category_model = new Category();
$logic = new CategoryLogic();
foreach ($items as $item) {
$route = $this->get_url_route($item['url'] ?? '');
if ($route) {
$product = $model->read(['route' => $route], 'id');
if (!$product) {
// Images: download each one and build the gallery
$gallery = [];
if ($item['images'] ?? []) {
foreach ($item['images'] as $k_img => $img) {
$gallery[] = ['alt' => '这是一张产品图', 'url' => $this->source_download($img, $project_id, $domain_arr['host'], $web_url_domain, $home_url)];
}
}
// Category: map the remote category ids to local ones via original_id
$category_id = '';
if ($item['category'] ?? []) {
$category_arr = $category_model->list(['original_id' => ['in', array_column($item['category'], 'id')]]);
$category_id = $logic->getLastCategory(array_column($category_arr, 'id'));
}
try {
// NOTE(review): the key is spelled 'ttile'; presumably that is how the remote API delivers the title - confirm
$item['ttile'] = $this->special2str($item['ttile'] ?? '');
$id = $model->insertGetId([
'project_id' => $project_id,
'title' => $item['ttile'],
'intro' => $item['short_description'] ?? '',
'content' => $item['content'] ?? '',
'category_id' => $category_id,
'thumb' => isset($gallery[0]) ? Arr::a2s($gallery[0]) : '',
'gallery' => Arr::a2s($gallery),
'seo_mate' => Arr::a2s([
'title' => $item['ttile'],
'keyword' => $item['keywords'] ?? '',
'description' => $item['description'] ?? ''
]),
'status' => Product::STATUS_ON,
'created_at' => $item['post_date'] ?? date('Y-m-d H:i:s'),
'updated_at' => $item['post_date'] ?? date('Y-m-d H:i:s'),
'sort' => $item['sort'] ?? 0,
'is_upgrade' => 1,
'six_read' => 1,
'route' => $route
]);
$this->set_map($route, RouteMap::SOURCE_PRODUCT, $id, $project_id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id, $link_type, $language_list, $page_list);
} catch (\Exception $e) {
// A failing product is logged and skipped; the import continues with the next item
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL;
continue;
}
} else {
// Product already exists: refresh its category and reset product_type to an empty string
$category_id = '';
if ($item['category'] ?? []) {
$category_arr = $category_model->list(['original_id' => ['in', array_column($item['category'], 'id')]]);
$category_id = $logic->getLastCategory(array_column($category_arr, 'id'));
}
$model->edit(['category_id' => $category_id, 'product_type' => ''], ['id' => $product['id']]);
}
}
}
}
}
} else {
// NOTE(review): same early return as in the category branch - no disconnect, task stays in STATUS_ING - confirm
return true;
}
}
}
// Close the database connection
DB::disconnect('custom_mysql');
$task->status = UpdateLog::STATUS_COM;// sync complete
if ($api_type == 'post' || $api_type == 'page' || $api_type == 'news' || $api_type == 'blog') {
$task->collect_status = UpdateLog::COLLECT_STATUS_UN;
}
$task->save();
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update end ' . PHP_EOL;
sleep(2);
}
}
/**
 * Derive the route slug from a URL.
 *
 * The URL is url-decoded, its path is cut at the first '.', and the last path
 * segment is returned. When the path ends with '/', the segment before the
 * trailing slash is returned instead.
 *
 * Always returns a string: paths that reduce to a single empty segment (for
 * example '.html') yield '' rather than reading a non-existent array offset,
 * and a last segment of '0' is returned as-is instead of being mistaken for an
 * empty one.
 *
 * @param string $url absolute or relative URL
 * @return string route slug, or '' when none can be derived
 */
protected function get_url_route($url)
{
    $parts = parse_url(urldecode($url));
    if (empty($parts['path'])) {
        return '';
    }

    // Everything from the first dot onwards (e.g. ".html") is dropped
    $path = $parts['path'];
    $dot = strpos($path, '.');
    if ($dot !== false) {
        $path = substr($path, 0, $dot);
    }

    $segments = explode('/', $path);
    $count = count($segments);
    $last = $segments[$count - 1];
    if ($last !== '') {
        return $last;
    }

    // Trailing slash: use the previous segment, if there is one
    return $count > 1 ? $segments[$count - 2] : '';
}
/**
 * Store a multi-level list of product categories.
 *
 * Each item is matched against existing categories by parent id and route.
 * Missing categories are created and registered in the route map; the children
 * of every item are then processed recursively below it. Items without a
 * usable route, and items whose creation throws, are skipped together with
 * their children.
 *
 * @param mixed $project_id project the categories belong to
 * @param array $items      category items; reads 'url', 'name', 'keywords',
 *                          'description', 'id' and 'children'
 * @param int   $pid        id of the parent category, 0 for the top level
 */
protected function category_insert($project_id, $items, $pid = 0)
{
    $category = new Category();
    foreach ($items as $entry) {
        $route = $this->get_url_route($entry['url'] ?? '');
        if (!$route) {
            continue;
        }

        $existing = $category->read(['pid' => $pid, 'route' => $route], 'id');
        if ($existing) {
            $parent_id = $existing['id'];
        } else {
            try {
                $entry['name'] = $this->special2str($entry['name'] ?? '');
                $parent_id = $category->addReturnId([
                    'project_id' => $project_id,
                    'title' => $entry['name'],
                    'pid' => $pid,
                    'keywords' => $entry['keywords'] ?? '',
                    'describe' => $entry['description'] ?? '',
                    'original_id' => $entry['id'],
                    'route' => $route
                ]);
                $this->set_map($route, RouteMap::SOURCE_PRODUCT_CATE, $parent_id, $project_id);
            } catch (\Exception $e) {
                // Log the failure and move on to the next sibling
                echo 'date:' . date('Y-m-d H:i:s') . ', category_insert error: ' . $e->getMessage() . PHP_EOL;
                continue;
            }
        }

        if (!empty($entry['children'])) {
            $this->category_insert($project_id, $entry['children'], $parent_id);
        }
    }
}
/**
 * Convert a fixed set of HTML entities in a string to plain characters.
 *
 * Handles &lt; &gt; &amp; &acute; &quot; and &nbsp;. The conversion is a single
 * pass, so text produced by one replacement is never decoded a second time
 * (e.g. "&amp;nbsp;" becomes "&nbsp;", not a space).
 *
 * @param string $str text that may contain HTML entities
 * @return string the text with the supported entities replaced
 */
protected function special2str($str)
{
    // Every supported entity ends with ';' - nothing to do without one
    if (strpos($str, ';') === false) {
        return $str;
    }

    $entities = [
        '&lt;' => '<',
        '&gt;' => '>',
        '&amp;' => '&',
        '&acute;' => '´',
        '&quot;' => '“',
        '&nbsp;' => ' '
    ];

    // strtr() replaces all pairs in one scan; chained str_replace() calls would
    // re-read the '&' produced by '&amp;' and decode the entity twice.
    return strtr($str, $entities);
}
/**
 * Register a route in the route map.
 *
 * Does nothing when $route is empty or when a RouteMap row already exists for
 * the given project, source and source id. For news and blog sources the path
 * column is filled with the source constant as well.
 *
 * @param string $route      route slug
 * @param mixed  $source     one of the RouteMap::SOURCE_* values
 * @param mixed  $source_id  id of the record the route points to
 * @param mixed  $project_id project the route belongs to
 */
protected function set_map($route, $source, $source_id, $project_id)
{
    if (!$route) {
        return;
    }

    $existing = RouteMap::where('project_id', $project_id)
        ->where('source', $source)
        ->where('source_id', $source_id)
        ->first();
    if ($existing) {
        return;
    }

    $map = new RouteMap();
    $map->project_id = $project_id;
    $map->source = $source;
    $map->source_id = $source_id;
    $map->route = $route;
    switch ($source) {
        case RouteMap::SOURCE_NEWS:
            $map->path = RouteMap::SOURCE_NEWS;
            break;
        case RouteMap::SOURCE_BLOG:
            $map->path = RouteMap::SOURCE_BLOG;
            break;
    }
    $map->save();
}
/**
 * Download a resource of the site and return the URL to use for it.
 *
 * Resources that are relative, or hosted on $web_url_domain / $home_url, are
 * uploaded through CosService and recorded in CollectSource; repeated calls for
 * the same original URL reuse the recorded target. Everything else - foreign
 * hosts, URLs without a path, failed uploads - is returned as a completed
 * absolute URL built from scheme, host and path.
 *
 * @param string $url            original resource URL
 * @param mixed  $project_id     project the resource belongs to
 * @param string $domain         host used when $url has none
 * @param string $web_url_domain host treated as belonging to the site
 * @param string $home_url       second host treated as belonging to the site
 * @return string '' for an empty $url, otherwise the resulting URL
 */
protected function source_download($url, $project_id, $domain, $web_url_domain, $home_url)
{
    if (!$url) {
        return '';
    }

    $parts = parse_url($url);
    $scheme = $parts['scheme'] ?? '';
    $host = $parts['host'] ?? '';
    $path = $parts['path'] ?? '';
    $url_complete = ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path;

    // Only resources of the site itself are downloaded
    $own_host = empty($host) || $host == $web_url_domain || $host == $home_url;
    if (!($own_host && $path)) {
        return $url_complete;
    }

    $known = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
    if ($known) {
        return getImageUrl($known['target']);
    }

    $new_url = CosService::uploadRemote($project_id, 'image_product', $url_complete);
    if (!$new_url) {
        return $url_complete;
    }

    CollectSource::insert([
        'project_id' => $project_id,
        'origin' => $url,
        'target' => $new_url,
        'created_at' => date('Y-m-d H:i:s'),
        'updated_at' => date('Y-m-d H:i:s'),
    ]);

    return getImageUrl($new_url);
}
}
... ...
... ... @@ -181,6 +181,7 @@ class LoginController extends BaseController
*/
public function eventMessage(){
$message = file_get_contents("php://input");
@file_put_contents(storage_path('logs/lyh_error.log'), var_export($message, true) . PHP_EOL, FILE_APPEND);
$message = simplexml_load_string($message, 'SimpleXMLElement', LIBXML_NOCDATA | LIBXML_NOERROR);
$jsonData = json_encode($message);
$arrayData = json_decode($jsonData, true);
... ... @@ -299,25 +300,4 @@ class LoginController extends BaseController
}
return $data;
}
/**
 * Debug helper: fetch a hard-coded page and dump the text found between its tags.
 *
 * <style> elements and <link> tags are removed before the text is extracted.
 * The result is printed with var_dump(); nothing is returned.
 */
public function ceshi()
{
    $url = 'https://demo.globalso.site/';

    // Peer and host name verification are switched off for this request
    $context = stream_context_create([
        'ssl' => [
            'verify_peer' => false,
            'verify_peer_name' => false,
        ],
    ]);
    $page = file_get_contents($url, false, $context);

    // Remove <style> elements together with their content
    $without_styles = preg_replace('/<style\b[^>]*>(.*?)<\/style>/s', '', $page);
    // Remove <link> tags
    $without_links = preg_replace('/<link\b[^>]*>/', '', $without_styles);

    // Collect every non-tag run of text sitting between '>' and '<'
    $found = array();
    preg_match_all('/>([^<]+)</', $without_links, $found);

    var_dump($found[1]);
}
}
... ...
... ... @@ -425,7 +425,7 @@ Route::middleware(['bloginauth'])->group(function () {
});
//自定义模板
Route::prefix('custom')->group(function () {
Route::prefix('custom_module')->group(function () {
Route::any('/', [\App\Http\Controllers\Bside\CustomModule\CustomModuleController::class, 'lists'])->name('custom_lists');
Route::any('/save', [\App\Http\Controllers\Bside\CustomModule\CustomModuleController::class, 'save'])->name('custom_save');
Route::any('/del', [\App\Http\Controllers\Bside\CustomModule\CustomModuleController::class, 'del'])->name('custom_del');
... ...