作者 刘锟

项目升级小语种采集

... ... @@ -5,6 +5,7 @@ namespace App\Console\Commands\Update;
use App\Models\Collect\CollectSource;
use App\Models\Collect\CollectTask;
use App\Models\Com\UpdateLog;
use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
... ... @@ -37,9 +38,9 @@ class HtmlCollect extends Command
/**
 * Console command entry point.
 *
 * Calls start_collect() over and over; the loop has no exit condition, so
 * this method never returns on its own.
 *
 * NOTE(review): no delay is visible in this loop — presumably
 * start_collect() throttles itself when there is nothing to do; confirm.
 */
public function handle()
{
    do {
        $this->start_collect();
    } while (true);
}
protected function start_collect()
... ... @@ -114,11 +115,29 @@ class HtmlCollect extends Command
return false;
}
switch ($update_log->api_type) {
case 'page':
$source = RouteMap::SOURCE_PAGE;
break;
case 'news':
$source = RouteMap::SOURCE_NEWS;
break;
case 'blog':
$source = RouteMap::SOURCE_BLOG;
break;
case 'tag':
$source = RouteMap::SOURCE_PRODUCT_KEYWORD;
break;
default:
$source = RouteMap::SOURCE_PRODUCT;
break;
}
$complete = false;
//设置数据库
$project = ProjectServer::useProject($update_log->project_id);
if ($project) {
$collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log['project_id'])->where('status', CollectTask::STATUS_UN)->limit(50)->get();
$collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log['project_id'])->where('source', $source)->where('status', CollectTask::STATUS_UN)->orderBy('language', 'asc')->limit(50)->get();
if ($collect_list->count() == 0) {
$complete = true;
... ...
... ... @@ -76,6 +76,22 @@ class ProjectUpdate extends Command
$task->status = UpdateLog::STATUS_ING;//同步中
$task->save();
$domain_arr = parse_url($api_url);
//获取网站配置
$link_type = 0;
$url_web_config = 'https://' . $domain_arr['host'] . '/wp-content/cache/user_config.text';
$data_config = http_get($url_web_config, ['charset' => 'UTF-8']);
if ($data_config) {
$link_type = $data_config['link_type'];
}
//获取所有语种
$language_list = [];
$url_language = 'https://' . $domain_arr['host'] . '/wp-content/plugins/proofreading/json/user_language.json';
$data_language = http_get($url_language, ['charset' => 'UTF-8']);
if ($data_language) {
$language_list = array_column($data_language, 'short');
}
//设置数据库
$project = ProjectServer::useProject($project_id);
if ($project) {
... ... @@ -126,7 +142,7 @@ class ProjectUpdate extends Command
$id = $keyword['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT_KEYWORD, $id, $link_type, $language_list);
}
}
}
... ... @@ -258,7 +274,7 @@ class ProjectUpdate extends Command
$id = $product['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PRODUCT, $id, $link_type, $language_list);
}
}
}
... ... @@ -312,7 +328,7 @@ class ProjectUpdate extends Command
$id = $news['id'];
}
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id);
CollectTask::_insert($item['url'], $project_id, $api_type == 'news' ? RouteMap::SOURCE_NEWS : RouteMap::SOURCE_BLOG, $id, $link_type, $language_list);
}
}
}
... ... @@ -360,7 +376,7 @@ class ProjectUpdate extends Command
$id = $custom['id'];
}
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id);
CollectTask::_insert($item['url'], $project_id, RouteMap::SOURCE_PAGE, $id, $link_type, $language_list);
}
}
}
... ...
... ... @@ -84,7 +84,7 @@ if (!function_exists('http_get')) {
/**
* 发送http get请求
* @param string $url
* @param array $header
* @return array
*/
function http_get($url, $header = [])
{
... ...
... ... @@ -14,27 +14,66 @@ class CollectTask extends Base
// Collect task status values.
// NOTE(review): meanings inferred from the names (UN = not started,
// ING = in progress, COM = complete) — confirm against the schema.
const STATUS_UN = 0;
const STATUS_ING = 1;
const STATUS_COM = 2;
/**
 * Queue collect tasks for one URL unless its default-language task already exists.
 *
 * Looks up a row matching the project, source, source id, URL host, URL path
 * and an empty language. When no such row is found, inserts that base row
 * and, if $link_type is positive and $language_list is non-empty, one extra
 * row per language in the same insert call.
 *
 * @param string $url           URL to queue; an empty value or a URL without a host is ignored.
 * @param mixed  $project_id    Stored as project_id.
 * @param mixed  $source        Stored as source.
 * @param mixed  $source_id     Stored as source_id.
 * @param int    $link_type     0 = base row only; 1 = language replaces the first host label;
 *                              any other positive value = language is appended to the host as "/<lang>".
 * @param array  $language_list Language codes to expand into extra rows.
 * @return void
 */
public static function _insert($url, $project_id, $source, $source_id, $link_type = 0, $language_list = [])
{
    if (!$url) {
        return;
    }

    // parse_url() returns false on a malformed URL and omits keys that are absent.
    $url_arr = parse_url($url);
    if (!is_array($url_arr) || empty($url_arr['host'])) {
        return;
    }
    $host = $url_arr['host'];
    $route = $url_arr['path'] ?? '';

    // Only the default-language row is checked; language rows are created together with it.
    $where = [
        'project_id' => $project_id,
        'source' => $source,
        'source_id' => $source_id,
        'domain' => $host,
        'route' => $route,
        'language' => '',
    ];
    if (self::where($where)->first()) {
        return;
    }

    $now = date('Y-m-d H:i:s');
    $make_row = static function ($domain, $language) use ($project_id, $source, $source_id, $route, $now) {
        return [
            'project_id' => $project_id,
            'source' => $source,
            'source_id' => $source_id,
            'domain' => $domain,
            'route' => $route,
            'language' => $language,
            'created_at' => $now,
            'updated_at' => $now,
        ];
    };

    $rows = [$make_row($host, '')];

    if ($link_type > 0 && $language_list) {
        // Split on dots (the separator implode() rejoins with) so that only
        // the first host label is swapped for the language code.
        // NOTE(review): this assumes the host starts with a replaceable label
        // such as "www" — confirm the behavior wanted for bare domains.
        $labels = explode('.', $host);
        foreach ($language_list as $v_lan) {
            if ($link_type == 1) {
                // Language served from a subdomain.
                $labels[0] = $v_lan;
                $new_domain = implode('.', $labels);
            } else {
                // Language served from a sub-directory.
                $new_domain = $host . '/' . $v_lan;
            }
            $rows[] = $make_row($new_domain, $v_lan);
        }
    }

    self::insert($rows);
}
... ...