作者 刘锟

Merge remote-tracking branch 'origin/master' into akun

... ... @@ -207,7 +207,7 @@ class SyncProject extends Command
$data = [
'project'=>[
'title' => $title,
'company' => $param['company_name'],
'company' => preg_replace('/【.*?】/', '', $param['company_name']),
'lead_name' => $param['principal_name'],
'mobile' => $param['principal_mobile'],
'mysql_id'=>Project::MYSQL_ID,
... ...
... ... @@ -3,14 +3,14 @@
namespace App\Console\Commands\Tdk;
use App\Http\Controllers\Bside\News\NewsController;
use App\Models\News\News;
use App\Models\News\NewsCategory;
use App\Helper\Arr;
use App\Models\Product\Keyword;
use App\Models\Project\Project;
use App\Models\Project\ProjectUpdateTdk;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Str;
/**
* 重跑异常tdk
... ... @@ -63,25 +63,57 @@ class RerunSeoTdk extends Command
}
/**
 * Detect keyword seo_titles whose suffix duplicates the word right before it,
 * clear them, and schedule the project for a TDK rerun.
 *
 * A title is flagged when it contains ", " and the segment following the first
 * ", " has the same singular form as the last word of the segment before it,
 * e.g. "... Suppliers, Supplier".
 *
 * NOTE(review): the Keyword query is not filtered by $project_id in this method;
 * presumably the caller has already selected the project's data source - confirm.
 *
 * @param mixed $project_id Project id, used in the progress message and passed
 *                          to ProjectUpdateTdk::add_task().
 * @return void
 * @author zbj
 * @date 2025/4/12
 */
public function judgeAnomalies($project_id){
    // id => seo_title for every keyword row returned by the model.
    $seo_titles = Keyword::pluck('seo_title', 'id')->toArray();

    $ids = [];
    // Suffix pattern being inspected looks like: "Suppliers, Manufacturer"
    foreach ($seo_titles as $id => $seo_title){
        // seo_title may be NULL in the database; normalise before string handling.
        $seo_title = (string)$seo_title;
        if(!Str::contains($seo_title, ', ')){
            continue;
        }
        // Str::contains() above guarantees at least two segments.
        $segments = explode(', ', $seo_title);
        $suffix1 = $segments[1];
        // Last word of the part that precedes the first ", ".
        $words = explode(' ', $segments[0]);
        $suffix2 = Arr::last($words);
        // Compare singular forms so "Suppliers" and "Supplier" count as duplicates.
        if(Str::singular($suffix1) === Str::singular($suffix2)){
            $ids[] = $id;
        }
    }

    $count = count($ids);
    if($count){
        echo "项目{$project_id},共{$count}条需要重跑";
        // Blank the offending titles, then register the project for a TDK update.
        Keyword::whereIn('id', $ids)->update(['seo_title' => '']);
        ProjectUpdateTdk::add_task($project_id);
    }
}
// Legacy implementation of judgeAnomalies(), kept commented out for reference only.
// It cleared seo_keywords on news rows whose keywords contained a category name.
// /**
// * Detect anomalies
// * @author zbj
// * @date 2025/4/12
// */
// public function judgeAnomalies($project_id){
// // Fetch all categories of the current project
// $categories = NewsCategory::pluck('name', 'id')->toArray();
// // News whose seo_keywords match a category name
// $news_ids = [];
// foreach ($categories as $category){
// $ids = News::WhereRaw("FIND_IN_SET('{$category}', `seo_keywords`)")->pluck('id')->toArray();
// $news_ids = array_unique(array_merge($news_ids, $ids));
// }
//
// $count = count($news_ids);
// if($count){
// echo "项目{$project_id},共{$count}条需要重跑";
// News::whereIn('id', $news_ids)->update(['seo_keywords' => '']);
// ProjectUpdateTdk::add_task($project_id);
// }
// }
}
... ...
... ... @@ -383,9 +383,16 @@ class UpdateSeoTdk extends Command
$prefix = $this->getPrefixKeyword($project_id, 'prefix', 1, $title);
//in,for,with,to,near,from 这些介词 只拼前缀,不拼后缀
$suffix_ban = ['in ', 'for ', 'with ', 'to ', 'near ','from ', 'In ', 'For ', 'With ', 'To ', 'Near ','From '];
$is_contains_jieci = false;
$words = explode(' ', $title);
foreach ($words as $word){
$word = Str::replace([',', '!', '?'], '', $word);
if(in_array(strtolower($word), ['in', 'for', 'with', 'to', 'near','from'])){
$is_contains_jieci = true;
}
}
$suffix = '';
if(!Str::contains($title, $suffix_ban)){
if(!$is_contains_jieci){
// 某些后缀不能并存的情况
$ban_suffix = [];
//services/service 结尾的词,后缀不拼manufacturer,factory
... ... @@ -590,11 +597,8 @@ class UpdateSeoTdk extends Command
// 前后缀如果已经存在, 就不在拼接当前类型
if (FALSE !== strpos($topic, $keyword))
return $str;
//处理单词复数 s es ies ves
$keyword = rtrim($keyword, 'ves');
$keyword = rtrim($keyword, 'ies');
$keyword = rtrim($keyword, 'es');
$keyword = rtrim($keyword, 's');
//复数转单数
$keyword = Str::singular($keyword);
$topic_words = explode(" ", $topic);
if($type == 'prefix' && Str::startsWith($topic_words[0], $keyword)){
... ... @@ -605,21 +609,27 @@ class UpdateSeoTdk extends Command
}
}
}
//随机取
//随机取 并单复数去重
shuffle($fix_keyword);
if (count($fix_keyword) < $num)
return implode(", ", $fix_keyword);
$keyword = array_slice($fix_keyword, 0, $num);
$str = implode(", ", $keyword);
// 前后缀内部去重
foreach ($keyword as $k=>$v){
$tmp = rtrim($v, 's');
if (substr_count($str, $tmp) > 1) {
unset($keyword[$k]);
$str = implode(", ", $keyword);
$keywords = [];
foreach ($fix_keyword as $v){
if($num == 0){
break;
}
$is_repeat = false;
foreach ($keywords as $keyword){
if(Str::singular($keyword) == Str::singular($v)){
$is_repeat = true;
break;
}
}
if($is_repeat){
continue;
}
$keywords[] = $v;
$num--;
}
$str = implode(', ', $keywords);
}
return $str;
}
... ...
... ... @@ -439,10 +439,7 @@ class ProjectLogic extends BaseLogic
}
$param['upload_config'] = json_encode($param['upload_config'] ?? []);
$param['web_traffic_config'] = json_encode($param['web_traffic_config'] ?? []);
$robots = $this->model->read(['id'=>$param['id']],['robots'])['robots'];
if($robots == Project::TYPE_ONE){//开启
$param['robots'] = Project::TYPE_ONE;
}
unset($param['robots']);//项目不保存robots
$this->model->edit($param,['id'=>$param['id']]);
Common::del_user_cache($this->model->getTable(),$param['id']);
return $this->success();
... ...
... ... @@ -108,6 +108,17 @@ class SyncSubmitTaskService
if ($tran_visit && $tran_visit !='en' && ($checkIpCountry['country'] == '美国' || $tran_country != $checkIpCountry['country'])) {
throw new InquiryFilterException( '判定为蜘蛛爬取小语种');
}
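// 2024-04-14, per 关杰 (Guan Jie): for the https://www.vogi-dcs.com/ project, skip submissions that have no visit source (referrer) and whose UA is one of the following two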
//Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36
//Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36
$ua = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
];
if($project['id'] == 3216 && in_array($data['user_agent'], $ua)){
throw new InquiryFilterException( '判定为蜘蛛爬取小语种');
}
}
//域名 过滤国家或ip
$domain_info = DomainInfo::getCacheInfoByProjectId($project['id']);
... ...