作者 刘锟

Merge remote-tracking branch 'origin/master' into akun

@@ -207,7 +207,7 @@ class SyncProject extends Command @@ -207,7 +207,7 @@ class SyncProject extends Command
207 $data = [ 207 $data = [
208 'project'=>[ 208 'project'=>[
209 'title' => $title, 209 'title' => $title,
210 - 'company' => $param['company_name'], 210 + 'company' => preg_replace('/【.*?】/', '', $param['company_name']),
211 'lead_name' => $param['principal_name'], 211 'lead_name' => $param['principal_name'],
212 'mobile' => $param['principal_mobile'], 212 'mobile' => $param['principal_mobile'],
213 'mysql_id'=>Project::MYSQL_ID, 213 'mysql_id'=>Project::MYSQL_ID,
@@ -3,14 +3,14 @@ @@ -3,14 +3,14 @@
3 namespace App\Console\Commands\Tdk; 3 namespace App\Console\Commands\Tdk;
4 4
5 5
6 -use App\Http\Controllers\Bside\News\NewsController;  
7 -use App\Models\News\News;  
8 -use App\Models\News\NewsCategory; 6 +use App\Helper\Arr;
  7 +use App\Models\Product\Keyword;
9 use App\Models\Project\Project; 8 use App\Models\Project\Project;
10 use App\Models\Project\ProjectUpdateTdk; 9 use App\Models\Project\ProjectUpdateTdk;
11 use App\Services\ProjectServer; 10 use App\Services\ProjectServer;
12 use Illuminate\Console\Command; 11 use Illuminate\Console\Command;
13 use Illuminate\Support\Facades\DB; 12 use Illuminate\Support\Facades\DB;
  13 +use Illuminate\Support\Str;
14 14
15 /** 15 /**
16 * 重跑异常tdk 16 * 重跑异常tdk
@@ -63,25 +63,57 @@ class RerunSeoTdk extends Command @@ -63,25 +63,57 @@ class RerunSeoTdk extends Command
63 } 63 }
64 64
65 /** 65 /**
66 - * 判断异常 66 + * 判断seo_title后缀重复
67 * @author zbj 67 * @author zbj
68 * @date 2025/4/12 68 * @date 2025/4/12
69 */ 69 */
70 public function judgeAnomalies($project_id){ 70 public function judgeAnomalies($project_id){
71 //获取当前项目的所有分类 71 //获取当前项目的所有分类
72 - $categories = NewsCategory::pluck('name', 'id')->toArray(); 72 + $seo_titles = Keyword::pluck('seo_title', 'id')->toArray();
73 //新闻 seo_keyword 和 分类名一样的 73 //新闻 seo_keyword 和 分类名一样的
74 - $news_ids = [];  
75 - foreach ($categories as $category){  
76 - $ids = News::WhereRaw("FIND_IN_SET('{$category}', `seo_keywords`)")->pluck('id')->toArray();  
77 - $news_ids = array_unique(array_merge($news_ids, $ids)); 74 + $ids = [];
  75 + //Suppliers, Manufacturer
  76 + foreach ($seo_titles as $id=>$seo_title){
  77 + if(!Str::contains($seo_title, ', ')){
  78 + continue;
  79 + }
  80 + $arr = explode(', ', $seo_title);
  81 + $suffix1 = $arr[1];
  82 + $arr = explode(' ', $arr[0]);
  83 + $suffix2 = Arr::last($arr);
  84 + if(Str::singular($suffix1) == Str::singular($suffix2)){
  85 + $ids[] = $id;
  86 + }
78 } 87 }
79 88
80 - $count = count($news_ids); 89 + $count = count($ids);
81 if($count){ 90 if($count){
82 echo "项目{$project_id},共{$count}条需要重跑"; 91 echo "项目{$project_id},共{$count}条需要重跑";
83 - News::whereIn('id', $news_ids)->update(['seo_keywords' => '']); 92 + Keyword::whereIn('id', $ids)->update(['seo_title' => '']);
84 ProjectUpdateTdk::add_task($project_id); 93 ProjectUpdateTdk::add_task($project_id);
85 } 94 }
86 } 95 }
  96 +
  97 +// /**
  98 +// * 判断异常
  99 +// * @author zbj
  100 +// * @date 2025/4/12
  101 +// */
  102 +// public function judgeAnomalies($project_id){
  103 +// //获取当前项目的所有分类
  104 +// $categories = NewsCategory::pluck('name', 'id')->toArray();
  105 +// //新闻 seo_keyword 和 分类名一样的
  106 +// $news_ids = [];
  107 +// foreach ($categories as $category){
  108 +// $ids = News::WhereRaw("FIND_IN_SET('{$category}', `seo_keywords`)")->pluck('id')->toArray();
  109 +// $news_ids = array_unique(array_merge($news_ids, $ids));
  110 +// }
  111 +//
  112 +// $count = count($news_ids);
  113 +// if($count){
  114 +// echo "项目{$project_id},共{$count}条需要重跑";
  115 +// News::whereIn('id', $news_ids)->update(['seo_keywords' => '']);
  116 +// ProjectUpdateTdk::add_task($project_id);
  117 +// }
  118 +// }
87 } 119 }
@@ -383,9 +383,16 @@ class UpdateSeoTdk extends Command @@ -383,9 +383,16 @@ class UpdateSeoTdk extends Command
383 383
384 $prefix = $this->getPrefixKeyword($project_id, 'prefix', 1, $title); 384 $prefix = $this->getPrefixKeyword($project_id, 'prefix', 1, $title);
385 //in,for,with,to,near,from 这些介词 只拼前缀,不拼后缀 385 //in,for,with,to,near,from 这些介词 只拼前缀,不拼后缀
386 - $suffix_ban = ['in ', 'for ', 'with ', 'to ', 'near ','from ', 'In ', 'For ', 'With ', 'To ', 'Near ','From ']; 386 + $is_contains_jieci = false;
  387 + $words = explode(' ', $title);
  388 + foreach ($words as $word){
  389 + $word = Str::replace([',', '!', '?'], '', $word);
  390 + if(in_array(strtolower($word), ['in', 'for', 'with', 'to', 'near','from'])){
  391 + $is_contains_jieci = true;
  392 + }
  393 + }
387 $suffix = ''; 394 $suffix = '';
388 - if(!Str::contains($title, $suffix_ban)){ 395 + if(!$is_contains_jieci){
389 // 某些后缀不能并存的情况 396 // 某些后缀不能并存的情况
390 $ban_suffix = []; 397 $ban_suffix = [];
391 //services/service 结尾的词,后缀不拼manufacturer,factory 398 //services/service 结尾的词,后缀不拼manufacturer,factory
@@ -590,11 +597,8 @@ class UpdateSeoTdk extends Command @@ -590,11 +597,8 @@ class UpdateSeoTdk extends Command
590 // 前后缀如果已经存在, 就不在拼接当前类型 597 // 前后缀如果已经存在, 就不在拼接当前类型
591 if (FALSE !== strpos($topic, $keyword)) 598 if (FALSE !== strpos($topic, $keyword))
592 return $str; 599 return $str;
593 - //处理单词复数 s es ies ves  
594 - $keyword = rtrim($keyword, 'ves');  
595 - $keyword = rtrim($keyword, 'ies');  
596 - $keyword = rtrim($keyword, 'es');  
597 - $keyword = rtrim($keyword, 's'); 600 + //复数转单数
  601 + $keyword = Str::singular($keyword);
598 602
599 $topic_words = explode(" ", $topic); 603 $topic_words = explode(" ", $topic);
600 if($type == 'prefix' && Str::startsWith($topic_words[0], $keyword)){ 604 if($type == 'prefix' && Str::startsWith($topic_words[0], $keyword)){
@@ -605,21 +609,27 @@ class UpdateSeoTdk extends Command @@ -605,21 +609,27 @@ class UpdateSeoTdk extends Command
605 } 609 }
606 } 610 }
607 } 611 }
608 - //随机取 612 + //随机取 并单复数去重
609 shuffle($fix_keyword); 613 shuffle($fix_keyword);
610 - if (count($fix_keyword) < $num)  
611 - return implode(", ", $fix_keyword);  
612 - $keyword = array_slice($fix_keyword, 0, $num);  
613 - $str = implode(", ", $keyword);  
614 -  
615 - // 前后缀内部去重  
616 - foreach ($keyword as $k=>$v){  
617 - $tmp = rtrim($v, 's');  
618 - if (substr_count($str, $tmp) > 1) {  
619 - unset($keyword[$k]);  
620 - $str = implode(", ", $keyword); 614 + $keywords = [];
  615 + foreach ($fix_keyword as $v){
  616 + if($num == 0){
  617 + break;
  618 + }
  619 + $is_repeat = false;
  620 + foreach ($keywords as $keyword){
  621 + if(Str::singular($keyword) == Str::singular($v)){
  622 + $is_repeat = true;
  623 + break;
  624 + }
  625 + }
  626 + if($is_repeat){
  627 + continue;
621 } 628 }
  629 + $keywords[] = $v;
  630 + $num--;
622 } 631 }
  632 + $str = implode(', ', $keywords);
623 } 633 }
624 return $str; 634 return $str;
625 } 635 }
@@ -439,10 +439,7 @@ class ProjectLogic extends BaseLogic @@ -439,10 +439,7 @@ class ProjectLogic extends BaseLogic
439 } 439 }
440 $param['upload_config'] = json_encode($param['upload_config'] ?? []); 440 $param['upload_config'] = json_encode($param['upload_config'] ?? []);
441 $param['web_traffic_config'] = json_encode($param['web_traffic_config'] ?? []); 441 $param['web_traffic_config'] = json_encode($param['web_traffic_config'] ?? []);
442 - $robots = $this->model->read(['id'=>$param['id']],['robots'])['robots'];  
443 - if($robots == Project::TYPE_ONE){//开启  
444 - $param['robots'] = Project::TYPE_ONE;  
445 - } 442 + unset($param['robots']);//项目不保存robots
446 $this->model->edit($param,['id'=>$param['id']]); 443 $this->model->edit($param,['id'=>$param['id']]);
447 Common::del_user_cache($this->model->getTable(),$param['id']); 444 Common::del_user_cache($this->model->getTable(),$param['id']);
448 return $this->success(); 445 return $this->success();
@@ -108,6 +108,17 @@ class SyncSubmitTaskService @@ -108,6 +108,17 @@ class SyncSubmitTaskService
108 if ($tran_visit && $tran_visit !='en' && ($checkIpCountry['country'] == '美国' || $tran_country != $checkIpCountry['country'])) { 108 if ($tran_visit && $tran_visit !='en' && ($checkIpCountry['country'] == '美国' || $tran_country != $checkIpCountry['country'])) {
109 throw new InquiryFilterException( '判定为蜘蛛爬取小语种'); 109 throw new InquiryFilterException( '判定为蜘蛛爬取小语种');
110 } 110 }
  111 +
  112 + //2024-04-14 关杰: https://www.vogi-dcs.com/ 项目 没有访问来源的,且ua是以下两个,跳过
  113 + //Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36
  114 + //Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36
  115 + $ua = [
  116 + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  117 + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
  118 + ];
  119 + if($project['id'] == 3216 && in_array($data['user_agent'], $ua)){
  120 + throw new InquiryFilterException( '判定为蜘蛛爬取小语种');
  121 + }
111 } 122 }
112 //域名 过滤国家或ip 123 //域名 过滤国家或ip
113 $domain_info = DomainInfo::getCacheInfoByProjectId($project['id']); 124 $domain_info = DomainInfo::getCacheInfoByProjectId($project['id']);