Merge remote-tracking branch 'origin/master' into akun
正在显示
5 个修改的文件
包含
85 行增加
和
35 行删除
| @@ -207,7 +207,7 @@ class SyncProject extends Command | @@ -207,7 +207,7 @@ class SyncProject extends Command | ||
| 207 | $data = [ | 207 | $data = [ |
| 208 | 'project'=>[ | 208 | 'project'=>[ |
| 209 | 'title' => $title, | 209 | 'title' => $title, |
| 210 | - 'company' => $param['company_name'], | 210 | + 'company' => preg_replace('/【.*?】/', '', $param['company_name']), |
| 211 | 'lead_name' => $param['principal_name'], | 211 | 'lead_name' => $param['principal_name'], |
| 212 | 'mobile' => $param['principal_mobile'], | 212 | 'mobile' => $param['principal_mobile'], |
| 213 | 'mysql_id'=>Project::MYSQL_ID, | 213 | 'mysql_id'=>Project::MYSQL_ID, |
| @@ -3,14 +3,14 @@ | @@ -3,14 +3,14 @@ | ||
| 3 | namespace App\Console\Commands\Tdk; | 3 | namespace App\Console\Commands\Tdk; |
| 4 | 4 | ||
| 5 | 5 | ||
| 6 | -use App\Http\Controllers\Bside\News\NewsController; | ||
| 7 | -use App\Models\News\News; | ||
| 8 | -use App\Models\News\NewsCategory; | 6 | +use App\Helper\Arr; |
| 7 | +use App\Models\Product\Keyword; | ||
| 9 | use App\Models\Project\Project; | 8 | use App\Models\Project\Project; |
| 10 | use App\Models\Project\ProjectUpdateTdk; | 9 | use App\Models\Project\ProjectUpdateTdk; |
| 11 | use App\Services\ProjectServer; | 10 | use App\Services\ProjectServer; |
| 12 | use Illuminate\Console\Command; | 11 | use Illuminate\Console\Command; |
| 13 | use Illuminate\Support\Facades\DB; | 12 | use Illuminate\Support\Facades\DB; |
| 13 | +use Illuminate\Support\Str; | ||
| 14 | 14 | ||
| 15 | /** | 15 | /** |
| 16 | * 重跑异常tdk | 16 | * 重跑异常tdk |
| @@ -63,25 +63,57 @@ class RerunSeoTdk extends Command | @@ -63,25 +63,57 @@ class RerunSeoTdk extends Command | ||
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | /** | 65 | /** |
| 66 | - * 判断异常 | 66 | + * 判断seo_title后缀重复 |
| 67 | * @author zbj | 67 | * @author zbj |
| 68 | * @date 2025/4/12 | 68 | * @date 2025/4/12 |
| 69 | */ | 69 | */ |
| 70 | public function judgeAnomalies($project_id){ | 70 | public function judgeAnomalies($project_id){ |
| 71 | //获取当前项目的所有分类 | 71 | //获取当前项目的所有分类 |
| 72 | - $categories = NewsCategory::pluck('name', 'id')->toArray(); | 72 | + $seo_titles = Keyword::pluck('seo_title', 'id')->toArray(); |
| 73 | //新闻 seo_keyword 和 分类名一样的 | 73 | //新闻 seo_keyword 和 分类名一样的 |
| 74 | - $news_ids = []; | ||
| 75 | - foreach ($categories as $category){ | ||
| 76 | - $ids = News::WhereRaw("FIND_IN_SET('{$category}', `seo_keywords`)")->pluck('id')->toArray(); | ||
| 77 | - $news_ids = array_unique(array_merge($news_ids, $ids)); | 74 | + $ids = []; |
| 75 | + //Suppliers, Manufacturer | ||
| 76 | + foreach ($seo_titles as $id=>$seo_title){ | ||
| 77 | + if(!Str::contains($seo_title, ', ')){ | ||
| 78 | + continue; | ||
| 79 | + } | ||
| 80 | + $arr = explode(', ', $seo_title); | ||
| 81 | + $suffix1 = $arr[1]; | ||
| 82 | + $arr = explode(' ', $arr[0]); | ||
| 83 | + $suffix2 = Arr::last($arr); | ||
| 84 | + if(Str::singular($suffix1) == Str::singular($suffix2)){ | ||
| 85 | + $ids[] = $id; | ||
| 86 | + } | ||
| 78 | } | 87 | } |
| 79 | 88 | ||
| 80 | - $count = count($news_ids); | 89 | + $count = count($ids); |
| 81 | if($count){ | 90 | if($count){ |
| 82 | echo "项目{$project_id},共{$count}条需要重跑"; | 91 | echo "项目{$project_id},共{$count}条需要重跑"; |
| 83 | - News::whereIn('id', $news_ids)->update(['seo_keywords' => '']); | 92 | + Keyword::whereIn('id', $ids)->update(['seo_title' => '']); |
| 84 | ProjectUpdateTdk::add_task($project_id); | 93 | ProjectUpdateTdk::add_task($project_id); |
| 85 | } | 94 | } |
| 86 | } | 95 | } |
| 96 | + | ||
| 97 | +// /** | ||
| 98 | +// * 判断异常 | ||
| 99 | +// * @author zbj | ||
| 100 | +// * @date 2025/4/12 | ||
| 101 | +// */ | ||
| 102 | +// public function judgeAnomalies($project_id){ | ||
| 103 | +// //获取当前项目的所有分类 | ||
| 104 | +// $categories = NewsCategory::pluck('name', 'id')->toArray(); | ||
| 105 | +// //新闻 seo_keyword 和 分类名一样的 | ||
| 106 | +// $news_ids = []; | ||
| 107 | +// foreach ($categories as $category){ | ||
| 108 | +// $ids = News::WhereRaw("FIND_IN_SET('{$category}', `seo_keywords`)")->pluck('id')->toArray(); | ||
| 109 | +// $news_ids = array_unique(array_merge($news_ids, $ids)); | ||
| 110 | +// } | ||
| 111 | +// | ||
| 112 | +// $count = count($news_ids); | ||
| 113 | +// if($count){ | ||
| 114 | +// echo "项目{$project_id},共{$count}条需要重跑"; | ||
| 115 | +// News::whereIn('id', $news_ids)->update(['seo_keywords' => '']); | ||
| 116 | +// ProjectUpdateTdk::add_task($project_id); | ||
| 117 | +// } | ||
| 118 | +// } | ||
| 87 | } | 119 | } |
| @@ -383,9 +383,16 @@ class UpdateSeoTdk extends Command | @@ -383,9 +383,16 @@ class UpdateSeoTdk extends Command | ||
| 383 | 383 | ||
| 384 | $prefix = $this->getPrefixKeyword($project_id, 'prefix', 1, $title); | 384 | $prefix = $this->getPrefixKeyword($project_id, 'prefix', 1, $title); |
| 385 | //in,for,with,to,near,from 这些介词 只拼前缀,不拼后缀 | 385 | //in,for,with,to,near,from 这些介词 只拼前缀,不拼后缀 |
| 386 | - $suffix_ban = ['in ', 'for ', 'with ', 'to ', 'near ','from ', 'In ', 'For ', 'With ', 'To ', 'Near ','From ']; | 386 | + $is_contains_jieci = false; |
| 387 | + $words = explode(' ', $title); | ||
| 388 | + foreach ($words as $word){ | ||
| 389 | + $word = Str::replace([',', '!', '?'], '', $word); | ||
| 390 | + if(in_array(strtolower($word), ['in', 'for', 'with', 'to', 'near','from'])){ | ||
| 391 | + $is_contains_jieci = true; | ||
| 392 | + } | ||
| 393 | + } | ||
| 387 | $suffix = ''; | 394 | $suffix = ''; |
| 388 | - if(!Str::contains($title, $suffix_ban)){ | 395 | + if(!$is_contains_jieci){ |
| 389 | // 某些后缀不能并存的情况 | 396 | // 某些后缀不能并存的情况 |
| 390 | $ban_suffix = []; | 397 | $ban_suffix = []; |
| 391 | //services/service 结尾的词,后缀不拼manufacturer,factory | 398 | //services/service 结尾的词,后缀不拼manufacturer,factory |
| @@ -590,11 +597,8 @@ class UpdateSeoTdk extends Command | @@ -590,11 +597,8 @@ class UpdateSeoTdk extends Command | ||
| 590 | // 前后缀如果已经存在, 就不在拼接当前类型 | 597 | // 前后缀如果已经存在, 就不在拼接当前类型 |
| 591 | if (FALSE !== strpos($topic, $keyword)) | 598 | if (FALSE !== strpos($topic, $keyword)) |
| 592 | return $str; | 599 | return $str; |
| 593 | - //处理单词复数 s es ies ves | ||
| 594 | - $keyword = rtrim($keyword, 'ves'); | ||
| 595 | - $keyword = rtrim($keyword, 'ies'); | ||
| 596 | - $keyword = rtrim($keyword, 'es'); | ||
| 597 | - $keyword = rtrim($keyword, 's'); | 600 | + //复数转单数 |
| 601 | + $keyword = Str::singular($keyword); | ||
| 598 | 602 | ||
| 599 | $topic_words = explode(" ", $topic); | 603 | $topic_words = explode(" ", $topic); |
| 600 | if($type == 'prefix' && Str::startsWith($topic_words[0], $keyword)){ | 604 | if($type == 'prefix' && Str::startsWith($topic_words[0], $keyword)){ |
| @@ -605,21 +609,27 @@ class UpdateSeoTdk extends Command | @@ -605,21 +609,27 @@ class UpdateSeoTdk extends Command | ||
| 605 | } | 609 | } |
| 606 | } | 610 | } |
| 607 | } | 611 | } |
| 608 | - //随机取 | 612 | + //随机取 并单复数去重 |
| 609 | shuffle($fix_keyword); | 613 | shuffle($fix_keyword); |
| 610 | - if (count($fix_keyword) < $num) | ||
| 611 | - return implode(", ", $fix_keyword); | ||
| 612 | - $keyword = array_slice($fix_keyword, 0, $num); | ||
| 613 | - $str = implode(", ", $keyword); | ||
| 614 | - | ||
| 615 | - // 前后缀内部去重 | ||
| 616 | - foreach ($keyword as $k=>$v){ | ||
| 617 | - $tmp = rtrim($v, 's'); | ||
| 618 | - if (substr_count($str, $tmp) > 1) { | ||
| 619 | - unset($keyword[$k]); | ||
| 620 | - $str = implode(", ", $keyword); | 614 | + $keywords = []; |
| 615 | + foreach ($fix_keyword as $v){ | ||
| 616 | + if($num == 0){ | ||
| 617 | + break; | ||
| 618 | + } | ||
| 619 | + $is_repeat = false; | ||
| 620 | + foreach ($keywords as $keyword){ | ||
| 621 | + if(Str::singular($keyword) == Str::singular($v)){ | ||
| 622 | + $is_repeat = true; | ||
| 623 | + break; | ||
| 624 | + } | ||
| 625 | + } | ||
| 626 | + if($is_repeat){ | ||
| 627 | + continue; | ||
| 621 | } | 628 | } |
| 629 | + $keywords[] = $v; | ||
| 630 | + $num--; | ||
| 622 | } | 631 | } |
| 632 | + $str = implode(', ', $keywords); | ||
| 623 | } | 633 | } |
| 624 | return $str; | 634 | return $str; |
| 625 | } | 635 | } |
| @@ -439,10 +439,7 @@ class ProjectLogic extends BaseLogic | @@ -439,10 +439,7 @@ class ProjectLogic extends BaseLogic | ||
| 439 | } | 439 | } |
| 440 | $param['upload_config'] = json_encode($param['upload_config'] ?? []); | 440 | $param['upload_config'] = json_encode($param['upload_config'] ?? []); |
| 441 | $param['web_traffic_config'] = json_encode($param['web_traffic_config'] ?? []); | 441 | $param['web_traffic_config'] = json_encode($param['web_traffic_config'] ?? []); |
| 442 | - $robots = $this->model->read(['id'=>$param['id']],['robots'])['robots']; | ||
| 443 | - if($robots == Project::TYPE_ONE){//开启 | ||
| 444 | - $param['robots'] = Project::TYPE_ONE; | ||
| 445 | - } | 442 | + unset($param['robots']);//项目不保存robots |
| 446 | $this->model->edit($param,['id'=>$param['id']]); | 443 | $this->model->edit($param,['id'=>$param['id']]); |
| 447 | Common::del_user_cache($this->model->getTable(),$param['id']); | 444 | Common::del_user_cache($this->model->getTable(),$param['id']); |
| 448 | return $this->success(); | 445 | return $this->success(); |
| @@ -108,6 +108,17 @@ class SyncSubmitTaskService | @@ -108,6 +108,17 @@ class SyncSubmitTaskService | ||
| 108 | if ($tran_visit && $tran_visit !='en' && ($checkIpCountry['country'] == '美国' || $tran_country != $checkIpCountry['country'])) { | 108 | if ($tran_visit && $tran_visit !='en' && ($checkIpCountry['country'] == '美国' || $tran_country != $checkIpCountry['country'])) { |
| 109 | throw new InquiryFilterException( '判定为蜘蛛爬取小语种'); | 109 | throw new InquiryFilterException( '判定为蜘蛛爬取小语种'); |
| 110 | } | 110 | } |
| 111 | + | ||
| 112 | + //2024-04-14 关杰: https://www.vogi-dcs.com/ 项目 没有访问来源的,且ua是以下两个,跳过 | ||
| 113 | + //Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 | ||
| 114 | + //Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 | ||
| 115 | + $ua = [ | ||
| 116 | + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', | ||
| 117 | + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' | ||
| 118 | + ]; | ||
| 119 | + if($project['id'] == 3216 && in_array($data['user_agent'], $ua)){ | ||
| 120 | + throw new InquiryFilterException( '判定为蜘蛛爬取小语种'); | ||
| 121 | + } | ||
| 111 | } | 122 | } |
| 112 | //域名 过滤国家或ip | 123 | //域名 过滤国家或ip |
| 113 | $domain_info = DomainInfo::getCacheInfoByProjectId($project['id']); | 124 | $domain_info = DomainInfo::getCacheInfoByProjectId($project['id']); |
-
请 注册 或 登录 后发表评论