RelayInquiryForward.php 12.5 KB
<?php
/**
 * Created by PhpStorm.
 * User: akun
 * Date: 2025/02/24
 * Time: 14:14
 */

namespace App\Console\Commands\Inquiry;

use App\Helper\Translate;
use App\Models\Inquiry\InquiryRelayDetail;
use App\Models\Inquiry\InquiryRelayDetailLog;
use Illuminate\Console\Command;
use Illuminate\Support\Arr;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Str;

/**
 * Class RelayInquiryForward
 * @package App\Console\Commands\Inquiry
 */
class RelayInquiryForward extends Command
{
    /**
     * The name and signature of the console command.
     *
     * The signature declares the command name only; it takes no arguments or options.
     *
     * @var string
     */
    protected $signature = 'relay_inquiry_forward';

    /**
     * The console command description.
     *
     * The text is Chinese for "Forward inquiries: split the forwarding data".
     *
     * @var string
     */
    protected $description = '转发询盘:拆分转发数据';

    /**
     * Create a new command instance.
     *
     * Only delegates to the parent constructor; no additional state is set up here.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Simulated referrer weights (default table).
     *
     * Keys are referrer URLs and values are relative weights consumed by get_rand().
     * getReferer() draws from this table when the visitor country is '美国' (United States).
     *
     * @var array
     */
    protected $lyzb = [
        'https://www.google.com/' => 630,
        'http://www.google.com/' => 30,
        'http://www.bing.com/' => 20,
        'https://www.bing.com/' => 5,
        'https://www.youtube.com/' => 5,
        'https://search.yahoo.com/' => 5,
        'https://www.facebook.com/' => 5,
    ];

    /**
     * Simulated referrer weights for Russian-language inquiries.
     *
     * Same layout as $lyzb (referrer URL => relative weight).
     * getReferer() draws from this table when the detected message language is 'ru'.
     *
     * @var array
     */
    protected $eylyzb = [
        'https://www.yandex.com/' => 630,
        'https://www.google.com/' => 30,
        'http://www.google.com/' => 30,
        'http://www.bing.com/' => 20,
        'https://www.bing.com/' => 5,
        'https://www.youtube.com/' => 5,
        'https://search.yahoo.com/' => 5,
        'https://www.facebook.com/' => 5,
    ];

    /**
     * Desktop (PC) User-Agent strings.
     *
     * completeDetail() picks one at random when the inquiry has an email address.
     *
     * @var array
     */
    protected $pc_ua = [
        0 => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        1 => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
        2 => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'
    ];

    /**
     * Mobile User-Agent strings.
     *
     * completeDetail() picks one at random when the inquiry has no email address.
     *
     * @var array
     */
    protected $mobile_ua = [
        0 => 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko; googleweblight) Chrome/38.0.1025.166 Mobile Safari/535.19',
    ];

    /**
     * Google country domain suffixes.
     *
     * Keys are the domain suffix appended to "https://www.google."; values are the
     * country names (in Chinese) that getReferer() matches against the visitor country.
     *
     * @var string[]
     */
    protected $suffix = [
        'co.jp' => '日本',
        'com.tr' => '土耳其',
        'nl' => '荷兰',
        'ru' => '俄罗斯',
        'fr' => '法国',
        'co.kr' => '韩国',
        'fi' => '芬兰',
        'be' => '比利时',
        'lt' => '立陶宛',
        'es' => '西班牙',
        'it' => '意大利',
        'com.au' => '澳大利亚',
        'no' => '挪威',
        'al' => '阿尔巴尼亚',
        'pt' => '葡萄牙',
        'lv' => '拉脱维亚',
        'hu' => '匈牙利',
        'cz' => '捷克',
        'de' => '德国',
        'ca' => '加拿大',
        'co.in' => '印度',
        'co.uk' => '英国',
        'com.vn' => '越南',
        'com.br' => '巴西',
        'co.il' => '以色列',
        'pl' => '波兰',
        'com.eg' => '埃及',
        'co.th' => '泰国',
        'sk' => '斯洛伐克',
        'ro' => '罗马尼亚',
        'com.mx' => '墨西哥',
        'com.my' => '马来西亚',
        'com.pk' => '巴基斯坦',
        'co.nz' => '新西兰',
        'co.za' => '南非',
        'com.ar' => '阿根廷',
        'com.kw' => '科威特',
        'com.sg' => '新加坡',
        'com.co' => '哥伦比亚',
        'co.id' => '印度尼西亚',
        'gr' => '希腊',
        'bg' => '保加利亚',
        'mn' => '蒙古',
        'dk' => '丹麦',
        'com.sa' => '沙特阿拉伯',
        'com.pe' => '秘鲁',
        'com.ph' => '菲律宾',
        'com.ua' => '乌克兰',
        'ge' => '格鲁吉亚',
        'ae' => '阿拉伯联合酋长国',
        'tn' => '突尼斯',
    ];

    protected $otherzb = [700, 300]; // Referrer weights for non-US visitors: index 0 = google.com, index 1 = google.<country suffix>


    /**
     * Command entry point: keeps processing inquiry details indefinitely.
     *
     * Every pass delegates to startInquiryDetail(), which handles a single record
     * or sleeps when nothing is pending, so this method never returns.
     */
    public function handle()
    {
        for (;;) {
            $this->startInquiryDetail();
        }
    }


    /**
     * Process the oldest pending inquiry detail, if there is one.
     *
     * Picks the first record in STATUS_INIT (ordered by start_at, then id), marks it
     * STATUS_PEND, completes its data, creates the relay log rows and finally marks it
     * STATUS_SUCCESS. If anything fails, the record is marked STATUS_FAIL and the error
     * message (truncated to 200 characters) is stored in `remark`.
     * When no record is pending, the method sleeps for 60 seconds.
     *
     * @return bool always true
     */
    public function startInquiryDetail()
    {
        $inquiry_detail = InquiryRelayDetail::where('status', InquiryRelayDetail::STATUS_INIT)
            ->orderBy('start_at', 'asc')
            ->orderBy('id', 'asc')
            ->first();
        if (!$inquiry_detail) {
            // All tasks are done: wait before polling again.
            sleep(60);
            return true;
        }

        try {
            $this->output('询盘详情ID:' . $inquiry_detail['id'] . ',开始拆分转发数据');

            // Mark as "splitting in progress" so the record is not picked up again.
            $inquiry_detail->status = InquiryRelayDetail::STATUS_PEND;
            $inquiry_detail->save();

            // First complete the inquiry detail data.
            $urls = $this->completeDetail($inquiry_detail);

            // Then split it into individual relay log rows.
            $this->relayDetail($inquiry_detail['id'], $urls, $inquiry_detail['start_at']);

            $inquiry_detail->status = InquiryRelayDetail::STATUS_SUCCESS;
            $inquiry_detail->save();

            $this->output('询盘详情ID:' . $inquiry_detail['id'] . ',拆分转发数据结束');
        } catch (\Throwable $e) {
            // Catch \Throwable rather than \Exception: engine errors such as TypeError
            // would otherwise terminate the daemon and leave the record stuck in STATUS_PEND.
            $inquiry_detail->status = InquiryRelayDetail::STATUS_FAIL;
            $inquiry_detail->remark = mb_substr($e->getMessage(), 0, 200);
            $inquiry_detail->save();
            $this->output('询盘详情ID:' . $inquiry_detail['id'] . ',拆分转发数据失败,原因:' . $e->getMessage());
        }

        return true;
    }

    /**
     * Complete the inquiry detail record and return the full list of URLs to visit.
     *
     * Builds the simulated visit path via getUrls(), appends the inquiry's own URLs
     * (stored as JSON in `urls`), detects the message language, chooses device type,
     * User-Agent and referrer, then saves all of that back onto the record.
     * `num` is stored as count($urls) + 1: relayDetail() creates one visit log per URL
     * plus one extra inquiry log.
     *
     * @param $inquiry_detail InquiryRelayDetail record being processed (modified and saved)
     * @return array visit URLs followed by the inquiry URLs
     * @author Akun
     * @date 2025/02/25 17:19
     */
    public function completeDetail($inquiry_detail)
    {
        // Visit URLs.
        $visit_urls = $this->getUrls($inquiry_detail['is_v6'], $inquiry_detail['website'], $inquiry_detail['email']);

        // `urls` may be empty or contain invalid JSON, in which case json_decode() returns
        // null; fall back to an empty list instead of handing null to array_merge().
        $decoded = json_decode((string)$inquiry_detail['urls'], true);
        $inquiry_url = is_array($decoded) ? $decoded : [];
        $urls = array_merge($visit_urls, $inquiry_url);

        // Language.
        if (is_numeric($inquiry_detail['message'])) {
            // Purely numeric messages would be detected as Chinese, so force English.
            $lang = 'en';
        } else {
            // NOTE(review): translateSl() is assumed to return ['texts' => ['sl' => <lang code>]] — confirm.
            $translateSl = Translate::translateSl($inquiry_detail['message']);
            $lang = $translateSl['texts']['sl'] ?? 'en';
        }

        // Client device, User-Agent header and referrer.
        $device_port = $inquiry_detail['email'] ? '1' : '2'; // 1 = PC, 2 = mobile
        $user_agent = $inquiry_detail['email'] ? Arr::random($this->pc_ua) : Arr::random($this->mobile_ua);
        $referrer = $this->getReferer($inquiry_detail['country'], $lang);

        $inquiry_detail->device_port = $device_port;
        $inquiry_detail->user_agent = $user_agent;
        $inquiry_detail->referrer = $referrer;
        $inquiry_detail->urls = json_encode($urls);
        $inquiry_detail->num = count($urls) + 1;
        $inquiry_detail->save();

        return $urls;
    }

    /**
     * Create the relay log rows for one inquiry detail.
     *
     * One TYPE_VISIT row is created per URL, each scheduled 5-60 seconds after the
     * previous one. The first visit starts 5 minutes to 2 hours (plus that first gap)
     * after $start_at. After the last URL, a TYPE_INQUIRY row for the same URL is
     * scheduled a further 30-120 seconds later.
     *
     * The last URL is detected by position rather than by array key, so the method
     * also works when $urls is not a 0-based sequential list.
     *
     * @param $task_id  id of the inquiry detail the rows belong to
     * @param $urls     URLs to visit, in order
     * @param $start_at base time the schedule offsets are added to (parsed with strtotime)
     * @author Akun
     * @date 2025/02/25 15:41
     */
    public function relayDetail($task_id, $urls, $start_at)
    {
        $total = count($urls);
        // Loop-invariant: parse the base time once.
        $base = strtotime($start_at);

        $pre = 0;
        $seconds = rand(300, 7200);   // Start between 5 minutes and 2 hours after $start_at.
        foreach ($urls as $v) {
            $pre++;
            $seconds += rand(5, 60);
            InquiryRelayDetailLog::createInquiryLog($task_id, InquiryRelayDetailLog::TYPE_VISIT, $pre, $v, date('Y-m-d H:i:s', $base + $seconds));

            // The last visited URL also receives the inquiry itself.
            if ($pre === $total) {
                $seconds += rand(30, 120);
                $pre++;
                InquiryRelayDetailLog::createInquiryLog($task_id, InquiryRelayDetailLog::TYPE_INQUIRY, $pre, $v, date('Y-m-d H:i:s', $base + $seconds));
            }
        }
    }

    /**
     * Build the simulated visit path for a website.
     *
     * The first element is the entry URL; when more than one candidate URL is
     * available, 1-6 randomly chosen candidates are appended after it.
     *
     * @param $is_v6   whether the site is a v6 site (selects the sitemap file names)
     * @param $website host name of the site, without scheme
     * @param $email   inquiry email; for non-v6 sites an empty value skips the sitemaps
     * @return array
     * @author Akun
     * @date 2025/02/25 15:45
     */
    public function getUrls($is_v6, $website, $email)
    {
        $domain = 'https://' . $website . '/';

        // v6: with email push to the main site, without email push to the AMP site;
        // v5: only inquiries with an email are pushed to the main site.
        // Sitemap order: products, product categories, keywords, pages.
        $sitemaps = null;
        if ($is_v6) {
            $sitemaps = ['product_sitemap.xml', 'product_category_sitemap.xml', 'product_keywords_sitemap.xml', 'page_sitemap.xml'];
        } elseif ($email) {
            $sitemaps = ['sitemap_post.xml', 'sitemap_category.xml', 'sitemap_post_tag.xml', 'sitemap_page.xml'];
        }

        if ($sitemaps === null) {
            // Mobile site: land on contact-us only for now.
            $product_url = [];
            $product_cate_url = [];
            $keywords_url = [];
            $page_url = [$domain . 'contact-us/'];
        } else {
            [$product_url, $product_cate_url, $keywords_url, $page_url] = array_map(function ($file) use ($domain) {
                return $this->getLinksFromSitemap($domain . $file);
            }, $sitemaps);
        }

        // Entry URL: home page 30%, single page 10%, keyword (aggregate) page 60%.
        // NOTE(review): getRandByRatio() is assumed to return the index of the chosen ratio — confirm.
        $type = getRandByRatio([30, 10, 60]);
        $inlet = $domain;
        if ($type == 1) {
            $inlet = $page_url ? Arr::random($page_url) : $domain;
        }
        if ($type == 2) {
            $inlet = $keywords_url ? Arr::random($keywords_url) : $domain;
        }

        $urls = [$inlet];

        // Every candidate URL, entry URL included.
        $all_urls = array_merge($urls, $product_url, $product_cate_url, $keywords_url, $page_url);
        $available = count($all_urls);

        if ($available > 1) {
            // Visit 1-6 random pages, capped at the number of candidates.
            $deep = rand(1, 6);
            $urls = array_merge($urls, Arr::random($all_urls, min($deep, $available)));
        }

        return $urls;
    }

    /**
     * Choose a simulated referrer URL.
     *
     * Russian-language inquiries draw from $eylyzb and visitors from '美国' (United
     * States) draw from $lyzb. Everyone else gets https://www.google.com/, except that
     * a country listed in $suffix may instead get its local Google domain, decided by
     * the $otherzb weights.
     *
     * @param $ip_area visitor country name (Chinese, as used in $suffix)
     * @param $lang    detected language code of the inquiry message
     * @return int|string
     */
    public function getReferer($ip_area, $lang)
    {
        if ($lang == 'ru') {
            return $this->get_rand($this->eylyzb);
        }

        if ($ip_area == '美国') {
            return $this->get_rand($this->lyzb);
        }

        $fallback = 'https://www.google.com/';

        $suffix = array_search($ip_area, $this->suffix);
        if (!$suffix) {
            return $fallback;
        }

        // Index 1 of $otherzb selects the country-specific Google domain.
        return $this->get_rand($this->otherzb) == 1
            ? 'https://www.google.' . $suffix . '/'
            : $fallback;
    }

    /**
     * Weighted random pick.
     *
     * Walks the entries in order; each entry wins if a fresh random number in
     * [1, remaining weight] does not exceed its own weight, otherwise its weight is
     * removed from the remaining total and the next entry is tried.
     *
     * @param $proArr map of key => integer weight
     * @return int|string the chosen key, or '' when $proArr is empty
     */
    protected function get_rand($proArr)
    {
        $remaining = array_sum($proArr);

        foreach ($proArr as $candidate => $weight) {
            if (mt_rand(1, $remaining) <= $weight) {
                return $candidate;
            }
            $remaining -= $weight;
        }

        return '';
    }

    /**
     * Fetch a sitemap and return up to 20 random page URLs from it.
     *
     * The sitemap is downloaded without TLS certificate verification and parsed
     * directly from the response body, so no temporary file is left behind.
     * URLs containing '404', 'thanks' or 'test' are skipped.
     * Any failure is logged to stdout and whatever was collected so far (usually an
     * empty array) is returned, which makes this a best-effort lookup.
     *
     * @param $sitemapUrl absolute URL of the sitemap XML
     * @return array|mixed
     */
    public function getLinksFromSitemap($sitemapUrl)
    {
        $links = [];
        try {
            // Ignore certificate errors when fetching the sitemap.
            $result = Http::withoutVerifying()->get($sitemapUrl)->body();

            $xml = simplexml_load_string($result);
            if ($xml === false) {
                // Handled by the catch below, same as any other fetch/parse failure.
                throw new \RuntimeException('sitemap is not valid XML');
            }

            foreach ($xml->url as $url) {
                $loc = (string)$url->loc;
                if (!Str::contains($loc, ['404', 'thanks', 'test'])) {
                    $links[] = $loc;
                }
            }

            // Take 20 at random, or all of them when there are fewer.
            $total = count($links);
            return Arr::random($links, $total > 20 ? 20 : $total);
        } catch (\Exception $e) {
            echo date('Y-m-d H:i:s') . 'sitemap获取失败:' . $e->getMessage() . PHP_EOL;
            return $links;
        }
    }

    /**
     * Print a log line to stdout, prefixed with the current timestamp.
     *
     * @param $message text to print
     */
    public function output($message)
    {
        printf('%s | %s%s', date('Y-m-d H:i:s'), $message, PHP_EOL);
    }
}