SyncSubmitRepository.php 3.5 KB
<?php
/**
 * Created by PhpStorm.
 * User: zhl
 * Date: 2024/1/6
 * Time: 14:00
 */
namespace App\Repositories;

use App\Models\SyncSubmitTask\SyncSubmitTask;
use App\Models\Visit\Visit;

/**
 * Class SyncSubmitRepository
 * @package App\Repositories
 */
class SyncSubmitRepository
{
    /**
     * Queue a "visit" traffic task for a live site.
     *
     * Nothing is queued (and false is returned) when the IP is empty or the
     * loopback address, when the user agent is recognised by isBot(), or when
     * $url has no host component.
     *
     * @param string $ip           Visitor IP address.
     * @param string $url          Visited URL; must contain a host.
     * @param string $user_agent   Raw User-Agent header value.
     * @param string $referrer_url Raw referrer; normalised through initReferrer().
     * @param int    $device_port  Device type; values that are not keys of
     *                             Visit::deviceMap() fall back to Visit::DEVICE_PC.
     * @param int    $traffic      Passed through unchanged to SyncSubmitTask::createTask().
     * @return bool True when a task was handed to SyncSubmitTask::createTask(),
     *              false when the request was rejected.
     */
    public function trafficVisit($ip, $url, $user_agent, $referrer_url = '', $device_port = Visit::DEVICE_PC, $traffic = SyncSubmitTask::TRAFFIC_DEFAULT)
    {
        if (empty($ip) || $ip === '127.0.0.1') {
            return false;
        }
        if ($this->isBot($user_agent)) {
            return false;
        }

        // parse_url() returns false for seriously malformed URLs and omits
        // components that are absent; empty() covers both cases.
        $url_array = parse_url($url);
        if (empty($url_array['host'])) {
            return false;
        }

        // A protocol-relative URL ("//host/path") has a host but no scheme.
        // Fall back to http instead of emitting an undefined-index warning
        // and storing "://host".
        $scheme = isset($url_array['scheme']) ? $url_array['scheme'] : 'http';

        // Normalise the referrer according to the whitelist rules.
        $referrer_url = $this->initReferrer($referrer_url);

        // A user agent carrying any of these tokens is treated as a mobile client
        // and overrides the device type. Matching is case-insensitive because
        // several tokens (e.g. "ucweb") are sent in upper case by real clients.
        // NOTE(review): 2 is presumably the mobile key of Visit::deviceMap() - confirm
        // and replace with the named constant.
        if (preg_match('/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini|Mobile|wap|windowsce|ucweb/i', (string) $user_agent)) {
            $device_port = 2;
        }

        // Loose comparison is intentional: callers may pass the device type as a numeric string.
        $known_device = in_array($device_port, array_keys(Visit::deviceMap()));

        // Assemble the task payload.
        $array = [
            'ip' => $ip,
            'domain' => $url_array['host'],
            'referer' => $referrer_url,
            'user_agent' => $user_agent,
            'data' => [
                'url' => $url,
                'domain' => $scheme . '://' . $url_array['host'],
                'device_port' => $known_device ? $device_port : Visit::DEVICE_PC,
                'referrer_url' => $referrer_url
            ]
        ];
        SyncSubmitTask::createTask($array, SyncSubmitTask::TYPE_VISIT, $traffic);
        return true;
    }

    /**
     * Decide from the User-Agent header whether the client is a spider/crawler.
     *
     * The agent is searched case-insensitively for each known crawler signature.
     * An empty or null agent is reported as "not a bot".
     *
     * @param string|null $agent Raw User-Agent header value.
     * @return bool True when any signature occurs in the agent.
     */
    public function isBot($agent)
    {
        $agent = (string) $agent;
        if ($agent === '') {
            return false;
        }

        $spiderSite = ["TencentTraveler", "Baiduspider+", "BaiduGame", "Googlebot", "msnbot", "Sosospider+", "Sogou web spider", "ia_archiver", "Yahoo! Slurp", "YoudaoBot",
            "Yahoo Slurp", "MSNBot", "Java (Often spam bot)", "BaiDuSpider", "Voila", "Yandex bot", "BSpider", "twiceler", "Sogou Spider", "Speedy Spider", "Google AdSense",
            "Heritrix", "Python-urllib", "Alexa (IA Archiver)", "Ask", "Exabot", "Custo", "OutfoxBot/YodaoBot", "yacy", "SurveyBot", "legs", "lwp-trivial", "Nutch", "StackRambler",
            "The web archive (IA Archiver)", "Perl tool", "MJ12bot", "Netcraft", "MSIECrawler", "WGet tools", "larbin", "Fish search", "yandex.com/bots", "google.com/bot",
            "bingbot", "YandexMobileBot", "BingPreview", "AhrefsBot", "bot"
        ];

        foreach ($spiderSite as $signature) {
            // stripos() ignores case on both sides; lowercasing only the signature
            // would miss mixed-case agents such as "BingPreview/1.0b".
            if (stripos($agent, $signature) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * Normalise the referrer according to the whitelist rules.
     *
     * TODO Referrers from the whitelisted sites are returned unchanged; every
     * other non-empty referrer is reset to Google.
     *
     * NOTE(review): the pattern is matched against the whole referrer string,
     * not only its host, so a whitelisted name in the path or query also counts.
     *
     * @param string $referrer Raw referrer URL.
     * @return string '' for an empty referrer, the referrer itself when whitelisted,
     *                otherwise 'https://www.google.com/'.
     */
    public function initReferrer($referrer)
    {
        if (empty($referrer)) {
            return '';
        }

        $whitelist = '/google|facebook|bing|yahoo|youtube|linkedin|messefrankfurt|yandex|tiktok|twitter|instagram|reddit|telegram|pinterest|tumblr/';
        if (preg_match($whitelist, $referrer)) {
            return $referrer;
        }

        return 'https://www.google.com/';
    }
}