SyncSubmitRepository.php
3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
/**
* Created by PhpStorm.
* User: zhl
* Date: 2024/1/6
* Time: 14:00
*/
namespace App\Repositories;
use App\Models\SyncSubmitTask\SyncSubmitTask;
use App\Models\Visit\Visit;
/**
 * Class SyncSubmitRepository
 *
 * Filters incoming visit data (loopback IPs, crawler user agents, URLs without
 * a host), normalises the referrer and device type, and queues the visit as a
 * SyncSubmitTask of type TYPE_VISIT.
 *
 * @package App\Repositories
 */
class SyncSubmitRepository
{
    /**
     * Queue a visit record for a live site (traffic diversion).
     *
     * The visit is skipped (returns false) when the IP is empty or the IPv4
     * loopback address, when the user agent looks like a crawler, or when the
     * URL has no host component.
     *
     * @param string $ip           Visitor IP address.
     * @param string $url          Full URL that was visited.
     * @param string $user_agent   Raw User-Agent header.
     * @param string $referrer_url Raw referrer; rewritten by initReferrer().
     * @param int    $device_port  Device code; falls back to Visit::DEVICE_PC
     *                             when it is not a key of Visit::deviceMap().
     * @param int    $traffic      Traffic flag forwarded to SyncSubmitTask::createTask().
     * @return bool True when a task was created, false when the visit was skipped.
     */
    public function trafficVisit($ip, $url, $user_agent, $referrer_url = '', $device_port = Visit::DEVICE_PC, $traffic = SyncSubmitTask::TRAFFIC_DEFAULT)
    {
        if (empty($ip) || $ip === '127.0.0.1') {
            return false;
        }

        if ($this->isBot($user_agent)) {
            return false;
        }

        // parse_url() returns false on seriously malformed URLs, so check the type first.
        $urlParts = parse_url((string) $url);
        if (!is_array($urlParts) || empty($urlParts['host'])) {
            return false;
        }

        // Protocol-relative URLs ("//host/path") have a host but no scheme;
        // fall back to http instead of triggering an undefined-key warning.
        $scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : 'http';

        // Validate / reset the source URL.
        $referrer_url = $this->initReferrer($referrer_url);

        // A user agent carrying any of these markers is treated as a mobile
        // client and overrides the supplied device type. Matching is
        // case-insensitive so that e.g. "UCWEB" and "ucweb" are both caught.
        if (preg_match('/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini|Mobile|wap|windowsce|ucweb/i', (string) $user_agent)) {
            // NOTE(review): 2 is presumably the mobile code in Visit::deviceMap();
            // replace the literal with the matching Visit constant once confirmed.
            $device_port = 2;
        }

        // Only accept device codes that Visit knows about.
        $knownDevicePorts = array_keys(Visit::deviceMap());
        $devicePort = in_array($device_port, $knownDevicePorts) ? $device_port : Visit::DEVICE_PC;

        // Assemble the task payload.
        $payload = [
            'ip' => $ip,
            'domain' => $urlParts['host'],
            'referer' => $referrer_url,
            'user_agent' => $user_agent,
            'data' => [
                'url' => $url,
                'domain' => $scheme . '://' . $urlParts['host'],
                'device_port' => $devicePort,
                'referrer_url' => $referrer_url
            ]
        ];

        SyncSubmitTask::createTask($payload, SyncSubmitTask::TYPE_VISIT, $traffic);

        return true;
    }

    /**
     * Decide from the User-Agent header whether the client is a spider/crawler.
     *
     * Each known signature is searched for as a case-insensitive substring of
     * the agent. A missing, non-string or empty agent is reported as "not a bot".
     *
     * @param string $agent Raw User-Agent header.
     * @return bool True when any crawler signature occurs in the agent.
     */
    public function isBot($agent)
    {
        if (!is_string($agent) || $agent === '') {
            return false;
        }

        $spiderSignatures = [
            "TencentTraveler", "Baiduspider+", "BaiduGame", "Googlebot", "msnbot", "Sosospider+", "Sogou web spider", "ia_archiver", "Yahoo! Slurp", "YoudaoBot",
            "Yahoo Slurp", "MSNBot", "Java (Often spam bot)", "BaiDuSpider", "Voila", "Yandex bot", "BSpider", "twiceler", "Sogou Spider", "Speedy Spider", "Google AdSense",
            "Heritrix", "Python-urllib", "Alexa (IA Archiver)", "Ask", "Exabot", "Custo", "OutfoxBot/YodaoBot", "yacy", "SurveyBot", "legs", "lwp-trivial", "Nutch", "StackRambler",
            "The web archive (IA Archiver)", "Perl tool", "MJ12bot", "Netcraft", "MSIECrawler", "WGet tools", "larbin", "Fish search", "yandex.com/bots", "google.com/bot",
            "bingbot", "YandexMobileBot", "BingPreview", "AhrefsBot", "bot"
        ];

        foreach ($spiderSignatures as $signature) {
            // stripos() ignores case on both sides; the previous code lowercased
            // only the signature, so agents like "Baiduspider/2.0" slipped through.
            if (stripos($agent, $signature) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Normalise the referrer according to the whitelist rule.
     *
     * An empty referrer stays empty. A referrer that mentions one of the
     * whitelisted sites is returned unchanged; every other referrer is
     * replaced with the Google home page.
     *
     * The whitelist is matched as a substring anywhere in the referrer
     * (path and query included), not only against the host.
     *
     * @param string $referrer Raw referrer URL.
     * @return string The referrer to store.
     */
    public function initReferrer($referrer)
    {
        if (empty($referrer)) {
            return '';
        }

        // "youtobe" in the original pattern was a typo that caused YouTube
        // referrers to be rewritten; youtu.be short links are accepted as well.
        $isWhitelisted = preg_match(
            '/google|facebook|bing|yahoo|youtube|youtu\.be|linkedin|messefrankfurt|yandex|tiktok|twitter|instagram|reddit|telegram|pinterest|tumblr/',
            $referrer
        );

        return $isWhitelisted ? $referrer : 'https://www.google.com/';
    }
}