作者 张关杰

Merge branch 'develop' of http://47.244.231.31:8099/zhl/globalso-v6 into bate

... ... @@ -53,6 +53,7 @@ class HtmlCollect extends Command
protected function start_collect()
{
$tdk_project_ids = [714];
$task_id = $this->get_task();
if ($task_id === false) {
// All projects have finished collecting
... ... @@ -108,7 +109,9 @@ class HtmlCollect extends Command
}
// Extract the page's TDK (title, description, keywords)
$this->get_site_meta($new_html, $collect_info);
if(in_array($project_id,$tdk_project_ids)){
$this->get_site_meta($new_html, $collect_info);
}
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
... ... @@ -205,13 +208,13 @@ class HtmlCollect extends Command
#Title
preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches);
if (!empty($matches[1])) {
$meta['title'] = substr($matches[1][0], 0, 70);
$meta['title'] = substr($matches[1][0], 0, 255);
}
#Keywords
preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
if (!empty($matches[1])) {
$meta['keyword'] = substr($matches[1][0], 0, 200);
$meta['keyword'] = substr($matches[1][0], 0, 255);
}
#Description
... ...
... ... @@ -239,7 +239,7 @@ class WebTraffic extends Command
->where('pdo.domain', '>', 0)
->where('poc.qa_status', OnlineCheck::STATUS_ONLINE_TRUE)
->whereIn('gl_project.type', [Project::TYPE_TWO, Project::TYPE_FOUR])
->whereIn('pdo.project_id', [6, 25]) //todo 测试两个项目 后面删掉
->where('gl_project.is_upgrade', 0) //非升级项目
->where(function ($query) use ($type) {
if($type == 1){
// 1-3 month projects
... ...
... ... @@ -158,14 +158,11 @@ class TranslateLogic extends BaseLogic
}
$contentData = array_values($contentData);
$pattern = '/<meta\s+[^>]*name=[\'"](keywords|description)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 name 为 "keywords" 或 "description" 的 meta 标签的正则表达式
$matches1 = array();
preg_match_all($pattern, $strippedContent, $matches1);
$matches = array();
preg_match_all($pattern, $strippedContent, $matches);
$metaData = array();
foreach ($matches1[1] as $content) {
if (!empty($content)) {
$metaData[] = $content;
}
foreach ($matches[2] as $index => $content) {
$metaData[] = $content;
}
$data = array_merge($metaData, $contentData);
return $data;
... ...