Merge branch 'develop' of http://47.244.231.31:8099/zhl/globalso-v6 into bate
正在显示 3 个修改的文件，包含 9 行增加和 9 行删除
| @@ -53,6 +53,7 @@ class HtmlCollect extends Command | @@ -53,6 +53,7 @@ class HtmlCollect extends Command | ||
| 53 | 53 | ||
| 54 | protected function start_collect() | 54 | protected function start_collect() |
| 55 | { | 55 | { |
| 56 | + $tdk_project_ids = [714]; | ||
| 56 | $task_id = $this->get_task(); | 57 | $task_id = $this->get_task(); |
| 57 | if ($task_id === false) { | 58 | if ($task_id === false) { |
| 58 | //所有项目采集完成 | 59 | //所有项目采集完成 |
| @@ -108,7 +109,9 @@ class HtmlCollect extends Command | @@ -108,7 +109,9 @@ class HtmlCollect extends Command | ||
| 108 | } | 109 | } |
| 109 | 110 | ||
| 110 | //提取页面tdk | 111 | //提取页面tdk |
| 112 | + if(in_array($project_id,$tdk_project_ids)){ | ||
| 111 | $this->get_site_meta($new_html, $collect_info); | 113 | $this->get_site_meta($new_html, $collect_info); |
| 114 | + } | ||
| 112 | 115 | ||
| 113 | $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); | 116 | $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); |
| 114 | 117 | ||
| @@ -205,13 +208,13 @@ class HtmlCollect extends Command | @@ -205,13 +208,13 @@ class HtmlCollect extends Command | ||
| 205 | #Title | 208 | #Title |
| 206 | preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches); | 209 | preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches); |
| 207 | if (!empty($matches[1])) { | 210 | if (!empty($matches[1])) { |
| 208 | - $meta['title'] = substr($matches[1][0], 0, 70); | 211 | + $meta['title'] = substr($matches[1][0], 0, 255); |
| 209 | } | 212 | } |
| 210 | 213 | ||
| 211 | #Keywords | 214 | #Keywords |
| 212 | preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches); | 215 | preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches); |
| 213 | if (!empty($matches[1])) { | 216 | if (!empty($matches[1])) { |
| 214 | - $meta['keyword'] = substr($matches[1][0], 0, 200); | 217 | + $meta['keyword'] = substr($matches[1][0], 0, 255); |
| 215 | } | 218 | } |
| 216 | 219 | ||
| 217 | #Description | 220 | #Description |
| @@ -239,7 +239,7 @@ class WebTraffic extends Command | @@ -239,7 +239,7 @@ class WebTraffic extends Command | ||
| 239 | ->where('pdo.domain', '>', 0) | 239 | ->where('pdo.domain', '>', 0) |
| 240 | ->where('poc.qa_status', OnlineCheck::STATUS_ONLINE_TRUE) | 240 | ->where('poc.qa_status', OnlineCheck::STATUS_ONLINE_TRUE) |
| 241 | ->whereIn('gl_project.type', [Project::TYPE_TWO, Project::TYPE_FOUR]) | 241 | ->whereIn('gl_project.type', [Project::TYPE_TWO, Project::TYPE_FOUR]) |
| 242 | - ->whereIn('pdo.project_id', [6, 25]) //todo 测试两个项目 后面删掉 | 242 | + ->where('gl_project.is_upgrade', 0) //非升级项目 |
| 243 | ->where(function ($query) use ($type) { | 243 | ->where(function ($query) use ($type) { |
| 244 | if($type == 1){ | 244 | if($type == 1){ |
| 245 | //1-3个月项目 | 245 | //1-3个月项目 |
| @@ -158,15 +158,12 @@ class TranslateLogic extends BaseLogic | @@ -158,15 +158,12 @@ class TranslateLogic extends BaseLogic | ||
| 158 | } | 158 | } |
| 159 | $contentData = array_values($contentData); | 159 | $contentData = array_values($contentData); |
| 160 | $pattern = '/<meta\s+[^>]*name=[\'"](keywords|description)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 name 为 "keywords" 或 "description" 的 meta 标签的正则表达式 | 160 | $pattern = '/<meta\s+[^>]*name=[\'"](keywords|description)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 name 为 "keywords" 或 "description" 的 meta 标签的正则表达式 |
| 161 | - | ||
| 162 | - $matches1 = array(); | ||
| 163 | - preg_match_all($pattern, $strippedContent, $matches1); | 161 | + $matches = array(); |
| 162 | + preg_match_all($pattern, $strippedContent, $matches); | ||
| 164 | $metaData = array(); | 163 | $metaData = array(); |
| 165 | - foreach ($matches1[1] as $content) { | ||
| 166 | - if (!empty($content)) { | 164 | + foreach ($matches[2] as $index => $content) { |
| 167 | $metaData[] = $content; | 165 | $metaData[] = $content; |
| 168 | } | 166 | } |
| 169 | - } | ||
| 170 | $data = array_merge($metaData, $contentData); | 167 | $data = array_merge($metaData, $contentData); |
| 171 | return $data; | 168 | return $data; |
| 172 | } | 169 | } |
-
请 注册 或 登录 后发表评论