作者 张关杰

Merge branch 'develop' of http://47.244.231.31:8099/zhl/globalso-v6 into bate

@@ -53,6 +53,7 @@ class HtmlCollect extends Command @@ -53,6 +53,7 @@ class HtmlCollect extends Command
53 53
54 protected function start_collect() 54 protected function start_collect()
55 { 55 {
  56 + $tdk_project_ids = [714];
56 $task_id = $this->get_task(); 57 $task_id = $this->get_task();
57 if ($task_id === false) { 58 if ($task_id === false) {
58 //所有项目采集完成 59 //所有项目采集完成
@@ -108,7 +109,9 @@ class HtmlCollect extends Command @@ -108,7 +109,9 @@ class HtmlCollect extends Command
108 } 109 }
109 110
110 //提取页面tdk 111 //提取页面tdk
  112 + if(in_array($project_id,$tdk_project_ids)){
111 $this->get_site_meta($new_html, $collect_info); 113 $this->get_site_meta($new_html, $collect_info);
  114 + }
112 115
113 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); 116 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
114 117
@@ -205,13 +208,13 @@ class HtmlCollect extends Command @@ -205,13 +208,13 @@ class HtmlCollect extends Command
205 #Title 208 #Title
206 preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches); 209 preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches);
207 if (!empty($matches[1])) { 210 if (!empty($matches[1])) {
208 - $meta['title'] = substr($matches[1][0], 0, 70); 211 + $meta['title'] = substr($matches[1][0], 0, 255);
209 } 212 }
210 213
211 #Keywords 214 #Keywords
212 preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches); 215 preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
213 if (!empty($matches[1])) { 216 if (!empty($matches[1])) {
214 - $meta['keyword'] = substr($matches[1][0], 0, 200); 217 + $meta['keyword'] = substr($matches[1][0], 0, 255);
215 } 218 }
216 219
217 #Description 220 #Description
@@ -239,7 +239,7 @@ class WebTraffic extends Command @@ -239,7 +239,7 @@ class WebTraffic extends Command
239 ->where('pdo.domain', '>', 0) 239 ->where('pdo.domain', '>', 0)
240 ->where('poc.qa_status', OnlineCheck::STATUS_ONLINE_TRUE) 240 ->where('poc.qa_status', OnlineCheck::STATUS_ONLINE_TRUE)
241 ->whereIn('gl_project.type', [Project::TYPE_TWO, Project::TYPE_FOUR]) 241 ->whereIn('gl_project.type', [Project::TYPE_TWO, Project::TYPE_FOUR])
242 - ->whereIn('pdo.project_id', [6, 25]) //todo 测试两个项目 后面删掉 242 + ->where('gl_project.is_upgrade', 0) //非升级项目
243 ->where(function ($query) use ($type) { 243 ->where(function ($query) use ($type) {
244 if($type == 1){ 244 if($type == 1){
245 //1-3个月项目 245 //1-3个月项目
@@ -158,15 +158,12 @@ class TranslateLogic extends BaseLogic @@ -158,15 +158,12 @@ class TranslateLogic extends BaseLogic
158 } 158 }
159 $contentData = array_values($contentData); 159 $contentData = array_values($contentData);
160 $pattern = '/<meta\s+[^>]*name=[\'"](keywords|description)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 name 为 "keywords" 或 "description" 的 meta 标签的正则表达式 160 $pattern = '/<meta\s+[^>]*name=[\'"](keywords|description)[\'"][^>]*content=[\'"]([^\'"]+)[\'"]/i'; // 匹配 name 为 "keywords" 或 "description" 的 meta 标签的正则表达式
161 -  
162 - $matches1 = array();  
163 - preg_match_all($pattern, $strippedContent, $matches1); 161 + $matches = array();
  162 + preg_match_all($pattern, $strippedContent, $matches);
164 $metaData = array(); 163 $metaData = array();
165 - foreach ($matches1[1] as $content) {  
166 - if (!empty($content)) { 164 + foreach ($matches[2] as $index => $content) {
167 $metaData[] = $content; 165 $metaData[] = $content;
168 } 166 }
169 - }  
170 $data = array_merge($metaData, $contentData); 167 $data = array_merge($metaData, $contentData);
171 return $data; 168 return $data;
172 } 169 }