作者 刘锟

合并分支 'akun' 到 'master'

Akun



查看合并请求 !360
@@ -53,6 +53,7 @@ class HtmlCollect extends Command @@ -53,6 +53,7 @@ class HtmlCollect extends Command
53 53
54 protected function start_collect() 54 protected function start_collect()
55 { 55 {
  56 + $tdk_project_ids = [714];
56 $task_id = $this->get_task(); 57 $task_id = $this->get_task();
57 if ($task_id === false) { 58 if ($task_id === false) {
58 //所有项目采集完成 59 //所有项目采集完成
@@ -108,7 +109,9 @@ class HtmlCollect extends Command @@ -108,7 +109,9 @@ class HtmlCollect extends Command
108 } 109 }
109 110
110 //提取页面tdk 111 //提取页面tdk
111 - $this->get_site_meta($new_html, $collect_info); 112 + if(in_array($project_id,$tdk_project_ids)){
  113 + $this->get_site_meta($new_html, $collect_info);
  114 + }
112 115
113 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); 116 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
114 117
@@ -205,13 +208,13 @@ class HtmlCollect extends Command @@ -205,13 +208,13 @@ class HtmlCollect extends Command
205 #Title 208 #Title
206 preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches); 209 preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches);
207 if (!empty($matches[1])) { 210 if (!empty($matches[1])) {
208 - $meta['title'] = substr($matches[1][0], 0, 70); 211 + $meta['title'] = substr($matches[1][0], 0, 255);
209 } 212 }
210 213
211 #Keywords 214 #Keywords
212 preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches); 215 preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
213 if (!empty($matches[1])) { 216 if (!empty($matches[1])) {
214 - $meta['keyword'] = substr($matches[1][0], 0, 200); 217 + $meta['keyword'] = substr($matches[1][0], 0, 255);
215 } 218 }
216 219
217 #Description 220 #Description