作者 lyh
... ... @@ -53,7 +53,6 @@ class HtmlCollect extends Command
protected function start_collect()
{
$tdk_project_ids = [714];
$task_id = $this->get_task();
if ($task_id === false) {
//所有项目采集完成
... ... @@ -109,9 +108,7 @@ class HtmlCollect extends Command
}
//提取页面tdk
if(in_array($project_id,$tdk_project_ids)){
$this->get_site_meta($new_html, $collect_info);
}
$this->get_site_meta($new_html, $collect_info);
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
... ... @@ -218,7 +215,7 @@ class HtmlCollect extends Command
}
#Description
preg_match_all('/<meta name=[\'|\"]description[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
preg_match_all('/<meta\s+[^>]*?name=[\'|\"]description[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
if (!empty($matches[1])) {
$meta['description'] = substr($matches[1][0], 0, 255);
}
... ...