作者 刘锟

合并分支 'akun' 到 'master'

Akun



查看合并请求 !360
... ... @@ -53,6 +53,7 @@ class HtmlCollect extends Command
protected function start_collect()
{
$tdk_project_ids = [714];
$task_id = $this->get_task();
if ($task_id === false) {
//所有项目采集完成
... ... @@ -108,7 +109,9 @@ class HtmlCollect extends Command
}
//提取页面tdk
if(in_array($project_id,$tdk_project_ids)){
$this->get_site_meta($new_html, $collect_info);
}
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
... ... @@ -205,13 +208,13 @@ class HtmlCollect extends Command
#Title
preg_match_all('/<title>([\w\W]*?)<\/title>/', $html, $matches);
if (!empty($matches[1])) {
-$meta['title'] = substr($matches[1][0], 0, 70);
+$meta['title'] = substr($matches[1][0], 0, 255);
}
#Keywords
preg_match_all('/<meta\s+[^>]*?name=[\'|\"]keywords[\'|\"]\s+[^>]*?content=[\'|\"]([\w\W]*?)[\'|\"]/', $html, $matches);
if (!empty($matches[1])) {
-$meta['keyword'] = substr($matches[1][0], 0, 200);
+$meta['keyword'] = substr($matches[1][0], 0, 255);
}
#Description
... ...