作者 刘锟

html采集

... ... @@ -213,11 +213,16 @@ class HtmlCollect extends Command
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
return [
'download' => true,
'url' => $url,
'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path
];
} else {
return false;
return [
'download' => false,
'url' => $url,
'url_complete' => $source['target']
];
}
} else {
return false;
... ... @@ -232,16 +237,20 @@ class HtmlCollect extends Command
{
foreach ($source as $vs) {
$new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
if ($new_source) {
CollectSource::insert([
'project_id' => $project_id,
'origin' => $vs['url'],
'target' => $new_source,
'created_at' => date('Y-m-d H:i:s'),
'updated_at' => date('Y-m-d H:i:s'),
]);
$html = str_replace($vs['url'], $new_source, $html);
if ($vs['download']) {
$new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
if ($new_source) {
CollectSource::insert([
'project_id' => $project_id,
'origin' => $vs['url'],
'target' => $new_source,
'created_at' => date('Y-m-d H:i:s'),
'updated_at' => date('Y-m-d H:i:s'),
]);
$html = str_replace($vs['url'], $new_source, $html);
}
} else {
$html = str_replace($vs['url'], $vs['url_complete'], $html);
}
}
... ...