作者 刘锟

html采集

... ... @@ -213,11 +213,16 @@ class HtmlCollect extends Command
// Look up whether this URL has already been collected for the project.
$source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
if (!$source) {
    // No stored record: mark the resource for download and build its absolute
    // URL, defaulting to 'https' and $domain when $scheme / $host are empty.
    return [
        'download' => true,
        'url' => $url,
        'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path
    ];
} else {
    // A record already exists: do not download again, but still return a
    // descriptor carrying the stored target. The previous `return false;` that
    // preceded this array made it unreachable, so links to already-collected
    // resources could never be rewritten in the HTML.
    return [
        'download' => false,
        'url' => $url,
        'url_complete' => $source['target']
    ];
}
} else {
return false;
... ... @@ -232,6 +237,7 @@ class HtmlCollect extends Command
{
foreach ($source as $vs) {
if ($vs['download']) {
$new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
if ($new_source) {
CollectSource::insert([
... ... @@ -243,6 +249,9 @@ class HtmlCollect extends Command
]);
$html = str_replace($vs['url'], $new_source, $html);
}
} else {
$html = str_replace($vs['url'], $vs['url_complete'], $html);
}
}
return $html;
... ...