作者 刘锟

html采集

@@ -213,11 +213,16 @@ class HtmlCollect extends Command
             $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
             if (!$source) {
                 return [
+                    'download' => true,
                     'url' => $url,
                     'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path
                 ];
             } else {
-                return false;
+                return [
+                    'download' => false,
+                    'url' => $url,
+                    'url_complete' => $source['target']
+                ];
             }
         } else {
             return false;
@@ -232,6 +237,7 @@ class HtmlCollect extends Command
     {
         foreach ($source as $vs) {
 
+            if ($vs['download']) {
             $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
             if ($new_source) {
                 CollectSource::insert([
@@ -243,6 +249,9 @@ class HtmlCollect extends Command
                 ]);
                 $html = str_replace($vs['url'], $new_source, $html);
             }
+            } else {
+                $html = str_replace($vs['url'], $vs['url_complete'], $html);
+            }
         }
 
         return $html;