作者 刘锟

update

@@ -103,7 +103,7 @@ class HtmlSpecialCollect extends Command @@ -103,7 +103,7 @@ class HtmlSpecialCollect extends Command
103 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); 103 $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
104 104
105 if ($source_list) { 105 if ($source_list) {
106 - $html = $this->upload_source($html, array_unique($source_list), $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); 106 + $html = $this->upload_source($html, $source_list, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
107 } 107 }
108 } catch (\Exception $e) { 108 } catch (\Exception $e) {
109 $collect_info->status = CollectTask::STATUS_FAIL; 109 $collect_info->status = CollectTask::STATUS_FAIL;
@@ -198,48 +198,60 @@ class HtmlSpecialCollect extends Command @@ -198,48 +198,60 @@ class HtmlSpecialCollect extends Command
198 $img = $result_img[2] ?? []; 198 $img = $result_img[2] ?? [];
199 foreach ($img as $vi) { 199 foreach ($img as $vi) {
200 $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url); 200 $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url);
  201 + if ($check_vi && (!in_array($check_vi, $source))) {
201 $check_vi && $source[] = $check_vi; 202 $check_vi && $source[] = $check_vi;
202 } 203 }
  204 + }
203 205
204 //js 206 //js
205 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); 207 preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js);
206 $js = $result_js[2] ?? []; 208 $js = $result_js[2] ?? [];
207 foreach ($js as $vj) { 209 foreach ($js as $vj) {
208 $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url); 210 $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url);
  211 + if ($check_vj && (!in_array($check_vj, $source))) {
209 $check_vj && $source[] = $check_vj; 212 $check_vj && $source[] = $check_vj;
210 } 213 }
  214 + }
211 215
212 //video 216 //video
213 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); 217 preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video);
214 $video = $result_video[2] ?? []; 218 $video = $result_video[2] ?? [];
215 foreach ($video as $vv) { 219 foreach ($video as $vv) {
216 $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url); 220 $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url);
  221 + if ($check_vv && (!in_array($check_vv, $source))) {
217 $check_vv && $source[] = $check_vv; 222 $check_vv && $source[] = $check_vv;
218 } 223 }
  224 + }
219 225
220 //css 226 //css
221 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); 227 preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css);
222 $css = $result_css[2] ?? []; 228 $css = $result_css[2] ?? [];
223 foreach ($css as $vc) { 229 foreach ($css as $vc) {
224 $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url); 230 $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url);
  231 + if ($check_vc && (!in_array($check_vc, $source))) {
225 $check_vc && $source[] = $check_vc; 232 $check_vc && $source[] = $check_vc;
226 } 233 }
  234 + }
227 235
228 //css background 236 //css background
229 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); 237 preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b);
230 $css_b = $result_css_b[1] ?? []; 238 $css_b = $result_css_b[1] ?? [];
231 foreach ($css_b as $vc_b) { 239 foreach ($css_b as $vc_b) {
232 $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url); 240 $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url);
  241 + if ($check_vc_b && (!in_array($check_vc_b, $source))) {
233 $check_vc_b && $source[] = $check_vc_b; 242 $check_vc_b && $source[] = $check_vc_b;
234 } 243 }
  244 + }
235 245
236 //a标签下载资源 246 //a标签下载资源
237 preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a); 247 preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a);
238 $down = $result_a[2] ?? []; 248 $down = $result_a[2] ?? [];
239 foreach ($down as $vd) { 249 foreach ($down as $vd) {
240 $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url); 250 $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url);
  251 + if ($check_vd && (!in_array($check_vd, $source))) {
241 $check_vd && $source[] = $check_vd; 252 $check_vd && $source[] = $check_vd;
242 } 253 }
  254 + }
243 255
244 return $source; 256 return $source;
245 } 257 }
@@ -270,7 +282,6 @@ class HtmlSpecialCollect extends Command @@ -270,7 +282,6 @@ class HtmlSpecialCollect extends Command
270 ) { 282 ) {
271 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); 283 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
272 if (!$source) { 284 if (!$source) {
273 - echo $url . PHP_EOL;  
274 return [ 285 return [
275 'download' => true, 286 'download' => true,
276 'url' => $url, 287 'url' => $url,