Aggiunta e documentata modalità debug

This commit is contained in:
loviuz 2022-01-08 20:41:41 +01:00
parent 99f0fbed83
commit b5ff9c2899
4 changed files with 9 additions and 2 deletions

View File

@ -20,5 +20,8 @@ $allowedMimetypes = [
'application/pdf' => 'pdf' 'application/pdf' => 'pdf'
]; ];
// Specifica se visualizzare le URL scansionate
$debug = true;
// Directory dove salvare i file trovati // Directory dove salvare i file trovati
$download_dir = __DIR__.'/pdf'; $download_dir = __DIR__.'/pdf';

View File

@ -22,4 +22,6 @@ Inserendo negli `$start_url` l'indirizzo sopra e avviando lo script, dovreste tr
[2] http://localhost/example/pagina2/sub2.3 [2] http://localhost/example/pagina2/sub2.3
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.1 [3] http://localhost/example/pagina1/sub1.1/subsub1.1.1
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.2 [3] http://localhost/example/pagina1/sub1.1/subsub1.1.2
[3] http://localhost/example/pagina1/sub1.1/test.pdf
[*] Downloading test.pdf
``` ```

View File

@ -18,6 +18,7 @@ $scraper = new Scraper();
// Configure object // Configure object
$scraper->allowedMimetypes = $allowedMimetypes; $scraper->allowedMimetypes = $allowedMimetypes;
$scraper->link_rules = $link_rules; $scraper->link_rules = $link_rules;
$scraper->debug = $debug;
$scraper->scrape($start_urls, 0); $scraper->scrape($start_urls, 0);

View File

@ -9,6 +9,7 @@ class Scraper
public $results = []; public $results = [];
public $allowedMimetypes; public $allowedMimetypes;
public $link_rules; public $link_rules;
public $debug = false;
/** /**
* Scrapes specified URLs * Scrapes specified URLs
@ -24,8 +25,8 @@ class Scraper
foreach ($urls as $url) { foreach ($urls as $url) {
$crawler = $client->request('GET', $url); $crawler = $client->request('GET', $url);
if( $url == 'http://localhost/example/pagina1/sub1.1/test.pdf' ){ if ($this->debug) {
$a = 1; print '['.$level.'] '.$url."\n";
} }
$scraped_obj[$url]['content-type'] = $this->getContentType($client); $scraped_obj[$url]['content-type'] = $this->getContentType($client);