Aggiunta e documentata modalità debug

This commit is contained in:
loviuz 2022-01-08 20:41:41 +01:00
parent 99f0fbed83
commit b5ff9c2899
4 changed files with 9 additions and 2 deletions

View File

@ -20,5 +20,8 @@ $allowedMimetypes = [
'application/pdf' => 'pdf'
];
// Whether to print each scanned URL (prefixed with its level) while scraping
$debug = true;
// Directory where the found files are saved
$download_dir = __DIR__.'/pdf';

View File

@ -22,4 +22,6 @@ Inserendo negli `$start_url` l'indirizzo sopra e avviando lo script, dovreste tr
[2] http://localhost/example/pagina2/sub2.3
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.1
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.2
[3] http://localhost/example/pagina1/sub1.1/test.pdf
[*] Downloading test.pdf
```

View File

@ -18,6 +18,7 @@ $scraper = new Scraper();
// Configure the scraper object with the settings defined in the configuration
$scraper->allowedMimetypes = $allowedMimetypes;
$scraper->link_rules = $link_rules;
// Enable or disable printing of each visited URL
$scraper->debug = $debug;
// Start scraping from the configured start URLs; the second argument is
// presumably the initial depth level (0) -- TODO confirm against scrape()'s signature
$scraper->scrape($start_urls, 0);

View File

@ -9,6 +9,7 @@ class Scraper
public $results = [];
public $allowedMimetypes;
public $link_rules;
public $debug = false;
/**
* Scrapes specified URLs
@ -24,8 +25,8 @@ class Scraper
foreach ($urls as $url) {
$crawler = $client->request('GET', $url);
if( $url == 'http://localhost/example/pagina1/sub1.1/test.pdf' ){
$a = 1;
if ($this->debug) {
print '['.$level.'] '.$url."\n";
}
$scraped_obj[$url]['content-type'] = $this->getContentType($client);