Aggiunta e documentata modalità debug
This commit is contained in:
parent
99f0fbed83
commit
b5ff9c2899
|
@@ -20,5 +20,8 @@ $allowedMimetypes = [
|
||||||
'application/pdf' => 'pdf'
|
'application/pdf' => 'pdf'
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Specifica se visualizzare le URL scansionate
|
||||||
|
$debug = true;
|
||||||
|
|
||||||
// Directory dove salvare i file trovati
|
// Directory dove salvare i file trovati
|
||||||
$download_dir = __DIR__.'/pdf';
|
$download_dir = __DIR__.'/pdf';
|
||||||
|
|
|
@@ -22,4 +22,6 @@ Inserendo negli `$start_url` l'indirizzo sopra e avviando lo script, dovreste tr
|
||||||
[2] http://localhost/example/pagina2/sub2.3
|
[2] http://localhost/example/pagina2/sub2.3
|
||||||
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.1
|
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.1
|
||||||
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.2
|
[3] http://localhost/example/pagina1/sub1.1/subsub1.1.2
|
||||||
|
[3] http://localhost/example/pagina1/sub1.1/test.pdf
|
||||||
|
[*] Downloading test.pdf
|
||||||
```
|
```
|
|
@@ -18,6 +18,7 @@ $scraper = new Scraper();
|
||||||
// Configure object
|
// Configure object
|
||||||
$scraper->allowedMimetypes = $allowedMimetypes;
|
$scraper->allowedMimetypes = $allowedMimetypes;
|
||||||
$scraper->link_rules = $link_rules;
|
$scraper->link_rules = $link_rules;
|
||||||
|
$scraper->debug = $debug;
|
||||||
|
|
||||||
$scraper->scrape($start_urls, 0);
|
$scraper->scrape($start_urls, 0);
|
||||||
|
|
||||||
|
|
|
@@ -9,6 +9,7 @@ class Scraper
|
||||||
public $results = [];
|
public $results = [];
|
||||||
public $allowedMimetypes;
|
public $allowedMimetypes;
|
||||||
public $link_rules;
|
public $link_rules;
|
||||||
|
public $debug = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrapes specified URLs
|
* Scrapes specified URLs
|
||||||
|
@@ -24,8 +25,8 @@ class Scraper
|
||||||
foreach ($urls as $url) {
|
foreach ($urls as $url) {
|
||||||
$crawler = $client->request('GET', $url);
|
$crawler = $client->request('GET', $url);
|
||||||
|
|
||||||
if( $url == 'http://localhost/example/pagina1/sub1.1/test.pdf' ){
|
if ($this->debug) {
|
||||||
$a = 1;
|
print '['.$level.'] '.$url."\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
$scraped_obj[$url]['content-type'] = $this->getContentType($client);
|
$scraped_obj[$url]['content-type'] = $this->getContentType($client);
|
||||||
|
|
Loading…
Reference in New Issue