structured_scraper/src/Scraping/Scraper.php

30 lines
795 B
PHP

<?php
namespace Scraping;
use \Goutte\Client;
/**
 * Recursive, level-based link follower built on the Goutte HTTP client.
 *
 * Each crawl level has its own XPath expression; the nodes matched on a page
 * at level N supply the URLs that are fetched at level N + 1.
 */
class Scraper
{
    /**
     * Public list property initialised to an empty array.
     * It is neither read nor written by scrape() in this class.
     */
    public $links = [];

    /**
     * Fetches $url, prints a progress line for it, and recursively follows
     * every link matched by the XPath expression registered for $level.
     *
     * Recursion stops once the next level would have no entry in $filters.
     * Links matched on the deepest level are resolved but not fetched.
     *
     * @param string $method  HTTP method handed to the Goutte client (e.g. 'GET').
     * @param string $url     Address of the page to fetch at this level.
     * @param array  $filters XPath expressions indexed by crawl level (0, 1, 2, ...).
     * @param int    $level   Index into $filters describing the current depth.
     *
     * @return void
     */
    public static function scrape($method, $url, $filters, $level)
    {
        $client = new Client();
        $crawler = $client->request($method, $url);

        // Progress line: "[level] url". str_pad() treats $level*2 as the TOTAL
        // width of the label, so the short labels of levels 0 and 1 are not
        // indented at all.
        print str_pad('['.$level.']', $level*2, ' ', STR_PAD_LEFT).' '.$url."\n";

        // Without an expression for this level there is nothing to follow.
        // Guarding here avoids an "undefined offset" notice and a null XPath
        // when the initial call is made with an empty $filters or an
        // out-of-range $level.
        if (!isset($filters[$level])) {
            return;
        }

        // Both values are identical for every matched node, so compute them once.
        $nextLevel = $level + 1;
        $hasNextLevel = $nextLevel < count($filters);

        // 1) List of acts: visit every node matched by this level's expression.
        $crawler->filterXPath($filters[$level])->each(
            function ($node) use ($method, $filters, $nextLevel, $hasNextLevel) {
                // NOTE(review): link() presumably requires the matched node to be
                // a link element, so each expression should select anchors - confirm.
                $nextUrl = $node->link()->getUri();

                if ($hasNextLevel) {
                    self::scrape($method, $nextUrl, $filters, $nextLevel);
                }
            }
        );
    }
}