From e070b9b511aea783d9709acd014e759aaaf5f7e1 Mon Sep 17 00:00:00 2001 From: Jason Ghent Date: Sat, 30 Nov 2013 12:08:17 -0500 Subject: [PATCH 1/3] Added stackoverflow parsing. --- inc/3rdparty/site_config/custom/stackoverflow.com.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 inc/3rdparty/site_config/custom/stackoverflow.com.txt diff --git a/inc/3rdparty/site_config/custom/stackoverflow.com.txt b/inc/3rdparty/site_config/custom/stackoverflow.com.txt new file mode 100755 index 000000000..d2eb984d3 --- /dev/null +++ b/inc/3rdparty/site_config/custom/stackoverflow.com.txt @@ -0,0 +1,4 @@ +title: //title +body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')] + +test_url: http://stackoverflow.com/questions/20302422/calling-a-function-from-a-javascript-object From 2ab37d6205d3e4f7bfacf702de6755a924931751 Mon Sep 17 00:00:00 2001 From: Jason Ghent Date: Sat, 30 Nov 2013 13:00:24 -0500 Subject: [PATCH 2/3] Addition of stackexchange parser. --- inc/3rdparty/site_config/custom/stackexchange.com.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 inc/3rdparty/site_config/custom/stackexchange.com.txt diff --git a/inc/3rdparty/site_config/custom/stackexchange.com.txt b/inc/3rdparty/site_config/custom/stackexchange.com.txt new file mode 100755 index 000000000..c9d44b1d1 --- /dev/null +++ b/inc/3rdparty/site_config/custom/stackexchange.com.txt @@ -0,0 +1,4 @@ +title: //title +body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')] + +test_url: http://cstheory.stackexchange.com/questions/14811/what-is-the-enlightenment-im-supposed-to-attain-after-studying-finite-automata/14818#14818 From 16ac4e3dbe5b3137fdb3d5f476efe627c0aad8e4 Mon Sep 17 00:00:00 2001 From: Jason Date: Sat, 30 Nov 2013 13:02:18 -0500 Subject: [PATCH 3/3] Subdomain to domain failover left incorrect leading '.'. This has been remedied. --- inc/3rdparty/content-extractor/SiteConfig.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/3rdparty/content-extractor/SiteConfig.php b/inc/3rdparty/content-extractor/SiteConfig.php index 089e10c62..efb229dd2 100644 --- a/inc/3rdparty/content-extractor/SiteConfig.php +++ b/inc/3rdparty/content-extractor/SiteConfig.php @@ -114,7 +114,7 @@ class SiteConfig $split = explode('.', $host); if (count($split) > 1) { array_shift($split); - $try[] = '.'.implode('.', $split); + $try[] = implode('.', $split); } foreach ($try as $h) { if (array_key_exists($h, self::$config_cache)) { @@ -181,4 +181,4 @@ class SiteConfig return $config; } } -?> \ No newline at end of file +?>