96 lines
2.9 KiB
Plaintext
Executable File
96 lines
2.9 KiB
Plaintext
Executable File
# A. Niepel, narya.de@...
|
|
# - added single_page_link
|
|
# - added author for default and single page view
|
|
# - added date for single page view
|
|
# fforst@...
|
|
# - Fixed it
|
|
# bode2104@...
|
|
# - Fixed single_page_link
|
|
# - Included intro text in single page view
|
|
# - Added body in default view
|
|
# stesie@
|
|
# - removed copyright box
|
|
# - removed "print more" box
|
|
|
|
# pre-processing switch: Tidy is turned off for this site
|
|
tidy: no
|
|
# body candidates, one per page layout
# body in single page view: the container with id "spArticleContent"
|
|
body: //div[@id="spArticleContent"]
|
|
# body in default view: the container with id "spArticleSection"
|
|
body: //div[@id="spArticleSection"]
|
|
# body in default view, class-based markup: union of article sections,
# the top wide asset, the intro paragraph and image module boxes
body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]
|
|
# body in "Fotostrecke" (photo gallery) view
|
|
body: //div[@id="spBigaContent"]
|
|
|
|
# date in single page view: the <h3> directly under #spArticleContent
|
|
date: //div[@id="spArticleContent"]/h3
|
|
# strip that same <h3> (identical XPath to the date rule above) from the content
|
|
strip: //div[@id="spArticleContent"]/h3
|
|
# date in "Fotostrecke" (photo gallery) view
|
|
date: //div[@id="spBigaDatum"]
|
|
|
|
# title in default view: <h2> whose class contains "article-title"
|
|
title: //h2[contains(@class, 'article-title')]
|
|
# title in single page view: the <h2> directly under #spArticleContent
|
|
title: //div[@id='spArticleContent']/h2
|
|
# strip headings from the single page content: both the <h1> and the <h2>
# (the <h2> is the element used as title above)
|
|
strip: //div[@id='spArticleContent']/h1
|
|
strip: //div[@id='spArticleContent']/h2
|
|
# title in "Fotostrecke" (photo gallery) view
|
|
title: //div[@class='spBigaHeadline']
|
|
|
|
# author: the link inside the "spAuthor" paragraph
|
|
author: //p[@class="spAuthor"]/a
|
|
# alternative author rule: text of the same paragraph after the literal 'Von '
# ("Von" is German for "by")
author: substring-after(//p[@class="spAuthor"], 'Von ')
|
|
# strip the whole "spAuthor" paragraph from the content
|
|
strip: //p[@class='spAuthor']
|
|
|
|
# remove captions: picture layer text, panorama player controls, credit boxes
|
|
strip: //*/span[@class='spPicLayerText']
|
|
strip: //*/div[@class='spPanoPlayerPaneControl']
|
|
strip: //*/div[@class='spCredit']
|
|
# also remove every <p> sibling that follows a credit box
strip: //*/div[@class='spCredit']/following-sibling::p
|
|
|
|
# remove ads (elements with class exactly "spMInline")
|
|
strip: //div[@class='spMInline']
|
|
|
|
# remove photo galleries and extras
|
|
strip: //div[contains(@class, 'spPhotoGallery')]
|
|
# line breaks following a gallery; note this rule matches the class exactly,
# while the gallery rule above uses contains()
strip: //div[@class='spPhotoGallery']/following-sibling::br
|
|
# left-aligned asset boxes (also matched by the contains(@class,'spAsset') rule below)
strip: //div[@class='spAssetAlignleft']
|
|
strip: //div[contains(@class,'spAsset')]
|
|
# float-clearing line breaks
strip: //br[@clear='all']
|
|
|
|
# remove community functions: social bookmarks, community boxes,
# newsfeed boxes and the article credit box
|
|
strip: //div[@id='spSocialBookmark']
|
|
strip: //div[contains(@class, 'spCommunityBox')]
|
|
strip: //div[contains(@class, 'spArticleNewsfeedBox')]
|
|
strip: //div[@class='spArticleCredit']
|
|
|
|
# remove clutter in "Fotostrecke" (photo gallery) view: breadcrumb,
# latest-entries box, gallery navigation and dotted separator lines
|
|
strip: //div[@id='spBreadcrumb']
|
|
strip: //div[@id='spBigaLatestEntries']
|
|
strip: //div[contains(@class, 'spBigaNavi')]
|
|
strip: //div[@class='spDottedLine']
|
|
|
|
# remove the "print more" box, the copyright box and the image buttons
strip: //div[@class='asset-box article-print-more']
|
|
strip: //div[@class='article-copyright']
|
|
strip: //span[@class='image-buttons']
|
|
|
|
# Use the print-article link for single page view: any <a> whose href
# contains "-druck" ("Druck" is German for "print")
|
|
single_page_link: //a[contains(@href, '-druck')]
|
|
# condition: page has an element whose class contains "multi-pager-control"
# NOTE(review): presumably this gates the single_page_link above — confirm against the site-config docs
if_page_contains: //div[contains(@class, 'multi-pager-control')]
|
|
|
|
# Clean up title in print view: replace the "<title>Druckversion -" prefix
# ("Druckversion" is German for "print version") with a bare "<title>"
|
|
find_string: <title>Druckversion -
|
|
replace_string: <title>
|
|
|
|
# use the "forward" control as next-page link in "Fotostrecke" (photo gallery)
|
|
next_page_link: //a[@class='spBigaControlForw']
|
|
# test URLs
# article with a numeric, comma-separated URL (unlabelled in the original config)
test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
|
|
|
|
# regular article
|
|
test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
|
|
|
|
# multipage article
# (a stray trailing " |" that had been appended to this URL value was removed)
test_url: http://www.spiegel.de/spiegel/a-710880.html