fix vimeo engine and add comments

The engine still generates "(Error: None)"; the cause is not yet known.
This commit is contained in:
Thomas Pointhuber 2014-09-01 17:10:25 +02:00
parent 03db970e6a
commit 58a443be29
2 changed files with 41 additions and 27 deletions

View File

@ -1,43 +1,58 @@
## Vimeo (Videos)
#
# @website https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api), they have a maximum count of queries/hour
#
# @using-api no (TODO, rewrite to api)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, publishedDate, thumbnail
#
# @todo rewrite to api
# @todo set content-parameter with correct data
from urllib import urlencode from urllib import urlencode
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from dateutil import parser from dateutil import parser
base_url = 'http://vimeo.com' # engine dependent config
search_url = base_url + '/search?{query}' categories = ['videos']
url_xpath = None paging = True
content_xpath = None
title_xpath = None # search-url
results_xpath = '' base_url = 'https://vimeo.com'
content_tpl = '<a href="{0}"> <img src="{2}"/> </a>' search_url = base_url + '/search/page:{pageno}?{query}'
# specific xpath variables
url_xpath = './a/@href'
content_xpath = './a/img/@src'
title_xpath = './a/div[@class="data"]/p[@class="title"]/text()'
results_xpath = '//div[@id="browse_content"]/ol/li'
publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
# the cookie set by vimeo contains all the following values,
# but only __utma seems to be required
cookie = {
#'vuid':'918282893.1027205400'
# 'ab_bs':'%7B%223%22%3A279%7D'
'__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
# '__utmb':'18302654.1.10.1388942090'
#, '__utmc':'18302654'
#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' # noqa
#, '__utml':'search'
}
# do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query})) params['url'] = search_url.format(pageno=params['pageno'] ,
params['cookies'] = cookie query=urlencode({'q': query}))
# TODO required?
params['cookies']['__utma'] = '00000000.000#0000000.0000000000.0000000000.0000000000.0'
return params return params
# get response from search-request
def response(resp): def response(resp):
results = [] results = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
p = HTMLParser() p = HTMLParser()
# parse results
for result in dom.xpath(results_xpath): for result in dom.xpath(results_xpath):
url = base_url + result.xpath(url_xpath)[0] url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath))) title = p.unescape(extract_text(result.xpath(title_xpath)))
@ -45,10 +60,13 @@ def response(resp):
publishedDate = parser.parse(extract_text( publishedDate = parser.parse(extract_text(
result.xpath(publishedDate_xpath)[0])) result.xpath(publishedDate_xpath)[0]))
# append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,
'content': content_tpl.format(url, title, thumbnail), 'content': '',
'template': 'videos.html', 'template': 'videos.html',
'publishedDate': publishedDate, 'publishedDate': publishedDate,
'thumbnail': thumbnail}) 'thumbnail': thumbnail})
# return results
return results return results

View File

@ -153,11 +153,7 @@ engines:
- name : vimeo - name : vimeo
engine : vimeo engine : vimeo
categories : videos locale : en-US
results_xpath : //div[@id="browse_content"]/ol/li
url_xpath : ./a/@href
title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
content_xpath : ./a/img/@src
shortcut : vm shortcut : vm
locales: locales: