searx/searx/engines/openverse.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""

 Openverse (formerly known as: Creative Commons search engine) [Images]

"""

from json import loads
from urllib.parse import urlencode


# Descriptive metadata about the upstream service and the way this engine
# accesses it (official JSON API, no API key).
# NOTE(review): the website/documentation links still use Creative Commons
# hosts while ``base_url`` below targets api.openverse.engineering -- confirm
# whether they should follow the rename.
about = {
    "website": "https://search.creativecommons.org/",
    "wikidata_id": None,
    "official_api_documentation": "https://api.creativecommons.engineering/v1/",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

# Engine settings (presumably picked up by the searx engine loader -- confirm).
categories = ["images"]
paging = True

# Number of results requested per page; sent to the API as ``page_size``.
nb_per_page = 20

# Keep the trailing slash: without it the API answers with a 301 redirect
# (/v1/images? -> /v1/images/?).
base_url = "https://api.openverse.engineering/v1/images/"

# Query-string template; ``{query}`` receives an already URL-encoded ``q=...``.
search_string = "?page={page}&page_size={nb_per_page}&format=json&{query}"


def request(query, params):
    """Store the Openverse image-search URL for *query* in ``params['url']``.

    The URL is ``base_url`` followed by ``search_string`` filled in with the
    requested page number (``params['pageno']``), the module-level
    ``nb_per_page`` and the URL-encoded query.

    :param query: search terms; URL-encoded as the ``q`` parameter.
    :param params: request parameters; must contain ``'pageno'``.
    :return: the same ``params`` mapping, with ``'url'`` set.
    """
    encoded_query = urlencode({'q': query})
    query_string = search_string.format(
        page=params['pageno'],
        nb_per_page=nb_per_page,
        query=encoded_query,
    )

    params['url'] = base_url + query_string
    return params


def response(resp):
    """Convert the JSON body of an API response into image results.

    ``resp.text`` is parsed as JSON and every entry of its ``'results'`` list
    becomes one result dict rendered with the ``images.html`` template.

    :param resp: response object exposing the JSON payload as ``resp.text``.
    :return: list of dicts with the keys ``url`` (landing page), ``title``,
        ``img_src`` (image URL) and ``template``.
    :raises KeyError: if ``'results'`` or one of the per-item fields
        (``foreign_landing_url``, ``title``, ``url``) is missing.
    """
    payload = loads(resp.text)

    return [
        {
            'url': item['foreign_landing_url'],
            'title': item['title'],
            'img_src': item['url'],
            'template': 'images.html',
        }
        for item in payload['results']
    ]
Add Creative Commons search engine 2021-02-03 19:13:39 +01:00			`# SPDX-License-Identifier: AGPL-3.0-or-later`
			`"""`

Pick minor fixes from searxng (#3251) * [fix] Rename ccengine engine to openverse The CC engine was merged with WordPress and renamed to Openverse Source: https://wordpress.org/news/2021/05/welcome-to-openverse/ * [fix] ccengine engine - avoid unwanted redirects api.openverse.engineering is a little picky and wants to have a trailing slash in the path: /v1/images? -->/ v1/images/? otherwise it redirects, here is the debug log: DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images?&page=1&page_size=20&format=json&q=foo "HTTP/2 301 Moved Permanently" (text/html; charset=utf-8) DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images/?&page=1&page_size=20&format=json&q=foo "HTTP/2 200 OK" (application/json) WARNING searx.engines.openverse : ErrorContext('searx/search/processors/online.py', 105, 'count_error(', None, '1 redirects, maximum: 0', ('200', 'OK', 'api.openverse.engineering')) True Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> * [fix] FutureWarning from lxml Just in case if content is None, the original code will skip extract_text(), and just append the None value to 'content'. So just add allow_none=True, and this will return None without raising a ValueError in extract_text(). * [enh] Add pagination to Brave Also added ```&spellcheck=1``` because now it is disabled by default, not returning any ```suggestion_xpath```. Co-authored-by: Léon Tiekötter <leon@tiekoetter.com> Co-authored-by: Markus Heiser <markus.heiser@darmarit.de> Co-authored-by: capric98 <42015599+capric98@users.noreply.github.com> Co-authored-by: Allen <64094914+allendema@users.noreply.github.com> 2022-06-06 00:01:27 +02:00			`Openverse (formerly known as: Creative Commons search engine) [Images]`
Add Creative Commons search engine 2021-02-03 19:13:39 +01:00
			`"""`

			`from json import loads`
			`from urllib.parse import urlencode`


			`about = {`
			`"website": 'https://search.creativecommons.org/',`
			`"wikidata_id": None,`
			`"official_api_documentation": 'https://api.creativecommons.engineering/v1/',`
			`"use_official_api": True,`
			`"require_api_key": False,`
			`"results": 'JSON',`
			`}`

			`categories = ['images']`

			`paging = True`
			`nb_per_page = 20`

Pick minor fixes from searxng (#3251) * [fix] Rename ccengine engine to openverse The CC engine was merged with WordPress and renamed to Openverse Source: https://wordpress.org/news/2021/05/welcome-to-openverse/ * [fix] ccengine engine - avoid unwanted redirects api.openverse.engineering is a little picky and wants to have a trailing slash in the path: /v1/images? -->/ v1/images/? otherwise it redirects, here is the debug log: DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images?&page=1&page_size=20&format=json&q=foo "HTTP/2 301 Moved Permanently" (text/html; charset=utf-8) DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images/?&page=1&page_size=20&format=json&q=foo "HTTP/2 200 OK" (application/json) WARNING searx.engines.openverse : ErrorContext('searx/search/processors/online.py', 105, 'count_error(', None, '1 redirects, maximum: 0', ('200', 'OK', 'api.openverse.engineering')) True Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> * [fix] FutureWarning from lxml Just in case if content is None, the original code will skip extract_text(), and just append the None value to 'content'. So just add allow_none=True, and this will return None without raising a ValueError in extract_text(). * [enh] Add pagination to Brave Also added ```&spellcheck=1``` because now it is disabled by default, not returning any ```suggestion_xpath```. Co-authored-by: Léon Tiekötter <leon@tiekoetter.com> Co-authored-by: Markus Heiser <markus.heiser@darmarit.de> Co-authored-by: capric98 <42015599+capric98@users.noreply.github.com> Co-authored-by: Allen <64094914+allendema@users.noreply.github.com> 2022-06-06 00:01:27 +02:00			`base_url = 'https://api.openverse.engineering/v1/images/'`
			`search_string = '?page={page}&page_size={nb_per_page}&format=json&{query}'`
Add Creative Commons search engine 2021-02-03 19:13:39 +01:00

			`def request(query, params):`

			`search_path = search_string.format(`
			`query=urlencode({'q': query}),`
			`nb_per_page=nb_per_page,`
			`page=params['pageno'])`

			`params['url'] = base_url + search_path`

			`return params`


			`def response(resp):`
			`results = []`

			`json_data = loads(resp.text)`

			`for result in json_data['results']:`
			`results.append({'url': result['foreign_landing_url'],`
			`'title': result['title'],`
			`'img_src': result['url'],`
			`'template': 'images.html'})`

			`return results`