[extractor/common] Extract more metadata for VideoObject in _json_ld

This commit is contained in:
Sergey M․ 2016-07-09 03:27:11 +07:00
parent 2de624fdd5
commit 6b3a3098b5
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 9 additions and 2 deletions

View File

@@ -44,6 +44,7 @@ from ..utils import (
sanitized_Request, sanitized_Request,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp,
url_basename, url_basename,
xpath_element, xpath_element,
xpath_text, xpath_text,
@@ -840,10 +841,16 @@ class InfoExtractor(object):
}) })
elif item_type == 'VideoObject': elif item_type == 'VideoObject':
info.update({ info.update({
'url': json_ld.get('contentUrl'),
'title': unescapeHTML(json_ld.get('name')), 'title': unescapeHTML(json_ld.get('name')),
'description': unescapeHTML(json_ld.get('description')), 'description': unescapeHTML(json_ld.get('description')),
'upload_date': unified_strdate(json_ld.get('upload_date')), 'thumbnail': json_ld.get('thumbnailUrl'),
'url': unescapeHTML(json_ld.get('contentUrl')), 'duration': parse_duration(json_ld.get('duration')),
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
'filesize': float_or_none(json_ld.get('contentSize')),
'tbr': int_or_none(json_ld.get('bitrate')),
'width': int_or_none(json_ld.get('width')),
'height': int_or_none(json_ld.get('height')),
}) })
return dict((k, v) for k, v in info.items() if v is not None) return dict((k, v) for k, v in info.items() if v is not None)