[pornhub] Extract `cast`

Closes #406, https://github.com/ytdl-org/youtube-dl/pull/27384
This commit is contained in:
pukkandan 2021-06-13 21:36:47 +05:30
parent 3fd4c2a543
commit d0fb4bd16f
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
2 changed files with 5 additions and 1 deletions

View File

@@ -290,6 +290,7 @@ class InfoExtractor(object):
categories: A list of categories that the video falls in, for example categories: A list of categories that the video falls in, for example
["Sports", "Berlin"] ["Sports", "Berlin"]
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"] tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
cast: A list of the video cast
is_live: True, False, or None (=unknown). Whether this video is a is_live: True, False, or None (=unknown). Whether this video is a
live stream that goes on instead of a fixed-length video. live stream that goes on instead of a fixed-length video.
was_live: True, False, or None (=unknown). Whether this video was was_live: True, False, or None (=unknown). Whether this video was

View File

@@ -14,6 +14,7 @@ from ..compat import (
) )
from .openload import PhantomJSwrapper from .openload import PhantomJSwrapper
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -145,6 +146,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18, 'age_limit': 18,
'tags': list, 'tags': list,
'categories': list, 'categories': list,
'cast': list,
}, },
}, { }, {
# non-ASCII title # non-ASCII title
@@ -464,7 +466,7 @@ class PornHubIE(PornHubBaseIE):
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
% meta_key, webpage, meta_key, default=None) % meta_key, webpage, meta_key, default=None)
if div: if div:
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})
# description provided in JSON-LD is irrelevant # description provided in JSON-LD is irrelevant
@@ -485,6 +487,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18, 'age_limit': 18,
'tags': extract_list('tags'), 'tags': extract_list('tags'),
'categories': extract_list('categories'), 'categories': extract_list('categories'),
'cast': extract_list('pornstars'),
'subtitles': subtitles, 'subtitles': subtitles,
}, info) }, info)