[soundcloud] Extract more metadata

This commit is contained in:
Sergey M․ 2019-02-10 23:44:08 +07:00
parent e9dee7f1b2
commit f516f44094
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 56 additions and 9 deletions

View File

@ -16,7 +16,8 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unified_strdate, try_get,
unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
@ -51,12 +52,17 @@ class SoundcloudIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '62986583', 'id': '62986583',
'ext': 'mp3', 'ext': 'mp3',
'upload_date': '20121011', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music', 'uploader': 'E.T. ExTerrestrial Music',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'timestamp': 1349920598,
'upload_date': '20121011',
'duration': 143, 'duration': 143,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
} }
}, },
# not streamable song # not streamable song
@ -68,9 +74,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed', 'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept', 'uploader': 'The Royal Concept',
'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 227, 'duration': 30,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': { 'params': {
# rtmp # rtmp
@ -85,11 +96,16 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367', 'id': '123998367',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# private link (alt format) # private link (alt format)
@ -100,11 +116,16 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367', 'id': '123998367',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# downloadable song # downloadable song
@ -117,9 +138,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes', 'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples', 'uploader': 'oddsamples',
'timestamp': 1389232924,
'upload_date': '20140109', 'upload_date': '20140109',
'duration': 17, 'duration': 17,
'license': 'cc-by-sa', 'license': 'cc-by-sa',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -132,9 +158,14 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music', 'uploader': 'Ori Uplift Music',
'timestamp': 1504206263,
'upload_date': '20170831', 'upload_date': '20170831',
'duration': 7449, 'duration': 7449,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -147,10 +178,15 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)', 'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee', 'uploader': 'garyvee',
'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207, 'duration': 207,
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -176,22 +212,33 @@ class SoundcloudIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
title = info['title']
name = full_title or track_id name = full_title or track_id
if quiet: if quiet:
self.report_extraction(name) self.report_extraction(name)
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str): if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500') thumbnail = thumbnail.replace('-large', '-t500x500')
username = try_get(info, lambda x: x['user']['username'], compat_str)
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
result = { result = {
'id': track_id, 'id': track_id,
'uploader': info.get('user', {}).get('username'), 'uploader': username,
'upload_date': unified_strdate(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': info['title'], 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000), 'duration': int_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'), 'webpage_url': info.get('permalink_url'),
'license': info.get('license'), 'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': extract_count('favoritings'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
} }
formats = [] formats = []
query = {'client_id': self._CLIENT_ID} query = {'client_id': self._CLIENT_ID}