[youtube] Add support for classes in swf parser

This commit is contained in:
Philipp Hagemeister 2014-07-18 00:54:17 +02:00
parent 3fbd27f73e
commit 5dc3552d85
1 changed file with 66 additions and 33 deletions

View File

@ -507,6 +507,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
v = - ((v ^ 0xffffffff) + 1) v = - ((v ^ 0xffffffff) + 1)
return v return v
def s24(reader):
    """Read a signed 24-bit integer (ABC ``s24``) from *reader*.

    Branch opcodes such as ``iftrue`` carry their jump offset in this
    encoding.  The three bytes are stored little-endian, so the sign bit
    lives in the third byte.

    reader -- binary file-like object positioned at the first s24 byte.

    Returns the decoded value as a plain int (callers add it to a stream
    position, so a tuple would be unusable).
    Raises AssertionError if fewer than three bytes are available.
    """
    bs = reader.read(3)
    assert len(bs) == 3
    # Sign-extend to 32 bits: replicate the sign of the most significant
    # (last) byte.  Slicing keeps this working on both Python 2 and 3.
    sign_ext = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
    # struct.unpack always returns a tuple; hand back the single value.
    return struct.unpack('<i', bs + sign_ext)[0]
def read_string(reader=None): def read_string(reader=None):
if reader is None: if reader is None:
reader = code_reader reader = code_reader
@ -647,16 +653,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return methods return methods
class AVMClass(object):
    """Per-class bookkeeping collected while parsing the SWF code tag.

    name_idx -- index of the class name in the ``multinames`` table.
    """

    def __init__(self, name_idx):
        self.name_idx = name_idx
        # Two views of the same trait data: name -> index and index -> name.
        self.method_names, self.method_idxs = {}, {}
        # Parsed method bodies and their generated Python callables,
        # both keyed by method name.
        self.methods, self.method_pyfunctions = {}, {}
        # Looked up by the findproperty / getlex opcode handlers.
        self.variables = {}

    @property
    def name(self):
        """Class name, resolved through the enclosing ``multinames`` table."""
        return multinames[self.name_idx]
# Classes # Classes
TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME)
searched_class_id = None
class_count = u30() class_count = u30()
classes = []
for class_id in range(class_count): for class_id in range(class_count):
name_idx = u30() name_idx = u30()
if name_idx == searched_idx: classes.append(AVMClass(name_idx))
# We found the class we're looking for!
searched_class_id = class_id
u30() # super_name idx u30() # super_name idx
flags = read_byte() flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present if flags & 0x08 != 0: # Protected namespace is present
@ -668,21 +683,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
trait_count = u30() trait_count = u30()
for _c2 in range(trait_count): for _c2 in range(trait_count):
parse_traits_info() parse_traits_info()
assert len(classes) == class_count
if searched_class_id is None: TARGET_CLASSNAME = u'SignatureDecipher'
# Supply a default so a missing class yields None (tested right below)
# rather than an uncaught StopIteration from the exhausted generator.
searched_class = next(
    (c for c in classes if c.name == TARGET_CLASSNAME), None)
if searched_class is None:
raise ExtractorError(u'Target class %r not found' % raise ExtractorError(u'Target class %r not found' %
TARGET_CLASSNAME) TARGET_CLASSNAME)
method_names = {} for avm_class in classes:
method_idxs = {}
for class_id in range(class_count):
u30() # cinit u30() # cinit
trait_count = u30() trait_count = u30()
for _c2 in range(trait_count): for _c2 in range(trait_count):
trait_methods = parse_traits_info() trait_methods = parse_traits_info()
if class_id == searched_class_id: avm_class.method_names.update(trait_methods.items())
method_names.update(trait_methods.items()) avm_class.method_idxs.update(dict(
method_idxs.update(dict(
(idx, name) (idx, name)
for name, idx in trait_methods.items())) for name, idx in trait_methods.items()))
@ -697,7 +713,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Method bodies # Method bodies
method_body_count = u30() method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count']) Method = collections.namedtuple('Method', ['code', 'local_count'])
methods = {}
for _c in range(method_body_count): for _c in range(method_body_count):
method_idx = u30() method_idx = u30()
u30() # max_stack u30() # max_stack
@ -706,9 +721,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u30() # max_scope_depth u30() # max_scope_depth
code_length = u30() code_length = u30()
code = read_bytes(code_length) code = read_bytes(code_length)
if method_idx in method_idxs: for avm_class in classes:
if method_idx in avm_class.method_idxs:
m = Method(code, local_count) m = Method(code, local_count)
methods[method_idxs[method_idx]] = m avm_class.methods[avm_class.method_idxs[method_idx]] = m
exception_count = u30() exception_count = u30()
for _c2 in range(exception_count): for _c2 in range(exception_count):
u30() # from u30() # from
@ -721,16 +737,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
parse_traits_info() parse_traits_info()
assert p + code_reader.tell() == len(code_tag) assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {} def extract_function(avm_class, func_name):
if func_name in avm_class.method_pyfunctions:
def extract_function(func_name): return avm_class.method_pyfunctions[func_name]
if func_name in method_pyfunctions: if func_name not in avm_class.methods:
return method_pyfunctions[func_name]
if func_name not in methods:
raise ExtractorError(u'Cannot find function %r' % func_name) raise ExtractorError(u'Cannot find function %r' % func_name)
m = methods[func_name] m = avm_class.methods[func_name]
def resfunc(args): def resfunc(args):
registers = ['(this)'] + list(args) + [None] * m.local_count registers = ['(this)'] + list(args) + [None] * m.local_count
@ -738,7 +751,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
coder = io.BytesIO(m.code) coder = io.BytesIO(m.code)
while True: while True:
opcode = struct.unpack('!B', coder.read(1))[0] opcode = struct.unpack('!B', coder.read(1))[0]
if opcode == 36: # pushbyte if opcode == 17: # iftrue
offset = s24(coder)
value = stack.pop()
if value:
coder.seek(coder.tell() + offset)
elif opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0] v = struct.unpack('!B', coder.read(1))[0]
stack.append(v) stack.append(v)
elif opcode == 44: # pushstring elif opcode == 44: # pushstring
@ -776,8 +794,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
assert isinstance(obj, list) assert isinstance(obj, list)
res = args[0].join(obj) res = args[0].join(obj)
stack.append(res) stack.append(res)
elif mname in method_pyfunctions: elif mname in avm_class.method_pyfunctions:
stack.append(method_pyfunctions[mname](args)) stack.append(avm_class.method_pyfunctions[mname](args))
else: else:
raise NotImplementedError( raise NotImplementedError(
u'Unsupported property %r on %r' u'Unsupported property %r on %r'
@ -809,7 +827,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif opcode == 93: # findpropstrict elif opcode == 93: # findpropstrict
index = u30(coder) index = u30(coder)
mname = multinames[index] mname = multinames[index]
res = extract_function(mname) res = extract_function(avm_class, mname)
stack.append(res)
elif opcode == 94: # findproperty
index = u30(coder)
mname = multinames[index]
res = avm_class.variables.get(mname)
stack.append(res)
elif opcode == 96: # getlex
index = u30(coder)
mname = multinames[index]
res = avm_class.variables.get(mname)
stack.append(res) stack.append(res)
elif opcode == 97: # setproperty elif opcode == 97: # setproperty
index = u30(coder) index = u30(coder)
@ -848,6 +876,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
value1 = stack.pop() value1 = stack.pop()
res = value1 % value2 res = value1 % value2
stack.append(res) stack.append(res)
elif opcode == 175: # greaterequals
value2 = stack.pop()
value1 = stack.pop()
result = value1 >= value2
stack.append(result)
elif opcode == 208: # getlocal_0 elif opcode == 208: # getlocal_0
stack.append(registers[0]) stack.append(registers[0])
elif opcode == 209: # getlocal_1 elif opcode == 209: # getlocal_1
@ -864,10 +897,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise NotImplementedError( raise NotImplementedError(
u'Unsupported opcode %d' % opcode) u'Unsupported opcode %d' % opcode)
method_pyfunctions[func_name] = resfunc avm_class.method_pyfunctions[func_name] = resfunc
return resfunc return resfunc
initial_function = extract_function(u'decipher') initial_function = extract_function(searched_class, u'decipher')
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False): def _decrypt_signature(self, s, video_id, player_url, age_gate=False):