X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fdownloader%2Ff4m.py;h=9b15a0e15bd5f2047490feda44662572f46aac6a;hb=067aa17edf5a46a8cbc4d6b90864eddf051fa2bc;hp=e456ed58f6928837fc88e430c4334e227f8529e3;hpb=75a24854073e590f4efc9f037b57dee348f52b61;p=youtube-dl diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index e456ed58f..9b15a0e15 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,12 +1,12 @@ from __future__ import division, unicode_literals -import base64 import io import itertools import time from .fragment import FragmentFD from ..compat import ( + compat_b64decode, compat_etree_fromstring, compat_urlparse, compat_urllib_error, @@ -243,8 +243,17 @@ def remove_encrypted_media(media): media)) -def _add_ns(prop): - return '{http://ns.adobe.com/f4m/1.0}%s' % prop +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url class F4mFD(FragmentFD): @@ -303,7 +312,7 @@ class F4mFD(FragmentFD): boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = base64.b64decode(node.text.encode('ascii')) + bootstrap = compat_b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url @@ -315,8 +324,8 @@ class F4mFD(FragmentFD): urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. prosiebensat1 generated manifests - # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 - # and https://github.com/rg3/youtube-dl/issues/7823) + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) @@ -330,17 +339,17 @@ class F4mFD(FragmentFD): rate, media = list(filter( lambda f: int(f[0]) == requested_bitrate, formats))[0] - base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. + man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - # From Adobe F4M 3.0 spec: - # The element SHALL be the base URL for all relative - # (HTTP-based) URLs in the manifest. If is not present, said - # URLs should be relative to the location of the containing document. - boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = base64.b64decode(metadata_node.text.encode('ascii')) + metadata = compat_b64decode(metadata_node.text) else: metadata = None @@ -376,7 +385,7 @@ class F4mFD(FragmentFD): while fragments_list: seg_i, frag_i = fragments_list.pop(0) frag_index += 1 - if frag_index <= ctx['frag_index']: + if frag_index <= ctx['fragment_index']: continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] @@ -400,7 +409,7 @@ class F4mFD(FragmentFD): # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. If so, write the segment as is - # See https://github.com/rg3/youtube-dl/issues/9214 + # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise