Merge pull request #3927 from qrtt1/master
[youtube-dl] / youtube_dl / downloader / f4m.py
index 9a6c03556a64ae5b052354e8973d7bc01546b392..ef3e0d5f44122621eb9252f93dd1c5471b3fcfee 100644 (file)
@@ -4,18 +4,21 @@ import base64
 import io
 import itertools
 import os
 import io
 import itertools
 import os
-from struct import unpack, pack
 import time
 import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
 from .http import HttpFD
 import time
 import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
 from .http import HttpFD
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
     compat_urlparse,
     compat_urlparse,
+)
+from ..utils import (
+    struct_pack,
+    struct_unpack,
     format_bytes,
     encodeFilename,
     sanitize_open,
     format_bytes,
     encodeFilename,
     sanitize_open,
+    xpath_text,
 )
 
 
 )
 
 
@@ -27,13 +30,13 @@ class FlvReader(io.BytesIO):
 
     # Utility functions for reading numbers and strings
     def read_unsigned_long_long(self):
 
     # Utility functions for reading numbers and strings
     def read_unsigned_long_long(self):
-        return unpack('!Q', self.read(8))[0]
+        return struct_unpack('!Q', self.read(8))[0]
 
     def read_unsigned_int(self):
 
     def read_unsigned_int(self):
-        return unpack('!I', self.read(4))[0]
+        return struct_unpack('!I', self.read(4))[0]
 
     def read_unsigned_char(self):
 
     def read_unsigned_char(self):
-        return unpack('!B', self.read(1))[0]
+        return struct_unpack('!B', self.read(1))[0]
 
     def read_string(self):
         res = b''
 
     def read_string(self):
         res = b''
@@ -54,7 +57,7 @@ class FlvReader(io.BytesIO):
         if size == 1:
             real_size = self.read_unsigned_long_long()
             header_end = 16
         if size == 1:
             real_size = self.read_unsigned_long_long()
             header_end = 16
-        return real_size, box_type, self.read(real_size-header_end)
+        return real_size, box_type, self.read(real_size - header_end)
 
     def read_asrt(self):
         # version
 
     def read_asrt(self):
         # version
@@ -116,8 +119,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_char()
         # flags
         self.read(3)
         self.read_unsigned_char()
         # flags
         self.read(3)
-        # BootstrapinfoVersion
-        bootstrap_info_version = self.read_unsigned_int()
+
+        self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
@@ -126,15 +129,15 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
-        # MovieIdentifier
-        movie_identifier = self.read_string()
+
+        self.read_string()  # MovieIdentifier
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
-        for i in range(server_count):
+        for i in range(quality_count):
             self.read_string()
         # DrmData
         self.read_string()
             self.read_string()
         # DrmData
         self.read_string()
@@ -179,7 +182,7 @@ def build_fragments_list(boot_info):
     n_frags = segment_run_entry[1]
     fragment_run_entry_table = boot_info['fragments'][0]['fragments']
     first_frag_number = fragment_run_entry_table[0]['first']
     n_frags = segment_run_entry[1]
     fragment_run_entry_table = boot_info['fragments'][0]['fragments']
     first_frag_number = fragment_run_entry_table[0]['first']
-    for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)):
+    for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
         res.append((1, frag_number))
     return res
 
         res.append((1, frag_number))
     return res
 
@@ -196,7 +199,7 @@ def write_flv_header(stream, metadata):
     # Script data
     stream.write(b'\x12')
     # Size of the metadata with 3 bytes
     # Script data
     stream.write(b'\x12')
     # Size of the metadata with 3 bytes
-    stream.write(pack('!L', len(metadata))[1:])
+    stream.write(struct_pack('!L', len(metadata))[1:])
     stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
     stream.write(metadata)
     # Magic numbers extracted from the output files produced by AdobeHDS.php
     stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
     stream.write(metadata)
     # Magic numbers extracted from the output files produced by AdobeHDS.php
@@ -220,21 +223,49 @@ class F4mFD(FileDownloader):
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
-        http_dl = HttpQuietDownloader(self.ydl, {'continuedl': True, 'quiet': True, 'noprogress': True})
+        http_dl = HttpQuietDownloader(
+            self.ydl,
+            {
+                'continuedl': True,
+                'quiet': True,
+                'noprogress': True,
+                'ratelimit': self.params.get('ratelimit', None),
+                'test': self.params.get('test', False),
+            }
+        )
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
-        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
+        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
+        if bootstrap_node.text is None:
+            bootstrap_url = compat_urlparse.urljoin(
+                base_url, bootstrap_node.attrib['url'])
+            bootstrap = self.ydl.urlopen(bootstrap_url).read()
+        else:
+            bootstrap = base64.b64decode(bootstrap_node.text)
         metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
         boot_info = read_bootstrap_info(bootstrap)
         metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
         boot_info = read_bootstrap_info(bootstrap)
+
         fragments_list = build_fragments_list(boot_info)
         fragments_list = build_fragments_list(boot_info)
+        if self.params.get('test', False):
+            # We only download the first fragment
+            fragments_list = fragments_list[:1]
         total_frags = len(fragments_list)
         total_frags = len(fragments_list)
+        # For some akamai manifests we'll need to add a query to the fragment url
+        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@@ -251,7 +282,7 @@ class F4mFD(FileDownloader):
         def frag_progress_hook(status):
             frag_total_bytes = status.get('total_bytes', 0)
             estimated_size = (state['downloaded_bytes'] +
         def frag_progress_hook(status):
             frag_total_bytes = status.get('total_bytes', 0)
             estimated_size = (state['downloaded_bytes'] +
-                (total_frags - state['frag_counter']) * frag_total_bytes)
+                              (total_frags - state['frag_counter']) * frag_total_bytes)
             if status['status'] == 'finished':
                 state['downloaded_bytes'] += frag_total_bytes
                 state['frag_counter'] += 1
             if status['status'] == 'finished':
                 state['downloaded_bytes'] += frag_total_bytes
                 state['frag_counter'] += 1
@@ -261,19 +292,21 @@ class F4mFD(FileDownloader):
                 frag_downloaded_bytes = status['downloaded_bytes']
                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                 frag_progress = self.calc_percent(frag_downloaded_bytes,
                 frag_downloaded_bytes = status['downloaded_bytes']
                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                 frag_progress = self.calc_percent(frag_downloaded_bytes,
-                    frag_total_bytes)
+                                                  frag_total_bytes)
                 progress = self.calc_percent(state['frag_counter'], total_frags)
                 progress += frag_progress / float(total_frags)
 
             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
             self.report_progress(progress, format_bytes(estimated_size),
                 progress = self.calc_percent(state['frag_counter'], total_frags)
                 progress += frag_progress / float(total_frags)
 
             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
             self.report_progress(progress, format_bytes(estimated_size),
-                status.get('speed'), eta)
+                                 status.get('speed'), eta)
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
         for (seg_i, frag_i) in fragments_list:
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
         for (seg_i, frag_i) in fragments_list:
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
+            if akamai_pv:
+                url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
             success = http_dl.download(frag_filename, {'url': url})
             if not success:
             frag_filename = '%s-%s' % (tmpfilename, name)
             success = http_dl.download(frag_filename, {'url': url})
             if not success:
@@ -288,6 +321,7 @@ class F4mFD(FileDownloader):
                         break
             frags_filenames.append(frag_filename)
 
                         break
             frags_filenames.append(frag_filename)
 
+        dest_stream.close()
         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
 
         self.try_rename(tmpfilename, filename)
         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
 
         self.try_rename(tmpfilename, filename)