Merge remote-tracking branch 'David-Development/rtl2.py'
[youtube-dl] / youtube_dl / downloader / f4m.py
index 9a6c03556a64ae5b052354e8973d7bc01546b392..c68b2c303035395f77d3a983d949eb23737e0156 100644 (file)
@@ -4,18 +4,21 @@ import base64
 import io
 import itertools
 import os
-from struct import unpack, pack
 import time
 import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
 from .http import HttpFD
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
+    struct_pack,
+    struct_unpack,
     format_bytes,
     encodeFilename,
     sanitize_open,
+    xpath_text,
 )
 
 
@@ -27,13 +30,13 @@ class FlvReader(io.BytesIO):
 
     # Utility functions for reading numbers and strings
     def read_unsigned_long_long(self):
-        return unpack('!Q', self.read(8))[0]
+        return struct_unpack('!Q', self.read(8))[0]
 
     def read_unsigned_int(self):
-        return unpack('!I', self.read(4))[0]
+        return struct_unpack('!I', self.read(4))[0]
 
     def read_unsigned_char(self):
-        return unpack('!B', self.read(1))[0]
+        return struct_unpack('!B', self.read(1))[0]
 
     def read_string(self):
         res = b''
@@ -54,7 +57,7 @@ class FlvReader(io.BytesIO):
         if size == 1:
             real_size = self.read_unsigned_long_long()
             header_end = 16
-        return real_size, box_type, self.read(real_size-header_end)
+        return real_size, box_type, self.read(real_size - header_end)
 
     def read_asrt(self):
         # version
@@ -116,8 +119,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_char()
         # flags
         self.read(3)
-        # BootstrapinfoVersion
-        bootstrap_info_version = self.read_unsigned_int()
+
+        self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
@@ -126,15 +129,15 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
-        # MovieIdentifier
-        movie_identifier = self.read_string()
+
+        self.read_string()  # MovieIdentifier
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
-        for i in range(server_count):
+        for i in range(quality_count):
             self.read_string()
         # DrmData
         self.read_string()
@@ -174,34 +177,43 @@ def build_fragments_list(boot_info):
     """ Return a list of (segment, fragment) for each fragment in the video """
     res = []
     segment_run_table = boot_info['segments'][0]
-    # I've only found videos with one segment
-    segment_run_entry = segment_run_table['segment_run'][0]
-    n_frags = segment_run_entry[1]
     fragment_run_entry_table = boot_info['fragments'][0]['fragments']
     first_frag_number = fragment_run_entry_table[0]['first']
-    for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)):
-        res.append((1, frag_number))
+    fragments_counter = itertools.count(first_frag_number)
+    for segment, fragments_count in segment_run_table['segment_run']:
+        for _ in range(fragments_count):
+            res.append((segment, next(fragments_counter)))
     return res
 
 
-def write_flv_header(stream, metadata):
-    """Writes the FLV header and the metadata to stream"""
+def write_unsigned_int(stream, val):
+    stream.write(struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+    stream.write(struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+    """Writes the FLV header to stream"""
     # FLV header
     stream.write(b'FLV\x01')
     stream.write(b'\x05')
     stream.write(b'\x00\x00\x00\x09')
-    # FLV File body
     stream.write(b'\x00\x00\x00\x00')
-    # FLVTAG
-    # Script data
-    stream.write(b'\x12')
-    # Size of the metadata with 3 bytes
-    stream.write(pack('!L', len(metadata))[1:])
-    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
-    stream.write(metadata)
-    # Magic numbers extracted from the output files produced by AdobeHDS.php
-    #(https://github.com/K-S-V/Scripts)
-    stream.write(b'\x00\x00\x01\x73')
+
+
+def write_metadata_tag(stream, metadata):
+    """Writes optional metadata tag to stream"""
+    SCRIPT_TAG = b'\x12'
+    FLV_TAG_HEADER_LEN = 11
+
+    if metadata:
+        stream.write(SCRIPT_TAG)
+        write_unsigned_int_24(stream, len(metadata))
+        stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+        stream.write(metadata)
+        write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
 
 
 def _add_ns(prop):
@@ -220,25 +232,58 @@ class F4mFD(FileDownloader):
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
-        http_dl = HttpQuietDownloader(self.ydl, {'continuedl': True, 'quiet': True, 'noprogress': True})
+        http_dl = HttpQuietDownloader(
+            self.ydl,
+            {
+                'continuedl': True,
+                'quiet': True,
+                'noprogress': True,
+                'ratelimit': self.params.get('ratelimit', None),
+                'test': self.params.get('test', False),
+            }
+        )
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
-        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
-        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
+        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
+        if bootstrap_node.text is None:
+            bootstrap_url = compat_urlparse.urljoin(
+                base_url, bootstrap_node.attrib['url'])
+            bootstrap = self.ydl.urlopen(bootstrap_url).read()
+        else:
+            bootstrap = base64.b64decode(bootstrap_node.text)
+        metadata_node = media.find(_add_ns('metadata'))
+        if metadata_node is not None:
+            metadata = base64.b64decode(metadata_node.text)
+        else:
+            metadata = None
         boot_info = read_bootstrap_info(bootstrap)
+
         fragments_list = build_fragments_list(boot_info)
+        if self.params.get('test', False):
+            # We only download the first fragment
+            fragments_list = fragments_list[:1]
         total_frags = len(fragments_list)
+        # For some akamai manifests we'll need to add a query to the fragment url
+        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
-        write_flv_header(dest_stream, metadata)
+        write_flv_header(dest_stream)
+        write_metadata_tag(dest_stream, metadata)
 
         # This dict stores the download progress, it's updated by the progress
         # hook
@@ -251,7 +296,7 @@ class F4mFD(FileDownloader):
         def frag_progress_hook(status):
             frag_total_bytes = status.get('total_bytes', 0)
             estimated_size = (state['downloaded_bytes'] +
-                (total_frags - state['frag_counter']) * frag_total_bytes)
+                              (total_frags - state['frag_counter']) * frag_total_bytes)
             if status['status'] == 'finished':
                 state['downloaded_bytes'] += frag_total_bytes
                 state['frag_counter'] += 1
@@ -261,19 +306,21 @@ class F4mFD(FileDownloader):
                 frag_downloaded_bytes = status['downloaded_bytes']
                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                 frag_progress = self.calc_percent(frag_downloaded_bytes,
-                    frag_total_bytes)
+                                                  frag_total_bytes)
                 progress = self.calc_percent(state['frag_counter'], total_frags)
                 progress += frag_progress / float(total_frags)
 
             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
             self.report_progress(progress, format_bytes(estimated_size),
-                status.get('speed'), eta)
+                                 status.get('speed'), eta)
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
         for (seg_i, frag_i) in fragments_list:
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
+            if akamai_pv:
+                url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
             success = http_dl.download(frag_filename, {'url': url})
             if not success:
@@ -288,6 +335,7 @@ class F4mFD(FileDownloader):
                         break
             frags_filenames.append(frag_filename)
 
+        dest_stream.close()
         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
 
         self.try_rename(tmpfilename, filename)