[downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader
author remitamine <remitamine@gmail.com>
Sun, 10 Jan 2016 19:09:53 +0000 (20:09 +0100)
committer Remita Amine <remitamine@gmail.com>
Sun, 19 Jun 2016 00:01:40 +0000 (01:01 +0100)
youtube_dl/downloader/hls.py
youtube_dl/extractor/common.py
youtube_dl/utils.py

index 54f2108e964b5eb70609b15cc6e0589036931235..1d5f178a036cdffc60b00b6a2b6284cc7fb20c89 100644 (file)
@@ -2,14 +2,24 @@ from __future__ import unicode_literals
 
 import os.path
 import re
+import binascii
+try:
+    from Crypto.Cipher import AES
+    can_decrypt_frag = True
+except ImportError:
+    can_decrypt_frag = False
 
 from .fragment import FragmentFD
 from .external import FFmpegFD
 
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_struct_pack,
+)
 from ..utils import (
     encodeFilename,
     sanitize_open,
+    parse_m3u8_attributes,
 )
 
 
@@ -21,7 +31,7 @@ class HlsFD(FragmentFD):
     @staticmethod
     def can_download(manifest):
         UNSUPPORTED_FEATURES = (
-            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1]
+            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
             r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
 
             # Live streams heuristic does not always work (e.g. geo restricted to Germany
@@ -39,7 +49,9 @@ class HlsFD(FragmentFD):
             # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
         )
-        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+        check_results.append(not (re.search(r'#EXT-X-KEY:METHOD=AES-128', manifest) and not can_decrypt_frag))
+        return all(check_results)
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
@@ -57,36 +69,58 @@ class HlsFD(FragmentFD):
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        fragment_urls = []
+        total_frags = 0
         for line in s.splitlines():
             line = line.strip()
             if line and not line.startswith('#'):
-                segment_url = (
-                    line
-                    if re.match(r'^https?://', line)
-                    else compat_urlparse.urljoin(man_url, line))
-                fragment_urls.append(segment_url)
-                # We only download the first fragment during the test
-                if self.params.get('test', False):
-                    break
+                total_frags += 1
 
         ctx = {
             'filename': filename,
-            'total_frags': len(fragment_urls),
+            'total_frags': total_frags,
         }
 
         self._prepare_and_start_frag_download(ctx)
 
+        i = 0
+        media_sequence = 0
+        decrypt_info = {'METHOD': 'NONE'}
         frags_filenames = []
-        for i, frag_url in enumerate(fragment_urls):
-            frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
-            success = ctx['dl'].download(frag_filename, {'url': frag_url})
-            if not success:
-                return False
-            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            frags_filenames.append(frag_sanitized)
+        for line in s.splitlines():
+            line = line.strip()
+            if line:
+                if not line.startswith('#'):
+                    frag_url = (
+                        line
+                        if re.match(r'^https?://', line)
+                        else compat_urlparse.urljoin(man_url, line))
+                    frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+                    success = ctx['dl'].download(frag_filename, {'url': frag_url})
+                    if not success:
+                        return False
+                    down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+                    frag_content = down.read()
+                    down.close()
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        iv = decrypt_info.get('IV') or compat_struct_pack(">8xq", media_sequence)
+                        frag_content = AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+                    ctx['dest_stream'].write(frag_content)
+                    frags_filenames.append(frag_sanitized)
+                    # We only download the first fragment during the test
+                    if self.params.get('test', False):
+                        break
+                    i += 1
+                    media_sequence += 1
+                elif line.startswith('#EXT-X-KEY'):
+                    decrypt_info = parse_m3u8_attributes(line[11:])
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        if 'IV' in decrypt_info:
+                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
+                        if not re.match(r'^https?://', decrypt_info['URI']):
+                            decrypt_info['URI'] = compat_urlparse.urljoin(man_url, decrypt_info['URI'])
+                        decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+                    media_sequence = int(line[22:])
 
         self._finish_frag_download(ctx)
 
index bfd4321605a906862987063b243703445136dc5e..5a2603b509244810e5a19d8adb2b124d3a5c2d78 100644 (file)
@@ -53,6 +53,7 @@ from ..utils import (
     mimetype2ext,
     update_Request,
     update_url_query,
+    parse_m3u8_attributes,
 )
 
 
@@ -1150,23 +1151,11 @@ class InfoExtractor(object):
             }]
         last_info = None
         last_media = None
-        kv_rex = re.compile(
-            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
-                last_info = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_info[m.group('key')] = v
+                last_info = parse_m3u8_attributes(line)
             elif line.startswith('#EXT-X-MEDIA:'):
-                last_media = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_media[m.group('key')] = v
+                last_media = parse_m3u8_attributes(line)
             elif line.startswith('#') or not line.strip():
                 continue
             else:
index 82f67f6cdca2345d8dff1a10dfe2444a898362b9..562031fe110a9a6962c6e5efed2bb311f3059979 100644 (file)
@@ -2852,3 +2852,12 @@ def decode_packed_codes(code):
     return re.sub(
         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
         obfucasted_code)
+
+
+def parse_m3u8_attributes(attrib):
+    info = {}
+    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+        if val.startswith('"'):
+            val = val[1:-1]
+        info[key] = val
+    return info