Merge remote-tracking branch 'origin/master'

author Philipp Hagemeister <phihag@phihag.de>
Mon, 22 Sep 2014 10:53:41 +0000 (12:53 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 22 Sep 2014 10:53:41 +0000 (12:53 +0200)

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py

index 71353f607daead364acbdad83b18b79e61a5bffa..b3be16ff15353ed057607064bba2faa3fcdc2fad 100644 (file)
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -16,6 +16,7 @@ from ..utils import (
      format_bytes,
      encodeFilename,
      sanitize_open,
+    xpath_text,
  )
  
  
@@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
              # We only download the first fragment
              fragments_list = fragments_list[:1]
          total_frags = len(fragments_list)
+        # For some akamai manifests we'll need to add a query to the fragment url
+        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  
          tmpfilename = self.temp_name(filename)
          (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
          for (seg_i, frag_i) in fragments_list:
              name = 'Seg%d-Frag%d' % (seg_i, frag_i)
              url = base_url + name
+            if akamai_pv:
+                url += '?' + akamai_pv.strip(';')
              frag_filename = '%s-%s' % (tmpfilename, name)
              success = http_dl.download(frag_filename, {'url': url})
              if not success:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 8a5eb8cf167d1c04adfc572bd75536ccb8352615..244d222970e7cbb0178695babd85bd7132d972fd 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE
  from .novamov import NovaMovIE
  from .nowness import NownessIE
  from .nowvideo import NowVideoIE
-from .npo import NPOIE
+from .npo import (
+    NPOIE,
+    TegenlichtVproIE,
+)
  from .nrk import (
      NRKIE,
      NRKTVIE,
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index d2e4acbada5b99c5c3eac4fe6b966ce77dab1ef9..e75ab7c398604451db54bc9d3afe66e4df074871 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        # md5 checksum is not stable
          'info_dict': {
-            'id': 'u1RInQZRN7QJ',
+            'id': 'bTmnLCvIbaaH',
              'ext': 'flv',
              'title': 'I Am a Firefighter',
              'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py

index 7a154e94abfb8c40093705111a25f38e3ce9016e..f36d446d2dd398ac48168f2181299bdfc920797f 100644 (file)
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -7,6 +7,7 @@ from ..utils import (
      unified_strdate,
      parse_duration,
      qualities,
+    url_basename,
  )
  
  
@@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
+        return self._get_info(video_id)
  
+    def _get_info(self, video_id):
          metadata = self._download_json(
              'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
              video_id,
@@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
              'duration': parse_duration(metadata.get('tijdsduur')),
              'formats': formats,
          }
+
+
+class TegenlichtVproIE(NPOIE):
+    IE_NAME = 'tegenlicht.vpro.nl'
+    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+
+    _TESTS = [
+        {
+            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+            'info_dict': {
+                'id': 'VPWON_1169289',
+                'ext': 'm4v',
+                'title': 'Tegenlicht',
+                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'upload_date': '20130225',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+        webpage = self._download_webpage(url, name)
+        urn = self._html_search_meta('mediaurn', webpage)
+        info_page = self._download_json(
+            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
+        return self._get_info(info_page['mid'])
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py

index 34058fd4bc12652ed5332103c81fd83e344d5efd..214990e7a7e974383dc35d3dc4f042cc8d5cfb24 100644 (file)
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
          'md5': '3150cf278965eeabb5b4cea1c963fe0a',
          'info_dict': {
              'id': '320403011771',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'Dingo Conservation',
              'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
              'thumbnail': 're:http://.*\.jpg',
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index b6b2dba9ca9e6ee02c7dc6b2cf01d3601874a6b2..0be793b1c262ed6c951fa6695de3cf22680d5720 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -5,6 +5,7 @@ import json
  
  from .common import InfoExtractor
  from ..utils import (
+    compat_str,
      ExtractorError,
      xpath_with_ns,
  )
@@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
          body = meta.find(_x('smil:body'))
  
          f4m_node = body.find(_x('smil:seq//smil:video'))
-        if f4m_node is not None:
+        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
              f4m_url = f4m_node.attrib['src']
              if 'manifest.f4m?' not in f4m_url:
                  f4m_url += '?'
              # the parameters are from syfy.com, other sites may use others,
              # they also work for nbc.com
              f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
-            formats = [{
-                'ext': 'flv',
-                'url': f4m_url,
-            }]
+            formats = self._extract_f4m_formats(f4m_url, video_id)
          else:
-            base_url = head.find(_x('smil:meta')).attrib['base']
-            switch = body.find(_x('smil:switch'))
              formats = []
-            for f in switch.findall(_x('smil:video')):
-                attr = f.attrib
-                width = int(attr['width'])
-                height = int(attr['height'])
-                vbr = int(attr['system-bitrate']) // 1000
-                format_id = '%dx%d_%dk' % (width, height, vbr)
-                formats.append({
-                    'format_id': format_id,
-                    'url': base_url,
-                    'play_path': 'mp4:' + attr['src'],
-                    'ext': 'flv',
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                })
+            switch = body.find(_x('smil:switch'))
+            if switch is not None:
+                base_url = head.find(_x('smil:meta')).attrib['base']
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    width = int(attr['width'])
+                    height = int(attr['height'])
+                    vbr = int(attr['system-bitrate']) // 1000
+                    format_id = '%dx%d_%dk' % (width, height, vbr)
+                    formats.append({
+                        'format_id': format_id,
+                        'url': base_url,
+                        'play_path': 'mp4:' + attr['src'],
+                        'ext': 'flv',
+                        'width': width,
+                        'height': height,
+                        'vbr': vbr,
+                    })
+            else:
+                switch = body.find(_x('smil:seq//smil:switch'))
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    vbr = int(attr['system-bitrate']) // 1000
+                    formats.append({
+                        'format_id': compat_str(vbr),
+                        'url': attr['src'],
+                        'vbr': vbr,
+                    })
              self._sort_formats(formats)
  
          return {
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 22 Sep 2014 10:53:41 +0000 (12:53 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 22 Sep 2014 10:53:41 +0000 (12:53 +0200)
youtube_dl/downloader/f4m.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/nbc.py		patch | blob | history
youtube_dl/extractor/npo.py		patch | blob | history
youtube_dl/extractor/sbs.py		patch | blob | history
youtube_dl/extractor/theplatform.py		patch | blob | history