[bloomberg] Extract the available formats (closes #2776)

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 28 Jul 2014 13:25:56 +0000 (15:25 +0200)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 28 Jul 2014 13:32:38 +0000 (15:32 +0200)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Jul 2014 13:25:56 +0000 (15:25 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Jul 2014 13:32:38 +0000 (15:32 +0200)
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py

index e6be6ae6c878c9ede7cd2cf3b6be663e22bb8be1..71353f607daead364acbdad83b18b79e61a5bffa 100644 (file)
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
  
      def real_download(self, filename, info_dict):
          man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
          self.to_screen('[download] Downloading f4m manifest')
          manifest = self.ydl.urlopen(man_url).read()
          self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
  
          doc = etree.fromstring(manifest)
          formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
          base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
          bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
          metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py

index 25fb79e146b18f50962ba506d01560fbd845dbf2..c51a97ce4327cff934216927948587131dedfa80 100644 (file)
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        # The md5 checksum changes
          'info_dict': {
              'id': 'qurhIVlJSB6hzkVi229d8g',
              'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
          return {
              'id': name.split('-')[-1],
              'title': title,
-            'url': f4m_url,
-            'ext': 'flv',
+            'formats': self._extract_f4m_formats(f4m_url, name),
              'description': self._og_search_description(webpage),
              'thumbnail': self._og_search_thumbnail(webpage),
          }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 52c00186e458fd131e28eb178ea09df8e9d9ce0d..59030e1275ef6778a6462e9a701e40b415d18e09 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
      clean_html,
      compiled_regex_type,
      ExtractorError,
+    int_or_none,
      RegexNotFoundError,
      sanitize_filename,
      unescapeHTML,
@@ -590,6 +591,22 @@ class InfoExtractor(object):
          self.to_screen(msg)
          time.sleep(timeout)
  
+    def _extract_f4m_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(manifest_url, video_id)
+
+        formats = []
+        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+            formats.append({
+                'url': manifest_url,
+                'ext': 'flv',
+                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+
  
  class SearchInfoExtractor(InfoExtractor):
      """
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 28 Jul 2014 13:25:56 +0000 (15:25 +0200)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 28 Jul 2014 13:32:38 +0000 (15:32 +0200)
youtube_dl/downloader/f4m.py		patch | blob | history
youtube_dl/extractor/bloomberg.py		patch | blob | history
youtube_dl/extractor/common.py		patch | blob | history