Merge branch 'ping-viki-shows'
author    Sergey M․ <dstftw@gmail.com>
Wed, 20 May 2015 16:10:06 +0000 (22:10 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 20 May 2015 16:10:06 +0000 (22:10 +0600)
21 files changed:
docs/supportedsites.md
test/test_utils.py
tox.ini
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/letv.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/sbs.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/tv2.py [new file with mode: 0644]
youtube_dl/extractor/ultimedia.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/vulture.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/xminus.py
youtube_dl/extractor/yahoo.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 43fbe8b1d75bf260dd98d29ee7cb029b4c373365..a4879bd9a1a4b5221e824927e0ef0ec4d6c9f734 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
  - **Eporner**
  - **EroProfile**
  - **Escapist**
+ - **ESPN** (Currently broken)
  - **EveryonesMixtape**
  - **exfm**: ex.fm
  - **ExpoTV**
  - **OktoberfestTV**
  - **on.aol.com**
  - **Ooyala**
+ - **OoyalaExternal**
  - **OpenFilm**
  - **orf:fm4**: radio FM4
  - **orf:iptv**: iptv.ORF.at
  - **Spike**
  - **Sport5**
  - **SportBox**
+ - **SportBoxEmbed**
  - **SportDeutschland**
  - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **Turbo**
  - **Tutv**
  - **tv.dfb.de**
+ - **TV2**
+ - **TV2Article**
  - **TV4**: tv4.se and tv4play.se
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvp.pl**
diff --git a/test/test_utils.py b/test/test_utils.py
index b401070371bfcea183abc2b08419ddd0c75dd3fe..e13e11b59f7f427e8c082f003c650268895ef6f3 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -621,6 +621,21 @@ Line
 '''
         self.assertEqual(dfxp2srt(dfxp_data), srt_data)
 
+        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">The first line</p>
+                </div>
+            </body>
+            </tt>'''
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tox.ini b/tox.ini
index 00c6e00e3b72c4de21dc725173e3bb60ea5fa55b..cd805fe8ac27481937a1000a5a37412ff4f0d923 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -4,6 +4,8 @@ envlist = py26,py27,py33,py34
 deps =
    nose
    coverage
+# We need a valid $HOME for test_compat_expanduser
+passenv = HOME
 defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
     --exclude test_subtitles.py --exclude test_write_annotations.py
     --exclude test_youtube_lists.py
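
The new passenv line keeps $HOME visible inside the tox virtualenvs, which test_compat_expanduser needs because compat_expanduser resolves '~' through that variable. A minimal sketch of the dependency (illustrative only, not the actual test body; assumes a POSIX environment):

    import os
    from youtube_dl.compat import compat_expanduser

    # Without passenv = HOME, tox scrubs the variable and the test cannot rely on it.
    home = os.environ.get('HOME')
    assert home is not None                  # guaranteed by the passenv line above
    assert compat_expanduser('~') == home    # '~' expands via $HOME on POSIX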
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 5df889945947eda4ae1b8a152a1e325c9cb56936..58b34e087b421474112719ffb3389b252e560313 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1368,7 +1368,7 @@ class YoutubeDL(object):
                         postprocessors = []
                         self.report_warning('You have requested multiple '
                                             'formats but ffmpeg or avconv are not installed.'
-                                            ' The formats won\'t be merged')
+                                            ' The formats won\'t be merged.')
                     else:
                         postprocessors = [merger]
 
@@ -1395,8 +1395,8 @@ class YoutubeDL(object):
                     requested_formats = info_dict['requested_formats']
                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                         info_dict['ext'] = 'mkv'
-                        self.report_warning('You have requested formats incompatible for merge. '
-                                            'The formats will be merged into mkv')
+                        self.report_warning(
+                            'Requested formats are incompatible for merge and will be merged into mkv.')
                     # Ensure filename always has a correct extension for successful merge
                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                     if os.path.exists(encodeFilename(filename)):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 173e9a15515a2a8325d7e86bb053bb3f3beded5e..24efb7ce50e27e371c84d1e85b3b6314f0624026 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -572,6 +572,10 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv2 import (
+    TV2IE,
+    TV2ArticleIE,
+)
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 65bb7708638a20a7e162c8ad8a06321434461ae6..cecf917ffb67040739bbacf573297ba891ec9ea1 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -786,8 +786,8 @@ class InfoExtractor(object):
             return True
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError):
-                self.report_warning(
-                    '%s URL is invalid, skipping' % item, video_id)
+                self.to_screen(
+                    '%s: %s URL is invalid, skipping' % (video_id, item))
                 return False
             raise
 
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 65f6ca103973bb25c016ae92fcb551c65def31d1..b107557880345157fcf66cb7e6e1b4a92b25af2e 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -7,9 +7,9 @@ from ..utils import int_or_none
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
+    _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
     _TEST = {
-        'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
+        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
         'info_dict': {
             'id': 'aye83DjauH',
@@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor):
 
 
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {
-        'url': 'http://instagram.com/porsche',
+        'url': 'https://instagram.com/porsche',
         'info_dict': {
             'id': 'porsche',
             'title': 'porsche',
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 1484ac0d267697dceb34c9e406e3a26b26a37f54..da896caf160f6f0a0ae49167ae546cd9da4d45fa 100644 (file)
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -50,9 +50,7 @@ class LetvIE(InfoExtractor):
             'title': '与龙共舞 完整版',
             'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
         },
-        'params': {
-            'cn_verification_proxy': 'http://proxy.uku.im:8888'
-        },
+        'skip': 'Only available in China',
     }]
 
     @staticmethod
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 13113820bc4c820012bd618104c1b192897a6985..b540033e25a8c8e033f9d7f7b49d6b6ae46f755b 100644 (file)
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor):
             'title': '可惜没如果',
             'upload_date': '20141227',
             'creator': '林俊杰',
-            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+            'description': 'md5:d327722d0361576fde558f1ac68a7065',
         }
     }]
 
@@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor):
         lrc_content = self._html_search_regex(
             r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
             detail_info_page, 'LRC lyrics', default=None)
+        if lrc_content:
+            lrc_content = lrc_content.replace('\\n', '\n')
 
         guid = self.m_r_get_ruin()
 
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 3073e5e866150048523dc4cbe65e66c57661e3a3..d4bd1a0d72624e65bc9a5890f371b492451a9486 100644 (file)
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import json
 import re
 from .common import InfoExtractor
 from ..utils import (
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index eab4adfca46f3686ac7d23ca581681e56d5066f1..29bd9ce6f76247b5ac997050075c7e720d8a3b2b 100644 (file)
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -23,9 +23,7 @@ class SohuIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'MV:Far East Movement《The Illest》',
         },
-        'params': {
-            'cn_verification_proxy': 'proxy.uku.im:8888'
-        }
+        'skip': 'Only available in China',
     }, {
         'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
         'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
new file mode 100644 (file)
index 0000000..fa338b9
--- /dev/null
+++ b/youtube_dl/extractor/tv2.py
@@ -0,0 +1,126 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    float_or_none,
+    parse_iso8601,
+    remove_end,
+)
+
+
+class TV2IE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tv2.no/v/916509/',
+        'md5': '9cb9e3410b18b515d71892f27856e9b1',
+        'info_dict': {
+            'id': '916509',
+            'ext': 'flv',
+            'title': 'Se Gryttens hyllest av Steven Gerrard',
+            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+            'timestamp': 1431715610,
+            'upload_date': '20150515',
+            'duration': 156.967,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = []
+        format_urls = []
+        for protocol in ('HDS', 'HLS'):
+            data = self._download_json(
+                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
+                video_id, 'Downloading play JSON')['playback']
+            for item in data['items']['item']:
+                video_url = item.get('url')
+                if not video_url or video_url in format_urls:
+                    continue
+                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+                if not self._is_valid_url(video_url, video_id, format_id):
+                    continue
+                format_urls.append(video_url)
+                ext = determine_ext(video_url)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        video_url, video_id, f4m_id=format_id))
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+                    pass
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'tbr': int_or_none(item.get('bitrate')),
+                        'filesize': int_or_none(item.get('fileSize')),
+                    })
+        self._sort_formats(formats)
+
+        asset = self._download_json(
+            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
+            video_id, 'Downloading metadata JSON')['asset']
+
+        title = asset['title']
+        description = asset.get('description')
+        timestamp = parse_iso8601(asset.get('createTime'))
+        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
+        view_count = int_or_none(asset.get('views'))
+        categories = asset.get('keywords', '').split(',')
+
+        thumbnails = [{
+            'id': thumbnail.get('@type'),
+            'url': thumbnail.get('url'),
+        } for _, thumbnail in asset.get('imageVersions', {}).items()]
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'categories': categories,
+            'formats': formats,
+        }
+
+
+class TV2ArticleIE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+        'info_dict': {
+            'id': '6930542',
+            'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret',
+            'description': 'md5:339573779d3eea3542ffe12006190954',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.tv2.no/a/6930542',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
+            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]
+
+        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+        description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+        return self.playlist_result(entries, playlist_id, title, description)
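
A minimal embedding sketch for the new extractors, assuming a standard youtube_dl install; the URLs are the ones used in the tests above:

    import youtube_dl

    # TV2IE resolves single videos; TV2ArticleIE expands an article page into a
    # playlist of TV2 entries discovered via data-assetid attributes.
    with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
        video = ydl.extract_info('http://www.tv2.no/v/916509/', download=False)
        print(video['id'], video['title'])

        article = ydl.extract_info('http://www.tv2.no/a/6930542', download=False)
        print(article['id'], len(article['entries']))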
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
index 96c809eaf7155290210e0f8b18d3a2c7c948ba97..c4751050ec60901c2750b2f1692059f6246e23dc 100644 (file)
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     ExtractorError,
     qualities,
@@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        deliver_url = self._search_regex(
-            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
-            webpage, 'deliver URL')
+        deliver_url = self._proto_relative_url(self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+            webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
 
         deliver_page = self._download_webpage(
             deliver_url, video_id, 'Downloading iframe page')
@@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor):
 
         player = self._parse_json(
             self._search_regex(
-                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
+                deliver_page, 'player'),
             video_id)
 
         quality = qualities(['flash', 'html5'])
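
The deliver iframe src may now be protocol-relative, so the extractor falls back to the scheme of the page URL. A small sketch of the helper's behaviour (the path below is hypothetical):

    from youtube_dl.extractor.common import InfoExtractor

    ie = InfoExtractor()
    # Protocol-relative URLs get the supplied scheme prepended...
    print(ie._proto_relative_url('//www.ultimedia.com/deliver/xyz', 'http:'))
    # http://www.ultimedia.com/deliver/xyz
    # ...while absolute URLs are returned unchanged.
    print(ie._proto_relative_url('https://www.ultimedia.com/deliver/xyz', 'http:'))
    # https://www.ultimedia.com/deliver/xyz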
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 619039e516c96209c38953e6b73f93a6895df54c..15377097e658b20e75a08f19b370be3bef2158c7 100644 (file)
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -38,11 +38,14 @@ class VierIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
-            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+            [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
+            webpage, 'video id')
         application = self._search_regex(
-            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+            [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+            webpage, 'application', default='vier_vod')
         filename = self._search_regex(
-            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+            [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+            webpage, 'filename')
 
         playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
         formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
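
The patterns are now passed as lists because _search_regex tries each pattern in order and returns the first match, so the newer data-* attributes take precedence over the legacy inline-JSON fields. A standalone sketch (the markup is hypothetical):

    from youtube_dl.extractor.common import InfoExtractor

    ie = InfoExtractor()
    page = '<div class="video" data-nid="123456"></div>'
    print(ie._search_regex(
        [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], page, 'video id'))
    # 123456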
diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py
index c3fde53f5ef06a56b54e94b20b72a7e98c1992a5..a6d9b5fee1f4864d82c7f8bb83e87884c96afe3b 100644 (file)
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dl/extractor/vuclip.py
@@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor):
         links_code = self._search_regex(
             r'''(?xs)
                 (?:
-                    <img\s+src="/im/play.gif".*?>|
+                    <img\s+src="[^"]*/play.gif".*?>|
                     <!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
                 )
                 (.*?)
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py
index 1eb24a3d67ffa92838ce41301b3b47d401482609..faa167e65861af3bb4803ab96fe931c15597dc00 100644 (file)
--- a/youtube_dl/extractor/vulture.py
+++ b/youtube_dl/extractor/vulture.py
@@ -44,7 +44,7 @@ class VultureIE(InfoExtractor):
         query_webpage = self._download_webpage(
             query_url, display_id, note='Downloading query page')
         params_json = self._search_regex(
-            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
+            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
             query_webpage,
             'player params')
         params = json.loads(params_json)
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index d6dec25ca9e7bb9de539e89c147e22b7381e3719..f69d46a2858077ed76ec9c8fc86166668f27c705 100644 (file)
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -37,7 +37,8 @@ class WimpIE(InfoExtractor):
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
-            r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
+            [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
+            webpage, 'video URL')
         if YoutubeIE.suitable(video_url):
             self.to_screen('Found YouTube video')
             return {
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py
index 8c6241aedf7249343a725ab705968d0af963294a..7c9d8af6f2585207347d58d08fc607ebf4d28900 100644 (file)
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor):
             r'minus_track\.dur_sec=\'([0-9]*?)\'',
             webpage, 'duration', fatal=False))
         filesize_approx = parse_filesize(self._html_search_regex(
-            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+            r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
             webpage, 'approximate filesize', fatal=False))
         tbr = int_or_none(self._html_search_regex(
             r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
@@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor):
             description = re.sub(' *\r *', '\n', description)
 
         enc_token = self._html_search_regex(
-            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+            r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
         token = ''.join(
             c if pos == 3 else compat_chr(compat_ord(c) - 1)
             for pos, c in enumerate(reversed(enc_token)))
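
The token decoding itself is unchanged: the scraped value is reversed and every character except the one at index 3 is shifted down by one code point. A worked example with a hypothetical token:

    from youtube_dl.compat import compat_chr, compat_ord

    enc_token = 'bcdef'                       # hypothetical scraped value
    token = ''.join(
        c if pos == 3 else compat_chr(compat_ord(c) - 1)
        for pos, c in enumerate(reversed(enc_token)))
    print(token)                              # 'edcca'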
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index bf4e659ac6981c77f7e5f3c77578c4808634d766..f9afbdbab611e233c7f7014ae7d66e996f2b7c31 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,6 +15,7 @@ from ..utils import (
     unescapeHTML,
     ExtractorError,
     int_or_none,
+    mimetype2ext,
 )
 
 from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        closed_captions = self._html_search_regex(
+            r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+            default='[]')
+
+        cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+        subtitles = {}
+        if cc_json:
+            for closed_caption in cc_json:
+                lang = closed_caption['lang']
+                if lang not in subtitles:
+                    subtitles[lang] = []
+                subtitles[lang].append({
+                    'url': closed_caption['url'],
+                    'ext': mimetype2ext(closed_caption['content_type']),
+                })
+
         return {
             'id': video_id,
             'display_id': display_id,
@@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
             'duration': int_or_none(meta.get('duration')),
+            'subtitles': subtitles,
         }
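
The subtitles value built above follows the usual InfoExtractor convention of mapping a language code to a list of caption variants; roughly (URL hypothetical, the ext comes from the new 'ttml+xml' mapping in mimetype2ext):

    subtitles = {
        'en-US': [{
            'url': 'https://example.invalid/captions/123.ttml',   # hypothetical
            'ext': 'ttml',   # mimetype2ext('application/ttml+xml') -> 'ttml'
        }],
    }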
 
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ed9ed9ed63ec9b40d929f83cb2e56ee4d63f9e7f..52d198fa3c2eb36a1a3d41620cd645b90d52f854 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
     return {
         'x-ms-wmv': 'wmv',
         'x-mp4-fragmented': 'mp4',
+        'ttml+xml': 'ttml',
     }.get(res, res)
 
 
@@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data):
         out = str_or_empty(node.text)
 
         for child in node:
-            if child.tag == _x('ttml:br'):
+            if child.tag in (_x('ttml:br'), 'br'):
                 out += '\n' + str_or_empty(child.tail)
-            elif child.tag == _x('ttml:span'):
+            elif child.tag in (_x('ttml:span'), 'span'):
                 out += str_or_empty(parse_node(child))
             else:
                 out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data):
 
     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p'))
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+    if not paras:
+        raise ValueError('Invalid dfxp/TTML subtitle')
 
     for para, index in zip(paras, itertools.count(1)):
         begin_time = parse_dfxp_time_expr(para.attrib['begin'])
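
With the namespace fallback, dfxp2srt also converts TTML documents that omit the default namespace (the new case in test_utils.py above), and it now raises ValueError rather than emitting empty output when no <p> cues are found. A minimal sketch mirroring the new test:

    from youtube_dl.utils import dfxp2srt

    dfxp = '''<?xml version="1.0" encoding="UTF-8"?>
    <tt xml:lang="en"><body><div><p begin="0" end="1">The first line</p></div></body></tt>'''
    print(dfxp2srt(dfxp))
    # 1
    # 00:00:00,000 --> 00:00:01,000
    # The first line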
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 38f00bc9bc2ef476ddb813d7bcd26e5d13f4947d..b333851534e9edd9c75ff70ee4350874530ea8f7 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.05.15'
+__version__ = '2015.05.20'