Merge branch 'karrierevideos' of https://github.com/misterhat/youtube-dl into misterh...
author    Sergey M․ <dstftw@gmail.com>
          Sun, 24 May 2015 13:19:48 +0000 (19:19 +0600)
committer Sergey M․ <dstftw@gmail.com>
          Sun, 24 May 2015 13:19:48 +0000 (19:19 +0600)
40 files changed:
AUTHORS
README.md
docs/supportedsites.md
test/test_utils.py
tox.ini
youtube_dl/YoutubeDL.py
youtube_dl/aes.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/chilloutzone.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/empflix.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/letv.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/nextmedia.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/tutv.py
youtube_dl/extractor/tv2.py
youtube_dl/extractor/ultimedia.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/vulture.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/xminus.py
youtube_dl/extractor/yahoo.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index 267b8da1e6ffbda7853d53996de4020111074834..ebed7ebb3a3222492111d7579ae5bdd10096cf6b 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -124,3 +124,4 @@ Mohammad Teimori Pabandi
 Roman Le Négrate
 Matthias Küch
 Julian Richen
+Ping O.
index 3d9436456c4bd29d11bf160efa27769b05edde2d..e51bb534341e389a26a466f1fb4c3ef721731016 100644 (file)
--- a/README.md
+++ b/README.md
@@ -17,12 +17,12 @@ youtube-dl - download videos from youtube.com or other video platforms
 To install it right away for all UNIX users (Linux, OS X, etc.), type:
 
     sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
-    sudo chmod a+x /usr/local/bin/youtube-dl
+    sudo chmod a+rx /usr/local/bin/youtube-dl
 
 If you do not have curl, you can alternatively use a recent wget:
 
     sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
-    sudo chmod a+x /usr/local/bin/youtube-dl
+    sudo chmod a+rx /usr/local/bin/youtube-dl
 
 Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
 
index 43fbe8b1d75bf260dd98d29ee7cb029b4c373365..a4879bd9a1a4b5221e824927e0ef0ec4d6c9f734 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
  - **Eporner**
  - **EroProfile**
  - **Escapist**
+ - **ESPN** (Currently broken)
  - **EveryonesMixtape**
  - **exfm**: ex.fm
  - **ExpoTV**
  - **OktoberfestTV**
  - **on.aol.com**
  - **Ooyala**
+ - **OoyalaExternal**
  - **OpenFilm**
  - **orf:fm4**: radio FM4
  - **orf:iptv**: iptv.ORF.at
  - **Spike**
  - **Sport5**
  - **SportBox**
+ - **SportBoxEmbed**
  - **SportDeutschland**
  - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **Turbo**
  - **Tutv**
  - **tv.dfb.de**
+ - **TV2**
+ - **TV2Article**
  - **TV4**: tv4.se and tv4play.se
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvp.pl**
index b401070371bfcea183abc2b08419ddd0c75dd3fe..e13e11b59f7f427e8c082f003c650268895ef6f3 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -621,6 +621,21 @@ Line
 '''
         self.assertEqual(dfxp2srt(dfxp_data), srt_data)
 
+        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">The first line</p>
+                </div>
+            </body>
+            </tt>'''
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tox.ini b/tox.ini
index 00c6e00e3b72c4de21dc725173e3bb60ea5fa55b..cd805fe8ac27481937a1000a5a37412ff4f0d923 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -4,6 +4,8 @@ envlist = py26,py27,py33,py34
 deps =
    nose
    coverage
+# We need a valid $HOME for test_compat_expanduser
+passenv = HOME
 defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
     --exclude test_subtitles.py --exclude test_write_annotations.py
     --exclude test_youtube_lists.py
index 5df889945947eda4ae1b8a152a1e325c9cb56936..d1953c18f39b438740aec88a1aadf4d529a8e0b4 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1368,7 +1368,7 @@ class YoutubeDL(object):
                         postprocessors = []
                         self.report_warning('You have requested multiple '
                                             'formats but ffmpeg or avconv are not installed.'
-                                            ' The formats won\'t be merged')
+                                            ' The formats won\'t be merged.')
                     else:
                         postprocessors = [merger]
 
@@ -1395,8 +1395,8 @@ class YoutubeDL(object):
                     requested_formats = info_dict['requested_formats']
                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                         info_dict['ext'] = 'mkv'
-                        self.report_warning('You have requested formats incompatible for merge. '
-                                            'The formats will be merged into mkv')
+                        self.report_warning(
+                            'Requested formats are incompatible for merge and will be merged into mkv.')
                     # Ensure filename always has a correct extension for successful merge
                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                     if os.path.exists(encodeFilename(filename)):
@@ -1527,6 +1527,7 @@ class YoutubeDL(object):
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
+            files_to_delete = []
             try:
                 files_to_delete, info = pp.run(info)
             except PostProcessingError as e:
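The `files_to_delete = []` added above keeps that name defined even when a postprocessor raises before returning anything. A minimal sketch of the guard pattern, with a hypothetical `run()` standing in for the real postprocessor interface:

    # Sketch only: a hypothetical run() that fails before returning anything.
    def run(info):
        raise RuntimeError('postprocessor failed')

    info = {'filepath': 'video.mp4'}
    files_to_delete = []  # defined up front, as in the patch above
    try:
        files_to_delete, info = run(info)
    except RuntimeError as error:
        print('PP error: %s' % error)
    # files_to_delete always exists here, so later cleanup cannot hit a NameError
    for filename in files_to_delete:
        print('would delete %s' % filename)
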
index 07224d5084158184ac30c927b1b79802d398c336..7817adcfdd546f70cfb76e0634b8df8ddbcaf8e0 100644 (file)
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -152,7 +152,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
     """
     NONCE_LENGTH_BYTES = 8
 
-    data = bytes_to_intlist(base64.b64decode(data))
+    data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
     password = bytes_to_intlist(password.encode('utf-8'))
 
     key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
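The same `.encode('utf-8')` before `base64.b64decode` recurs throughout this merge (chilloutzone.py, rtve.py, shared.py, tutv.py, videott.py): the base64 payload scraped from a page is a text string, and it is normalized to bytes before decoding. A self-contained round trip, with example data only:

    import base64

    scraped = base64.b64encode('example payload'.encode('utf-8')).decode('utf-8')  # str, as pulled from a page
    # encode the text to bytes before decoding, as the patches above do
    decoded = base64.b64decode(scraped.encode('utf-8')).decode('utf-8')
    assert decoded == 'example payload'
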
index d131d3ec31645e30611be62180d95e488ebbdcc6..79bcd910666baf7a9802dc50f7ae072827976b52 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -339,8 +339,7 @@ from .newstube import NewstubeIE
 from .nextmedia import (
     NextMediaIE,
     NextMediaActionNewsIE,
-    AppleDailyRealtimeNewsIE,
-    AppleDailyAnimationNewsIE
+    AppleDailyIE,
 )
 from .nfb import NFBIE
 from .nfl import NFLIE
@@ -573,7 +572,10 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
-from .tv2 import TV2IE
+from .tv2 import (
+    TV2IE,
+    TV2ArticleIE,
+)
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
@@ -645,7 +647,10 @@ from .vine import (
     VineIE,
     VineUserIE,
 )
-from .viki import VikiIE
+from .viki import (
+    VikiIE,
+    VikiChannelIE,
+)
 from .vk import (
     VKIE,
     VKUserVideosIE,
index c922f695905d70e4052ddfa5c8f336c01221413b..0206d96db4670fb29a40353839dae15911b9c6d3 100644 (file)
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -57,7 +57,7 @@ class ChilloutzoneIE(InfoExtractor):
 
         base64_video_info = self._html_search_regex(
             r'var cozVidData = "(.+?)";', webpage, 'video data')
-        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+        decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
         video_info_dict = json.loads(decoded_video_info)
 
         # get video information from dict
index 5efc5f4fe556a4424542b441a83f2d6dbd5bc8e7..3b1bd4033fd1c01986c83ab44cc1cebaa1b19e5b 100644 (file)
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
index f25ab319e66d4d5b151cd9a9d4509807b6a88617..baa24c6d13abe016cceb83bb927db15d7d300509 100644 (file)
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,8 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor, ExtractorError
-from ..utils import parse_iso8601
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
 
 
 class DRTVIE(InfoExtractor):
@@ -60,19 +63,31 @@ class DRTVIE(InfoExtractor):
                 restricted_to_denmark = asset['RestrictedToDenmark']
                 spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
                 for link in asset['Links']:
-                    target = link['Target']
                     uri = link['Uri']
+                    target = link['Target']
                     format_id = target
-                    preference = -1 if target == 'HDS' else -2
+                    preference = None
                     if spoken_subtitles:
-                        preference -= 2
+                        preference = -1
                         format_id += '-spoken-subtitles'
-                    formats.append({
-                        'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
-                        'format_id': format_id,
-                        'ext': link['FileFormat'],
-                        'preference': preference,
-                    })
+                    if target == 'HDS':
+                        formats.extend(self._extract_f4m_formats(
+                            uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
+                            video_id, preference, f4m_id=format_id))
+                    elif target == 'HLS':
+                        formats.extend(self._extract_m3u8_formats(
+                            uri, video_id, 'mp4', preference=preference,
+                            m3u8_id=format_id))
+                    else:
+                        bitrate = link.get('Bitrate')
+                        if bitrate:
+                            format_id += '-%s' % bitrate
+                        formats.append({
+                            'url': uri,
+                            'format_id': format_id,
+                            'tbr': bitrate,
+                            'ext': link.get('FileFormat'),
+                        })
                 subtitles_list = asset.get('SubtitlesList')
                 if isinstance(subtitles_list, list):
                     LANGS = {
index 70f8efe27578c4d43b27378a4a2c80d495a7488c..9a5a8f4bb44039e6c52968801033a3d12a73d835 100644 (file)
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -4,22 +4,28 @@ from .tnaflix import TNAFlixIE
 
 
 class EMPFlixIE(TNAFlixIE):
-    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
 
     _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
     _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
     _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 
-    _TEST = {
-        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
-        'info_dict': {
-            'id': '33051',
-            'display_id': 'Amateur-Finger-Fuck',
-            'ext': 'mp4',
-            'title': 'Amateur Finger Fuck',
-            'description': 'Amateur solo finger fucking.',
-            'thumbnail': 're:https?://.*\.jpg$',
-            'age_limit': 18,
+    _TESTS = [
+        {
+            'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+            'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+            'info_dict': {
+                'id': '33051',
+                'display_id': 'Amateur-Finger-Fuck',
+                'ext': 'mp4',
+                'title': 'Amateur Finger Fuck',
+                'description': 'Amateur solo finger fucking.',
+                'thumbnail': 're:https?://.*\.jpg$',
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+            'only_matching': True,
         }
-    }
+    ]
index 65f6ca103973bb25c016ae92fcb551c65def31d1..b107557880345157fcf66cb7e6e1b4a92b25af2e 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -7,9 +7,9 @@ from ..utils import int_or_none
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
+    _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
     _TEST = {
-        'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
+        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
         'info_dict': {
             'id': 'aye83DjauH',
@@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor):
 
 
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {
-        'url': 'http://instagram.com/porsche',
+        'url': 'https://instagram.com/porsche',
         'info_dict': {
             'id': 'porsche',
             'title': 'porsche',
index 1484ac0d267697dceb34c9e406e3a26b26a37f54..da896caf160f6f0a0ae49167ae546cd9da4d45fa 100644 (file)
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -50,9 +50,7 @@ class LetvIE(InfoExtractor):
             'title': '与龙共舞 完整版',
             'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
         },
-        'params': {
-            'cn_verification_proxy': 'http://proxy.uku.im:8888'
-        },
+        'skip': 'Only available in China',
     }]
 
     @staticmethod
index d8897eb90d526b7b7d2e5a5ace5bec84ebb40031..7091f3335e8223ea0a089ba7cff127a983b12d7d 100644 (file)
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -20,7 +20,6 @@ class MiTeleIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
-        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
         'info_dict': {
             'id': '0fce117d',
             'ext': 'mp4',
@@ -29,6 +28,10 @@ class MiTeleIE(InfoExtractor):
             'display_id': 'programa-144',
             'duration': 2913,
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -56,12 +59,14 @@ class MiTeleIE(InfoExtractor):
             episode,
             transform_source=strip_jsonp
         )
+        formats = self._extract_m3u8_formats(
+            token_info['tokenizedUrl'], episode, ext='mp4')
 
         return {
             'id': embed_data['videoId'],
             'display_id': episode,
             'title': info_el.find('title').text,
-            'url': token_info['tokenizedUrl'],
+            'formats': formats,
             'description': get_element_by_attribute('class', 'text', webpage),
             'thumbnail': info_el.find('thumb').text,
             'duration': parse_duration(info_el.find('duration').text),
index 862b706bf96719aa071f1f89c73f2a4ef45a20b1..944096e1ca15de964fcdf896adf988c9aa2264bd 100644 (file)
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -22,6 +22,18 @@ class NBAIE(InfoExtractor):
     }, {
         'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
         'only_matching': True,
+    }, {
+        'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+        'info_dict': {
+            'id': '0041400301-cle-atl-recap.nba',
+            'ext': 'mp4',
+            'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+            'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
+            'duration': 228,
+        },
+        'params': {
+            'skip_download': True,
+        }
     }]
 
     def _real_extract(self, url):
@@ -35,8 +47,12 @@ class NBAIE(InfoExtractor):
             self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
 
         description = self._og_search_description(webpage)
-        duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration'))
+        duration_str = self._html_search_meta(
+            'duration', webpage, 'duration', default=None)
+        if not duration_str:
+            duration_str = self._html_search_regex(
+                r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
+        duration = parse_duration(duration_str)
 
         return {
             'id': shortened_video_id,
index 02dba4ef639e64deff790f94bd5cd0cd6da2a4cb..d1b7cff4cfbf30c76c52ae98ad247e8907be6abd 100644 (file)
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -89,8 +89,8 @@ class NextMediaActionNewsIE(NextMediaIE):
         return self._extract_from_nextmedia_page(news_id, url, article_page)
 
 
-class AppleDailyRealtimeNewsIE(NextMediaIE):
-    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+class AppleDailyIE(NextMediaIE):
+    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
         'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
         'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -99,7 +99,7 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
             'ext': 'mp4',
             'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'md5:b23787119933404ce515c6356a8c355c',
+            'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
             'upload_date': '20150128',
         }
     }, {
@@ -110,26 +110,10 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
             'ext': 'mp4',
             'title': '不滿被踩腳 山東兩大媽一路打下車',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
+            'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
             'upload_date': '20150128',
         }
-    }]
-
-    _URL_PATTERN = r'\{url: \'(.+)\'\}'
-
-    def _fetch_title(self, page):
-        return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
-
-    def _fetch_thumbnail(self, page):
-        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
-
-    def _fetch_timestamp(self, page):
-        return None
-
-
-class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
-    _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
-    _TESTS = [{
+    }, {
         'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
         'md5': '03df296d95dedc2d5886debbb80cb43f',
         'info_dict': {
@@ -154,10 +138,22 @@ class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
         'expected_warnings': [
             'video thumbnail',
         ]
+    }, {
+        'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
+        'only_matching': True,
     }]
 
+    _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
     def _fetch_title(self, page):
-        return self._html_search_meta('description', page, 'news title')
+        return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
+                self._html_search_meta('description', page, 'news title'))
+
+    def _fetch_thumbnail(self, page):
+        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+    def _fetch_timestamp(self, page):
+        return None
 
     def _fetch_description(self, page):
         return self._html_search_meta('description', page, 'news description')
index 7cc7996642cae1de1ca2a585391d167025b92162..255d4abc131519ec470ccdc2b1a64b7d38d9f44b 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -17,7 +17,7 @@ from ..utils import (
 class ProSiebenSat1IE(InfoExtractor):
     IE_NAME = 'prosiebensat1'
     IE_DESC = 'ProSiebenSat.1 Digital'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)'
 
     _TESTS = [
         {
index 13113820bc4c820012bd618104c1b192897a6985..b540033e25a8c8e033f9d7f7b49d6b6ae46f755b 100644 (file)
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor):
             'title': '可惜没如果',
             'upload_date': '20141227',
             'creator': '林俊杰',
-            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+            'description': 'md5:d327722d0361576fde558f1ac68a7065',
         }
     }]
 
@@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor):
         lrc_content = self._html_search_regex(
             r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
             detail_info_page, 'LRC lyrics', default=None)
+        if lrc_content:
+            lrc_content = lrc_content.replace('\\n', '\n')
 
         guid = self.m_r_get_ruin()
 
index 849300140ecbf598874d22b090262eabec1e7ea5..82cd98ac742bf436b24fbbc77cac9a6fb8a44ff6 100644 (file)
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -17,7 +17,7 @@ from ..utils import (
 
 
 def _decrypt_url(png):
-    encrypted_data = base64.b64decode(png)
+    encrypted_data = base64.b64decode(png.encode('utf-8'))
     text_index = encrypted_data.find(b'tEXt')
     text_chunk = encrypted_data[text_index - 4:]
     length = struct_unpack('!I', text_chunk[:4])[0]
index 55604637dca22533cd765529dcb2abfb759fd9c1..d9df0686133a6772deb1e58260069857620afc58 100644 (file)
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -104,7 +104,7 @@ class RUTVIE(InfoExtractor):
     @classmethod
     def _extract_url(cls, webpage):
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
         if mobj:
             return mobj.group('url')
 
index 26ced716e8a875f1c4c5c9527b856475dce83f9e..9f3e944e73532a92c5213ce95e7633fe6bc4c212 100644 (file)
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -47,7 +47,7 @@ class SharedIE(InfoExtractor):
         video_url = self._html_search_regex(
             r'data-url="([^"]+)"', video_page, 'video URL')
         title = base64.b64decode(self._html_search_meta(
-            'full:title', webpage, 'title')).decode('utf-8')
+            'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
         filesize = int_or_none(self._html_search_meta(
             'full:size', webpage, 'file size', fatal=False))
         thumbnail = self._html_search_regex(
index eab4adfca46f3686ac7d23ca581681e56d5066f1..29bd9ce6f76247b5ac997050075c7e720d8a3b2b 100644 (file)
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -23,9 +23,7 @@ class SohuIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'MV:Far East Movement《The Illest》',
         },
-        'params': {
-            'cn_verification_proxy': 'proxy.uku.im:8888'
-        }
+        'skip': 'Only available in China',
     }, {
         'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
         'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
index 8686f9d11fa3178eefe3eb71fe5a1413e729beeb..86d509ae5351a3cc15be66dda9f485d63ec166ba 100644 (file)
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -6,8 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
-    parse_duration,
-    parse_iso8601,
+    unified_strdate,
 )
 
 
@@ -20,11 +19,9 @@ class SportBoxIE(InfoExtractor):
             'id': '80822',
             'ext': 'mp4',
             'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
-            'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed',
+            'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'timestamp': 1411896237,
             'upload_date': '20140928',
-            'duration': 4846,
         },
         'params': {
             # m3u8 download
@@ -48,17 +45,13 @@ class SportBoxIE(InfoExtractor):
             r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
 
         title = self._html_search_regex(
-            r'<h1 itemprop="name">([^<]+)</h1>', webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<div itemprop="description">(.+?)</div>',
-            webpage, 'description', fatal=False)
+            [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
+            webpage, 'title')
+        description = self._og_search_description(webpage) or self._html_search_meta(
+            'description', webpage, 'description')
         thumbnail = self._og_search_thumbnail(webpage)
-        timestamp = parse_iso8601(self._search_regex(
-            r'<span itemprop="uploadDate">([^<]+)</span>',
-            webpage, 'timestamp', fatal=False))
-        duration = parse_duration(self._html_search_regex(
-            r'<meta itemprop="duration" content="PT([^"]+)">',
-            webpage, 'duration', fatal=False))
+        upload_date = unified_strdate(self._html_search_meta(
+            'dateCreated', webpage, 'upload date'))
 
         return {
             '_type': 'url_transparent',
@@ -67,8 +60,7 @@ class SportBoxIE(InfoExtractor):
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
+            'upload_date': upload_date,
         }
 
 
index 251a686804b6f26915c3fa25d9f6b2cc1f98ed4b..a0c744fd16b633b08e7bbb632b77cf40e8410710 100644 (file)
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -16,6 +16,10 @@ class TelecincoIE(MiTeleIE):
             'title': 'Con Martín Berasategui, hacer un bacalao al ...',
             'duration': 662,
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
         'only_matching': True,
index d48cbbf140054e639f7191acfa0909972ef3ab76..59af9aba06399cefcc6c2049c958dfb3819bb20a 100644 (file)
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -10,26 +10,32 @@ from ..utils import (
 
 
 class TNAFlixIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
     _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
     _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
     _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 
-    _TEST = {
-        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
-        'info_dict': {
-            'id': '553878',
-            'display_id': 'Carmella-Decesare-striptease',
-            'ext': 'mp4',
-            'title': 'Carmella Decesare - striptease',
-            'description': '',
-            'thumbnail': 're:https?://.*\.jpg$',
-            'duration': 91,
-            'age_limit': 18,
+    _TESTS = [
+        {
+            'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+            'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+            'info_dict': {
+                'id': '553878',
+                'display_id': 'Carmella-Decesare-striptease',
+                'ext': 'mp4',
+                'title': 'Carmella Decesare - striptease',
+                'description': '',
+                'thumbnail': 're:https?://.*\.jpg$',
+                'duration': 91,
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+            'only_matching': True,
         }
-    }
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 4de0aac523313eced334aab38a9a20c7bf08dfc7..fad720b681e125ac495b26ba3870dbd65340e3ce 100644 (file)
--- a/youtube_dl/extractor/tutv.py
+++ b/youtube_dl/extractor/tutv.py
@@ -26,7 +26,7 @@ class TutvIE(InfoExtractor):
 
         data_content = self._download_webpage(
             'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
-        video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
+        video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8')
 
         return {
             'id': internal_id,
index 2dcc0e971e4bd72aa1038480ce8b410a6a4e8e15..fa338b936de7d3fef15cf24bccc05255bc928ee6 100644 (file)
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -1,12 +1,15 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
     float_or_none,
     parse_iso8601,
+    remove_end,
 )
 
 
@@ -91,3 +94,33 @@ class TV2IE(InfoExtractor):
             'categories': categories,
             'formats': formats,
         }
+
+
+class TV2ArticleIE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+        'info_dict': {
+            'id': '6930542',
+            'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret',
+            'description': 'md5:339573779d3eea3542ffe12006190954',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.tv2.no/a/6930542',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
+            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]
+
+        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+        description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+        return self.playlist_result(entries, playlist_id, title, description)
index 96c809eaf7155290210e0f8b18d3a2c7c948ba97..c4751050ec60901c2750b2f1692059f6246e23dc 100644 (file)
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     ExtractorError,
     qualities,
@@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        deliver_url = self._search_regex(
-            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
-            webpage, 'deliver URL')
+        deliver_url = self._proto_relative_url(self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+            webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
 
         deliver_page = self._download_webpage(
             deliver_url, video_id, 'Downloading iframe page')
@@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor):
 
         player = self._parse_json(
             self._search_regex(
-                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
+                deliver_page, 'player'),
             video_id)
 
         quality = qualities(['flash', 'html5'])
index ececc7ee0118932716ca9bdb06779cf94e6dc0ec..591024eaded0cdddbb0779bd942c0fa8f63d86a6 100644 (file)
--- a/youtube_dl/extractor/videott.py
+++ b/youtube_dl/extractor/videott.py
@@ -43,7 +43,7 @@ class VideoTtIE(InfoExtractor):
 
         formats = [
             {
-                'url': base64.b64decode(res['u']).decode('utf-8'),
+                'url': base64.b64decode(res['u'].encode('utf-8')).decode('utf-8'),
                 'ext': 'flv',
                 'format_id': res['l'],
             } for res in settings['res'] if res['u']
index 619039e516c96209c38953e6b73f93a6895df54c..15377097e658b20e75a08f19b370be3bef2158c7 100644 (file)
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -38,11 +38,14 @@ class VierIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
-            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+            [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
+            webpage, 'video id')
         application = self._search_regex(
-            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+            [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+            webpage, 'application', default='vier_vod')
         filename = self._search_regex(
-            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+            [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+            webpage, 'filename')
 
         playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
         formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
index cf6af1e5cdb6315d325d2bd355d384cc283a3e0c..7f2fb1ca8896e29e48a41a9efddaded987ba1e96 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,29 +1,65 @@
 from __future__ import unicode_literals
 
-import re
+import time
+import hmac
+import hashlib
+import itertools
 
-from ..compat import (
-    compat_urlparse,
-    compat_urllib_request,
-)
 from ..utils import (
     ExtractorError,
-    unescapeHTML,
-    unified_strdate,
-    US_RATINGS,
-    determine_ext,
-    mimetype2ext,
+    int_or_none,
+    parse_age_limit,
+    parse_iso8601,
 )
 from .common import InfoExtractor
 
 
-class VikiIE(InfoExtractor):
-    IE_NAME = 'viki'
+class VikiBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+    _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
+    _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+
+    _APP = '65535a'
+    _APP_VERSION = '2.2.5.1428709186'
+    _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
+
+    def _prepare_call(self, path, timestamp=None):
+        path += '?' if '?' not in path else '&'
+        if not timestamp:
+            timestamp = int(time.time())
+        query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+        sig = hmac.new(
+            self._APP_SECRET.encode('ascii'),
+            query.encode('ascii'),
+            hashlib.sha1
+        ).hexdigest()
+        return self._API_URL_TEMPLATE % (query, sig)
+
+    def _call_api(self, path, video_id, note, timestamp=None):
+        resp = self._download_json(
+            self._prepare_call(path, timestamp), video_id, note)
+
+        error = resp.get('error')
+        if error:
+            if error == 'invalid timestamp':
+                resp = self._download_json(
+                    self._prepare_call(path, int(resp['current_timestamp'])),
+                    video_id, '%s (retry)' % note)
+                error = resp.get('error')
+            if error:
+                self._raise_error(resp['error'])
+
+        return resp
 
-    # iPad2
-    _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
+    def _raise_error(self, error):
+        raise ExtractorError(
+            '%s returned error: %s' % (self.IE_NAME, error),
+            expected=True)
 
-    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
+
+class VikiIE(VikiBaseIE):
+    IE_NAME = 'viki'
+    _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
     _TESTS = [{
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
         'info_dict': {
@@ -37,111 +73,218 @@ class VikiIE(InfoExtractor):
         },
         'skip': 'Blocked in the US',
     }, {
+        # clip
         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
-        'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
+        'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
         'info_dict': {
             'id': '1067139v',
             'ext': 'mp4',
+            'title': "'The Avengers: Age of Ultron' Press Conference",
             'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+            'duration': 352,
+            'timestamp': 1430380829,
             'upload_date': '20150430',
-            'title': '\'The Avengers: Age of Ultron\' Press Conference',
+            'uploader': 'Arirang TV',
+            'like_count': int,
+            'age_limit': 0,
         }
     }, {
         'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
         'info_dict': {
             'id': '1048879v',
             'ext': 'mp4',
-            'upload_date': '20140820',
-            'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
             'title': 'Ankhon Dekhi',
+            'duration': 6512,
+            'timestamp': 1408532356,
+            'upload_date': '20140820',
+            'uploader': 'Spuul',
+            'like_count': int,
+            'age_limit': 13,
         },
         'params': {
-            # requires ffmpeg
+            # m3u8 download
             'skip_download': True,
         }
+    }, {
+        # episode
+        'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+        'md5': '190f3ef426005ba3a080a63325955bc3',
+        'info_dict': {
+            'id': '44699v',
+            'ext': 'mp4',
+            'title': 'Boys Over Flowers - Episode 1',
+            'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
+            'duration': 4155,
+            'timestamp': 1270496524,
+            'upload_date': '20100405',
+            'uploader': 'group8',
+            'like_count': int,
+            'age_limit': 13,
+        }
+    }, {
+        # youtube external
+        'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+        'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+        'info_dict': {
+            'id': '50562v',
+            'ext': 'mp4',
+            'title': 'Poor Nastya [COMPLETE] - Episode 1',
+            'description': '',
+            'duration': 607,
+            'timestamp': 1274949505,
+            'upload_date': '20101213',
+            'uploader': 'ad14065n',
+            'uploader_id': 'ad14065n',
+            'like_count': int,
+            'age_limit': 13,
+        }
+    }, {
+        'url': 'http://www.viki.com/player/44699v',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
-        uploader_m = re.search(
-            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
-        if uploader_m is None:
-            uploader = None
-        else:
-            uploader = uploader_m.group(1).strip()
-
-        rating_str = self._html_search_regex(
-            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
-            'rating information', default='').strip()
-        age_limit = US_RATINGS.get(rating_str)
-
-        req = compat_urllib_request.Request(
-            'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
-        req.add_header('User-Agent', self._USER_AGENT)
-        info_webpage = self._download_webpage(
-            req, video_id, note='Downloading info page')
-        err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
-        if err_msg:
-            if 'not available in your region' in err_msg:
-                raise ExtractorError(
-                    'Video %s is blocked from your location.' % video_id,
-                    expected=True)
-            else:
-                raise ExtractorError('Viki said: ' + err_msg)
-        mobj = re.search(
-            r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
-        if not mobj:
-            raise ExtractorError('Unable to find video URL')
-        video_url = unescapeHTML(mobj.group('url'))
-        video_ext = mimetype2ext(mobj.group('mime_type'))
-
-        if determine_ext(video_url) == 'm3u8':
-            formats = self._extract_m3u8_formats(
-                video_url, video_id, ext=video_ext)
-        else:
-            formats = [{
-                'url': video_url,
-                'ext': video_ext,
-            }]
-
-        upload_date_str = self._html_search_regex(
-            r'"created_at":"([^"]+)"', info_webpage, 'upload date')
-        upload_date = (
-            unified_strdate(upload_date_str)
-            if upload_date_str is not None
-            else None
-        )
-
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, info_webpage)
-
-        return {
+        video = self._call_api(
+            'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+
+        title = None
+        titles = video.get('titles')
+        if titles:
+            title = titles.get('en') or titles[list(titles.keys())[0]]
+        if not title:
+            title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
+            container_titles = video.get('container', {}).get('titles')
+            if container_titles:
+                container_title = container_titles.get('en') or container_titles[list(container_titles.keys())[0]]
+                title = '%s - %s' % (container_title, title)
+
+        descriptions = video.get('descriptions')
+        description = descriptions.get('en') or descriptions[list(descriptions.keys())[0]] if descriptions else None
+
+        duration = int_or_none(video.get('duration'))
+        timestamp = parse_iso8601(video.get('created_at'))
+        uploader = video.get('author')
+        like_count = int_or_none(video.get('likes', {}).get('count'))
+        age_limit = parse_age_limit(video.get('rating'))
+
+        thumbnails = []
+        for thumbnail_id, thumbnail in video.get('images', {}).items():
+            thumbnails.append({
+                'id': thumbnail_id,
+                'url': thumbnail.get('url'),
+            })
+
+        subtitles = {}
+        for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+            subtitles[subtitle_lang] = [{
+                'ext': subtitles_format,
+                'url': self._prepare_call(
+                    'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+            } for subtitles_format in ('srt', 'vtt')]
+
+        result = {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'thumbnail': thumbnail,
-            'age_limit': age_limit,
+            'duration': duration,
+            'timestamp': timestamp,
             'uploader': uploader,
-            'subtitles': video_subtitles,
-            'upload_date': upload_date,
+            'like_count': like_count,
+            'age_limit': age_limit,
+            'thumbnails': thumbnails,
+            'subtitles': subtitles,
         }
 
-    def _get_subtitles(self, video_id, info_webpage):
-        res = {}
-        for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
-            sturl = unescapeHTML(sturl_html)
-            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
-            if not m:
-                continue
-            res[m.group('lang')] = [{
-                'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
-                'ext': 'vtt',
-            }]
-        return res
+        streams = self._call_api(
+            'videos/%s/streams.json' % video_id, video_id,
+            'Downloading video streams JSON')
+
+        if 'external' in streams:
+            result.update({
+                '_type': 'url_transparent',
+                'url': streams['external']['url'],
+            })
+            return result
+
+        formats = []
+        for format_id, stream_dict in streams.items():
+            height = self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None)
+            for protocol, format_dict in stream_dict.items():
+                if format_id == 'm3u8':
+                    formats = self._extract_m3u8_formats(
+                        format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+                else:
+                    formats.append({
+                        'url': format_dict['url'],
+                        'format_id': '%s-%s' % (format_id, protocol),
+                        'height': height,
+                    })
+        self._sort_formats(formats)
+
+        result['formats'] = formats
+        return result
+
+
+class VikiChannelIE(VikiBaseIE):
+    IE_NAME = 'viki:channel'
+    _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+    _TESTS = [{
+        'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+        'info_dict': {
+            'id': '50c',
+            'title': 'Boys Over Flowers',
+            'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+        },
+        'playlist_count': 70,
+    }, {
+        'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+        'info_dict': {
+            'id': '1354c',
+            'title': 'Poor Nastya [COMPLETE]',
+            'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+        },
+        'playlist_count': 127,
+    }, {
+        'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/artists/2141c-shinee',
+        'only_matching': True,
+    }]
+
+    _PER_PAGE = 25
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        channel = self._call_api(
+            'containers/%s.json' % channel_id, channel_id,
+            'Downloading channel JSON')
+
+        titles = channel['titles']
+        title = titles.get('en') or titles[list(titles.keys())[0]]
+
+        descriptions = channel['descriptions']
+        description = descriptions.get('en') or descriptions[list(descriptions.keys())[0]]
+
+        entries = []
+        for video_type in ('episodes', 'clips', 'movies'):
+            for page_num in itertools.count(1):
+                page = self._call_api(
+                    'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+                    % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+                    'Downloading %s JSON page #%d' % (video_type, page_num))
+                for video in page['response']:
+                    video_id = video['id']
+                    entries.append(self.url_result(
+                        'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+                if not page['pagination']['next']:
+                    break
+
+        return self.playlist_result(entries, channel_id, title, description)
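The new `VikiBaseIE._prepare_call` above builds a signed API URL by appending app id, timestamp and site to the path and signing the whole query with HMAC-SHA1; `_call_api` then retries once with the server-supplied `current_timestamp` when the API answers "invalid timestamp". A standalone sketch of the signing scheme, using placeholder app and secret values rather than the ones in the patch:

    import hashlib
    import hmac
    import time

    APP = 'example-app-id'        # placeholder, not the value from the patch
    APP_SECRET = 'example-secret'  # placeholder secret

    def prepare_call(path, timestamp=None):
        # append the common query parameters, then sign the full query string
        path += '?' if '?' not in path else '&'
        timestamp = timestamp or int(time.time())
        query = '/v4/%sapp=%s&t=%s&site=www.viki.com' % (path, APP, timestamp)
        sig = hmac.new(
            APP_SECRET.encode('ascii'), query.encode('ascii'), hashlib.sha1).hexdigest()
        return 'http://api.viki.io%s&sig=%s' % (query, sig)

    print(prepare_call('videos/1023585v.json'))
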
index c3fde53f5ef06a56b54e94b20b72a7e98c1992a5..a6d9b5fee1f4864d82c7f8bb83e87884c96afe3b 100644 (file)
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dl/extractor/vuclip.py
@@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor):
         links_code = self._search_regex(
             r'''(?xs)
                 (?:
-                    <img\s+src="/im/play.gif".*?>|
+                    <img\s+src="[^"]*/play.gif".*?>|
                     <!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
                 )
                 (.*?)
index 1eb24a3d67ffa92838ce41301b3b47d401482609..faa167e65861af3bb4803ab96fe931c15597dc00 100644 (file)
--- a/youtube_dl/extractor/vulture.py
+++ b/youtube_dl/extractor/vulture.py
@@ -44,7 +44,7 @@ class VultureIE(InfoExtractor):
         query_webpage = self._download_webpage(
             query_url, display_id, note='Downloading query page')
         params_json = self._search_regex(
-            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
+            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
             query_webpage,
             'player params')
         params = json.loads(params_json)
index d6dec25ca9e7bb9de539e89c147e22b7381e3719..f69d46a2858077ed76ec9c8fc86166668f27c705 100644 (file)
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -37,7 +37,8 @@ class WimpIE(InfoExtractor):
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
-            r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
+            [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
+            webpage, 'video URL')
         if YoutubeIE.suitable(video_url):
             self.to_screen('Found YouTube video')
             return {
index 8c6241aedf7249343a725ab705968d0af963294a..7c9d8af6f2585207347d58d08fc607ebf4d28900 100644 (file)
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor):
             r'minus_track\.dur_sec=\'([0-9]*?)\'',
             webpage, 'duration', fatal=False))
         filesize_approx = parse_filesize(self._html_search_regex(
-            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+            r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
             webpage, 'approximate filesize', fatal=False))
         tbr = int_or_none(self._html_search_regex(
             r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
@@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor):
             description = re.sub(' *\r *', '\n', description)
 
         enc_token = self._html_search_regex(
-            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+            r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
         token = ''.join(
             c if pos == 3 else compat_chr(compat_ord(c) - 1)
             for pos, c in enumerate(reversed(enc_token)))
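For context, the unchanged decoding just below the new minus_track\.s?tkn regex reverses the token and shifts each character down one code point, leaving the character at index 3 untouched. A plain-Python equivalent without the compat helpers (example input only):

    def decode_token(enc_token):
        # reverse the token, shift every char down by one code point,
        # except the character that lands at index 3, which is kept as-is
        return ''.join(
            c if pos == 3 else chr(ord(c) - 1)
            for pos, c in enumerate(reversed(enc_token)))

    print(decode_token('ihfe'))  # example input only; prints 'degi'
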
index bf4e659ac6981c77f7e5f3c77578c4808634d766..f9afbdbab611e233c7f7014ae7d66e996f2b7c31 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,6 +15,7 @@ from ..utils import (
     unescapeHTML,
     ExtractorError,
     int_or_none,
+    mimetype2ext,
 )
 
 from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        closed_captions = self._html_search_regex(
+            r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+            default='[]')
+
+        cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+        subtitles = {}
+        if cc_json:
+            for closed_caption in cc_json:
+                lang = closed_caption['lang']
+                if lang not in subtitles:
+                    subtitles[lang] = []
+                subtitles[lang].append({
+                    'url': closed_caption['url'],
+                    'ext': mimetype2ext(closed_caption['content_type']),
+                })
+
         return {
             'id': video_id,
             'display_id': display_id,
@@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
             'duration': int_or_none(meta.get('duration')),
+            'subtitles': subtitles,
         }
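The Yahoo change above pairs with the new 'ttml+xml': 'ttml' entry in mimetype2ext (utils.py below): each scraped closed-caption entry carries a language, URL and MIME type and is grouped into the subtitles dict by language. A standalone sketch with made-up caption entries, assuming youtube_dl with this patch is importable:

    from youtube_dl.utils import mimetype2ext

    # made-up entries standing in for the scraped "closedcaptions" JSON
    cc_json = [
        {'lang': 'en', 'url': 'http://example.com/captions-en.ttml', 'content_type': 'application/ttml+xml'},
        {'lang': 'es', 'url': 'http://example.com/captions-es.vtt', 'content_type': 'text/vtt'},
    ]

    subtitles = {}
    for closed_caption in cc_json:
        subtitles.setdefault(closed_caption['lang'], []).append({
            'url': closed_caption['url'],
            'ext': mimetype2ext(closed_caption['content_type']),  # 'ttml', 'vtt'
        })
    print(subtitles)
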
 
 
index 22dbc3aec7866ad3f5d048c35737486a8fdac8fc..5a2315bd96ce0c6abfdf4a8bea65aa68e6fa370b 100644 (file)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -537,7 +537,7 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '--dump-pages', '--dump-intermediate-pages',
         action='store_true', dest='dump_intermediate_pages', default=False,
-        help='Print downloaded pages to debug problems (very verbose)')
+        help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
     verbosity.add_option(
         '--write-pages',
         action='store_true', dest='write_pages', default=False,
@@ -713,7 +713,7 @@ def parseOpts(overrideArguments=None):
         help='Parse additional metadata like song title / artist from the video title. '
              'The format syntax is the same as --output, '
              'the parsed parameters replace existing values. '
-             'Additional templates: %(album), %(artist). '
+             'Additional templates: %(album)s, %(artist)s. '
              'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
              '"Coldplay - Paradise"')
     postproc.add_option(
index ed9ed9ed63ec9b40d929f83cb2e56ee4d63f9e7f..52d198fa3c2eb36a1a3d41620cd645b90d52f854 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
     return {
         'x-ms-wmv': 'wmv',
         'x-mp4-fragmented': 'mp4',
+        'ttml+xml': 'ttml',
     }.get(res, res)
 
 
@@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data):
         out = str_or_empty(node.text)
 
         for child in node:
-            if child.tag == _x('ttml:br'):
+            if child.tag in (_x('ttml:br'), 'br'):
                 out += '\n' + str_or_empty(child.tail)
-            elif child.tag == _x('ttml:span'):
+            elif child.tag in (_x('ttml:span'), 'span'):
                 out += str_or_empty(parse_node(child))
             else:
                 out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data):
 
     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p'))
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+    if not paras:
+        raise ValueError('Invalid dfxp/TTML subtitle')
 
     for para, index in zip(paras, itertools.count(1)):
         begin_time = parse_dfxp_time_expr(para.attrib['begin'])
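The or dfxp.findall('.//p') fallback above, together with the 'br'/'span' tag checks, lets dfxp2srt handle TTML documents that omit the default namespace, which is exactly what the new case in test/test_utils.py exercises. A quick usage sketch, assuming youtube_dl with this patch is importable:

    from youtube_dl.utils import dfxp2srt

    dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
        <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
        <body><div xml:lang="en"><p begin="0" end="1">The first line</p></div></body>
        </tt>'''
    print(dfxp2srt(dfxp_data))
    # 1
    # 00:00:00,000 --> 00:00:01,000
    # The first line
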
index 38f00bc9bc2ef476ddb813d7bcd26e5d13f4947d..b333851534e9edd9c75ff70ee4350874530ea8f7 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.05.15'
+__version__ = '2015.05.20'