Merge pull request #8092 from bpfoley/twitter-thumbnail

author remitamine <remitamine@gmail.com>

Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)

committer remitamine <remitamine@gmail.com>

Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)
author remitamine <remitamine@gmail.com>
Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)
committer remitamine <remitamine@gmail.com>
Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)
diff --git a/.gitignore b/.gitignore

index 0422adf4456ec35166f5d2bce6b832c67601c2dc..26dbde73d412673ee9c53ee06a476a803a92edc7 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
  *.pyc
  *.pyo
+*.class
  *~
  *.DS_Store
  wine-py2exe/
@@ -32,4 +33,4 @@ test/testdata
  .tox
  youtube-dl.zsh
  .idea
-.idea/*
-\ No newline at end of file
+.idea/*
diff --git a/AUTHORS b/AUTHORS

index b51e23f2d6552e570717ebc6520dbcf3a5d17714..aa48cd5a6015aa965a23b4203349e3bc0a6f690d 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -161,3 +161,5 @@ Jens Wille
  Robin Houtevelts
  Patrick Griffis
  Aidan Rowe
+mutantmonkey
+Ben Congdon
diff --git a/Makefile b/Makefile

index cb449b7e61c4c27d44177cc775c26fa79ca97a3a..e98806791327feaa67cd780e5395ce0cc960bbd8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
  clean:
         rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
         find . -name "*.pyc" -delete
+       find . -name "*.class" -delete
  
  PREFIX ?= /usr/local
  BINDIR ?= $(PREFIX)/bin
@@ -44,7 +45,7 @@ test:
  ot: offlinetest
  
  offlinetest: codetest
-       nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
+       $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
  
  tar: youtube-dl.tar.gz
  
diff --git a/README.md b/README.md

index d66804a7a19f667f0d8e3b19b707eec46bf2bb8d..68db546ef33b951f30d3639fccddeeb960ae5c91 100644 (file)
--- a/README.md
+++ b/README.md
@@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
                                       on Windows)
      --flat-playlist                  Do not extract the videos of a playlist,
                                       only list them.
+    --mark-watched                   Mark videos watched (YouTube only)
+    --no-mark-watched                Do not mark videos watched (YouTube only)
      --no-color                       Do not emit color codes in output
  
  ## Network Options:
@@ -179,7 +181,7 @@ which means you can modify it, redistribute it or use it however you like.
                                       to play it)
      --external-downloader COMMAND    Use the specified external downloader.
                                       Currently supports
-                                     aria2c,axel,curl,httpie,wget
+                                     aria2c,avconv,axel,curl,ffmpeg,httpie,wget
      --external-downloader-args ARGS  Give these arguments to the external
                                       downloader
  
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 43403233d898ceefba6832fd7b55fb1f6d5f4367..a6dcc2576ad6fa8f8b4e05227323f33d5a2ccc12 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -54,6 +54,7 @@
   - **AtresPlayer**
   - **ATTTechChannel**
   - **AudiMedia**
+ - **AudioBoom**
   - **audiomack**
   - **audiomack:album**
   - **Azubu**
@@ -167,6 +168,8 @@
   - **Dump**
   - **Dumpert**
   - **dvtv**: http://video.aktualne.cz/
+ - **dw**
+ - **dw:article**
   - **EaglePlatform**
   - **EbaumsWorld**
   - **EchoMsk**
@@ -190,10 +193,10 @@
   - **ExpoTV**
   - **ExtremeTube**
   - **facebook**
- - **facebook:post**
   - **faz.net**
   - **fc2**
   - **Fczenit**
+ - **features.aol.com**
   - **fernsehkritik.tv**
   - **Firstpost**
   - **FiveTV**
@@ -293,6 +296,7 @@
   - **kontrtube**: KontrTube.ru - Труба зовёт
   - **KrasView**: Красвью
   - **Ku6**
+ - **KUSI**
   - **kuwo:album**: 酷我音乐 - 专辑
   - **kuwo:category**: 酷我音乐 - 分类
   - **kuwo:chart**: 酷我音乐 - 排行榜
@@ -301,12 +305,11 @@
   - **kuwo:song**: 酷我音乐
   - **la7.tv**
   - **Laola1Tv**
+ - **Le**: 乐视网
   - **Lecture2Go**
   - **Lemonde**
- - **Letv**: 乐视网
+ - **LePlaylist**
   - **LetvCloud**: 乐视云
- - **LetvPlaylist**
- - **LetvTv**
   - **Libsyn**
   - **life:embed**
   - **lifenews**: LIFE | NEWS
@@ -324,6 +327,7 @@
   - **m6**
   - **macgamestore**: MacGameStore trailers
   - **mailru**: Видео@Mail.Ru
+ - **MakersChannel**
   - **MakerTV**
   - **Malemotion**
   - **MatchTV**
@@ -334,6 +338,7 @@
   - **Mgoon**
   - **Minhateca**
   - **MinistryGrid**
+ - **Minoto**
   - **miomio.tv**
   - **MiTele**: mitele.es
   - **mixcloud**
@@ -421,6 +426,7 @@
   - **Npr**
   - **NRK**
   - **NRKPlaylist**
+ - **NRKSkole**: NRK Skole
   - **NRKTV**: NRK TV and NRK Radio
   - **ntv.ru**
   - **Nuvid**
@@ -669,6 +675,7 @@
   - **UDNEmbed**: 聯合影音
   - **Unistra**
   - **Urort**: NRK P3 Urørt
+ - **USAToday**
   - **ustream**
   - **ustream:channel**
   - **Ustudio**
@@ -682,6 +689,7 @@
   - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
   - **vh1.com**
   - **Vice**
+ - **ViceShow**
   - **Viddler**
   - **video.google:search**: Google Video search
   - **video.mit.edu**
@@ -709,6 +717,7 @@
   - **vimeo:channel**
   - **vimeo:group**
   - **vimeo:likes**: Vimeo user likes
+ - **vimeo:ondemand**
   - **vimeo:review**: Review pages on vimeo
   - **vimeo:user**
   - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
diff --git a/test/helper.py b/test/helper.py

index bdd7acca4d91c490f29c21aeac7cc9ba01c86952..f2d87821290095c1f9526f50db5d80ab31969d56 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -11,8 +11,11 @@ import sys
  
  import youtube_dl.extractor
  from youtube_dl import YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
+    compat_os_name,
      compat_str,
+)
+from youtube_dl.utils import (
      preferredencoding,
      write_string,
  )
@@ -42,7 +45,7 @@ def report_warning(message):
      Print the message to stderr, it will be prefixed with 'WARNING:'
      If stderr is a tty file the 'WARNING:' will be colored
      '''
-    if sys.stderr.isatty() and os.name != 'nt':
+    if sys.stderr.isatty() and compat_os_name != 'nt':
          _msg_header = '\033[0;33mWARNING:\033[0m'
      else:
          _msg_header = 'WARNING:'
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index 59f7ab49dbe4458b5b821d9fae7d629ffab5db1a..efbee3b711b046f62fbb486375486a9e558e5035 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -502,6 +502,9 @@ class TestYoutubeDL(unittest.TestCase):
          assertRegexpMatches(self, ydl._format_note({
              'vbr': 10,
          }), '^\s*10k$')
+        assertRegexpMatches(self, ydl._format_note({
+            'fps': 30,
+        }), '^30fps$')
  
      def test_postprocessors(self):
          filename = 'post-processor-testfile.mp4'
diff --git a/test/test_http.py b/test/test_http.py

index f2e305b6fed3ce2f0574a7c20e89ffb977934f28..fc59b1aed6ddc2db10598a1a4b954a128e3d3133 100644 (file)
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -52,7 +52,12 @@ class TestHTTP(unittest.TestCase):
              ('localhost', 0), HTTPTestRequestHandler)
          self.httpd.socket = ssl.wrap_socket(
              self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = self.httpd.socket.getsockname()[1]
+        if os.name == 'java':
+            # In Jython SSLSocket is not a subclass of socket.socket
+            sock = self.httpd.socket.sock
+        else:
+            sock = self.httpd.socket
+        self.port = sock.getsockname()[1]
          self.server_thread = threading.Thread(target=self.httpd.serve_forever)
          self.server_thread.daemon = True
          self.server_thread.start()
diff --git a/test/test_utils.py b/test/test_utils.py

index cb85e18f08574c84ecd2c8616d69e36aff123717..5a01099775846904c26ec3ea5b396ac16409344f 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -42,6 +42,7 @@ from youtube_dl.utils import (
      orderedSet,
      parse_duration,
      parse_filesize,
+    parse_count,
      parse_iso8601,
      read_batch_urls,
      sanitize_filename,
@@ -62,6 +63,7 @@ from youtube_dl.utils import (
      lowercase_escape,
      url_basename,
      urlencode_postdata,
+    update_url_query,
      version_tuple,
      xpath_with_ns,
      xpath_element,
@@ -78,6 +80,8 @@ from youtube_dl.utils import (
  from youtube_dl.compat import (
      compat_chr,
      compat_etree_fromstring,
+    compat_urlparse,
+    compat_parse_qs,
  )
  
  
@@ -456,6 +460,40 @@ class TestUtil(unittest.TestCase):
          data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
          self.assertTrue(isinstance(data, bytes))
  
+    def test_update_url_query(self):
+        def query_dict(url):
+            return compat_parse_qs(compat_urlparse.urlparse(url).query)
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+            query_dict('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?manifest=f4m', {'manifest': []})),
+            query_dict('http://example.com/path'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+            query_dict('http://example.com/path?system=LINUX'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'width': 1080, 'height': 720})),
+            query_dict('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'bitrate': 5020.43})),
+            query_dict('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'test': '第二行тест'})),
+            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
      def test_dict_get(self):
          FALSE_VALUES = {
              'none': None,
@@ -656,6 +694,15 @@ class TestUtil(unittest.TestCase):
          self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
          self.assertEqual(parse_filesize('1,24 KB'), 1240)
  
+    def test_parse_count(self):
+        self.assertEqual(parse_count(None), None)
+        self.assertEqual(parse_count(''), None)
+        self.assertEqual(parse_count('0'), 0)
+        self.assertEqual(parse_count('1000'), 1000)
+        self.assertEqual(parse_count('1.000'), 1000)
+        self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1kk'), 1100000)
+
      def test_version_tuple(self):
          self.assertEqual(version_tuple('1'), (1,))
          self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 8f3a8b9e301a1b075ab0490993a65de075378730..8c651cd52375e1dcf986307b57f447fce4025543 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,9 +24,6 @@ import time
  import tokenize
  import traceback
  
-if os.name == 'nt':
-    import ctypes
-
  from .compat import (
      compat_basestring,
      compat_cookiejar,
@@ -34,6 +31,7 @@ from .compat import (
      compat_get_terminal_size,
      compat_http_client,
      compat_kwargs,
+    compat_os_name,
      compat_str,
      compat_tokenize_tokenize,
      compat_urllib_error,
@@ -87,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
  from .downloader import get_suitable_downloader
  from .downloader.rtmp import rtmpdump_version
  from .postprocessor import (
+    FFmpegFixupM3u8PP,
      FFmpegFixupM4aPP,
      FFmpegFixupStretchedPP,
      FFmpegMergerPP,
@@ -95,6 +94,9 @@ from .postprocessor import (
  )
  from .version import __version__
  
+if compat_os_name == 'nt':
+    import ctypes
+
  
  class YoutubeDL(object):
      """YoutubeDL class.
@@ -450,7 +452,7 @@ class YoutubeDL(object):
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
              return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
              # c_wchar_p() might not be necessary if `message` is
              # already of type unicode()
              ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@@ -521,7 +523,7 @@ class YoutubeDL(object):
          else:
              if self.params.get('no_warnings'):
                  return
-            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                  _msg_header = '\033[0;33mWARNING:\033[0m'
              else:
                  _msg_header = 'WARNING:'
@@ -533,7 +535,7 @@ class YoutubeDL(object):
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
-        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
              _msg_header = '\033[0;31mERROR:\033[0m'
          else:
              _msg_header = 'ERROR:'
@@ -566,7 +568,7 @@ class YoutubeDL(object):
                  elif template_dict.get('height'):
                      template_dict['resolution'] = '%sp' % template_dict['height']
                  elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
  
              sanitize = lambda k, v: sanitize_filename(
                  compat_str(v),
@@ -1232,6 +1234,10 @@ class YoutubeDL(object):
                  if t.get('id') is None:
                      t['id'] = '%d' % i
  
+        if self.params.get('list_thumbnails'):
+            self.list_thumbnails(info_dict)
+            return
+
          if thumbnails and 'thumbnail' not in info_dict:
              info_dict['thumbnail'] = thumbnails[-1]['url']
  
@@ -1333,9 +1339,6 @@ class YoutubeDL(object):
          if self.params.get('listformats'):
              self.list_formats(info_dict)
              return
-        if self.params.get('list_thumbnails'):
-            self.list_thumbnails(info_dict)
-            return
  
          req_format = self.params.get('format')
          if req_format is None:
@@ -1637,6 +1640,8 @@ class YoutubeDL(object):
                  if fixup_policy is None:
                      fixup_policy = 'detect_or_warn'
  
+                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
                  stretched_ratio = info_dict.get('stretched_ratio')
                  if stretched_ratio is not None and stretched_ratio != 1:
                      if fixup_policy == 'warn':
@@ -1649,15 +1654,18 @@ class YoutubeDL(object):
                              info_dict['__postprocessors'].append(stretched_pp)
                          else:
                              self.report_warning(
-                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id'], stretched_ratio))
+                                '%s: Non-uniform pixel ratio (%s). %s'
+                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                      else:
                          assert fixup_policy in ('ignore', 'never')
  
-                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+                if (info_dict.get('requested_formats') is None and
+                        info_dict.get('container') == 'm4a_dash'):
                      if fixup_policy == 'warn':
-                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
-                            info_dict['id']))
+                        self.report_warning(
+                            '%s: writing DASH m4a. '
+                            'Only some players support this container.'
+                            % info_dict['id'])
                      elif fixup_policy == 'detect_or_warn':
                          fixup_pp = FFmpegFixupM4aPP(self)
                          if fixup_pp.available:
@@ -1665,8 +1673,27 @@ class YoutubeDL(object):
                              info_dict['__postprocessors'].append(fixup_pp)
                          else:
                              self.report_warning(
-                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id']))
+                                '%s: writing DASH m4a. '
+                                'Only some players support this container. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+                    else:
+                        assert fixup_policy in ('ignore', 'never')
+
+                if (info_dict.get('protocol') == 'm3u8_native' or
+                        info_dict.get('protocol') == 'm3u8' and
+                        self.params.get('hls_prefer_native')):
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: malformated aac bitstream.' % (
+                            info_dict['id']))
+                    elif fixup_policy == 'detect_or_warn':
+                        fixup_pp = FFmpegFixupM3u8PP(self)
+                        if fixup_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(fixup_pp)
+                        else:
+                            self.report_warning(
+                                '%s: malformated aac bitstream. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                      else:
                          assert fixup_policy in ('ignore', 'never')
  
@@ -1830,7 +1857,9 @@ class YoutubeDL(object):
          if fdict.get('vbr') is not None:
              res += '%4dk' % fdict['vbr']
          if fdict.get('fps') is not None:
-            res += ', %sfps' % fdict['fps']
+            if res:
+                res += ', '
+            res += '%sfps' % fdict['fps']
          if fdict.get('acodec') is not None:
              if res:
                  res += ', '
@@ -1873,13 +1902,8 @@ class YoutubeDL(object):
      def list_thumbnails(self, info_dict):
          thumbnails = info_dict.get('thumbnails')
          if not thumbnails:
-            tn_url = info_dict.get('thumbnail')
-            if tn_url:
-                thumbnails = [{'id': '0', 'url': tn_url}]
-            else:
-                self.to_screen(
-                    '[info] No thumbnails present for %s' % info_dict['id'])
-                return
+            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+            return
  
          self.to_screen(
              '[info] Thumbnails for %s:' % info_dict['id'])
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index 7b9afc36df2a2df9336f24054334782577f3bd52..74702786a76ee612a7f9ec91567752e22e70eda6 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -331,6 +331,9 @@ def compat_ord(c):
          return ord(c)
  
  
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
  if sys.version_info >= (3, 0):
      compat_getenv = os.getenv
      compat_expanduser = os.path.expanduser
@@ -351,7 +354,7 @@ else:
      # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
      # for different platforms with correct environment variables decoding.
  
-    if os.name == 'posix':
+    if compat_os_name == 'posix':
          def compat_expanduser(path):
              """Expand ~ and ~user constructions.  If user or $HOME is unknown,
              do nothing."""
@@ -375,7 +378,7 @@ else:
                  userhome = pwent.pw_dir
              userhome = userhome.rstrip('/')
              return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
+    elif compat_os_name == 'nt' or compat_os_name == 'ce':
          def compat_expanduser(path):
              """Expand ~ and ~user constructs.
  
@@ -562,6 +565,7 @@ __all__ = [
      'compat_itertools_count',
      'compat_kwargs',
      'compat_ord',
+    'compat_os_name',
      'compat_parse_qs',
      'compat_print',
      'compat_shlex_split',
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py

index dccc59212d3028bb9a96f0eb9ffff4acb0be681e..73b34fdae96262000b290df8031a4a1f6eb5e721 100644 (file)
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -1,14 +1,16 @@
  from __future__ import unicode_literals
  
  from .common import FileDownloader
-from .external import get_external_downloader
  from .f4m import F4mFD
  from .hls import HlsFD
-from .hls import NativeHlsFD
  from .http import HttpFD
-from .rtsp import RtspFD
  from .rtmp import RtmpFD
  from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .external import (
+    get_external_downloader,
+    FFmpegFD,
+)
  
  from ..utils import (
      determine_protocol,
@@ -16,8 +18,8 @@ from ..utils import (
  
  PROTOCOL_MAP = {
      'rtmp': RtmpFD,
-    'm3u8_native': NativeHlsFD,
-    'm3u8': HlsFD,
+    'm3u8_native': HlsFD,
+    'm3u8': FFmpegFD,
      'mms': RtspFD,
      'rtsp': RtspFD,
      'f4m': F4mFD,
@@ -30,14 +32,17 @@ def get_suitable_downloader(info_dict, params={}):
      protocol = determine_protocol(info_dict)
      info_dict['protocol'] = protocol
  
+    # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+    #     return FFmpegFD
+
      external_downloader = params.get('external_downloader')
      if external_downloader is not None:
          ed = get_external_downloader(external_downloader)
-        if ed.supports(info_dict):
+        if ed.can_download(info_dict):
              return ed
  
      if protocol == 'm3u8' and params.get('hls_prefer_native'):
-        return NativeHlsFD
+        return HlsFD
  
      return PROTOCOL_MAP.get(protocol, HttpFD)
  
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py

index 2d51540518f7ee40c7d002d632007d5aa9542697..f39db58f6c13f623a00b37ef0565dad70f18a305 100644 (file)
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -5,6 +5,7 @@ import re
  import sys
  import time
  
+from ..compat import compat_os_name
  from ..utils import (
      encodeFilename,
      error_to_compat_str,
@@ -219,7 +220,7 @@ class FileDownloader(object):
          if self.params.get('progress_with_newline', False):
              self.to_screen(fullmsg)
          else:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                  prev_len = getattr(self, '_report_progress_prev_line_length',
                                     0)
                  if prev_len > len(fullmsg):
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py

index 2bc01126693fa4b520a34afe0ad0a67a61370829..30277dc205787d226360cfc950e5c08796a5fc03 100644 (file)
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -2,8 +2,11 @@ from __future__ import unicode_literals
  
  import os.path
  import subprocess
+import sys
+import re
  
  from .common import FileDownloader
+from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
  from ..utils import (
      cli_option,
      cli_valueless_option,
@@ -11,6 +14,8 @@ from ..utils import (
      cli_configuration_args,
      encodeFilename,
      encodeArgument,
+    handle_youtubedl_headers,
+    check_executable,
  )
  
  
@@ -45,10 +50,18 @@ class ExternalFD(FileDownloader):
      def exe(self):
          return self.params.get('external_downloader')
  
+    @classmethod
+    def available(cls):
+        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+
      @classmethod
      def supports(cls, info_dict):
          return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
  
+    @classmethod
+    def can_download(cls, info_dict):
+        return cls.available() and cls.supports(info_dict)
+
      def _option(self, command_option, param):
          return cli_option(self.params, command_option, param)
  
@@ -76,6 +89,8 @@ class ExternalFD(FileDownloader):
  
  
  class CurlFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
      def _make_cmd(self, tmpfilename, info_dict):
          cmd = [self.exe, '--location', '-o', tmpfilename]
          for key, val in info_dict['http_headers'].items():
@@ -89,6 +104,8 @@ class CurlFD(ExternalFD):
  
  
  class AxelFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
      def _make_cmd(self, tmpfilename, info_dict):
          cmd = [self.exe, '-o', tmpfilename]
          for key, val in info_dict['http_headers'].items():
@@ -99,6 +116,8 @@ class AxelFD(ExternalFD):
  
  
  class WgetFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
      def _make_cmd(self, tmpfilename, info_dict):
          cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
          for key, val in info_dict['http_headers'].items():
@@ -112,6 +131,8 @@ class WgetFD(ExternalFD):
  
  
  class Aria2cFD(ExternalFD):
+    AVAILABLE_OPT = '-v'
+
      def _make_cmd(self, tmpfilename, info_dict):
          cmd = [self.exe, '-c']
          cmd += self._configuration_args([
@@ -130,12 +151,112 @@ class Aria2cFD(ExternalFD):
  
  
  class HttpieFD(ExternalFD):
+    @classmethod
+    def available(cls):
+        return check_executable('http', ['--version'])
+
      def _make_cmd(self, tmpfilename, info_dict):
          cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
          for key, val in info_dict['http_headers'].items():
              cmd += ['%s:%s' % (key, val)]
          return cmd
  
+
+class FFmpegFD(ExternalFD):
+    @classmethod
+    def supports(cls, info_dict):
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+
+    @classmethod
+    def available(cls):
+        return FFmpegPostProcessor().available
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        url = info_dict['url']
+        ffpp = FFmpegPostProcessor(downloader=self)
+        if not ffpp.available:
+            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            return False
+        ffpp.check_version()
+
+        args = [ffpp.executable, '-y']
+
+        args += self._configuration_args()
+
+        # start_time = info_dict.get('start_time') or 0
+        # if start_time:
+        #     args += ['-ss', compat_str(start_time)]
+        # end_time = info_dict.get('end_time')
+        # if end_time:
+        #     args += ['-t', compat_str(end_time - start_time)]
+
+        if info_dict['http_headers'] and re.match(r'^https?://', url):
+            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+            headers = handle_youtubedl_headers(info_dict['http_headers'])
+            args += [
+                '-headers',
+                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+        protocol = info_dict.get('protocol')
+
+        if protocol == 'rtmp':
+            player_url = info_dict.get('player_url')
+            page_url = info_dict.get('page_url')
+            app = info_dict.get('app')
+            play_path = info_dict.get('play_path')
+            tc_url = info_dict.get('tc_url')
+            flash_version = info_dict.get('flash_version')
+            live = info_dict.get('rtmp_live', False)
+            if player_url is not None:
+                args += ['-rtmp_swfverify', player_url]
+            if page_url is not None:
+                args += ['-rtmp_pageurl', page_url]
+            if app is not None:
+                args += ['-rtmp_app', app]
+            if play_path is not None:
+                args += ['-rtmp_playpath', play_path]
+            if tc_url is not None:
+                args += ['-rtmp_tcurl', tc_url]
+            if flash_version is not None:
+                args += ['-rtmp_flashver', flash_version]
+            if live:
+                args += ['-rtmp_live', 'live']
+
+        args += ['-i', url, '-c', 'copy']
+        if protocol == 'm3u8':
+            if self.params.get('hls_use_mpegts', False):
+                args += ['-f', 'mpegts']
+            else:
+                args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        elif protocol == 'rtmp':
+            args += ['-f', 'flv']
+        else:
+            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+        self._debug_cmd(args)
+
+        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
+        try:
+            retval = proc.wait()
+        except KeyboardInterrupt:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+            # produces a file that is playable (this is mostly useful for live
+            # streams). Note that Windows is not affected and produces playable
+            # files (see https://github.com/rg3/youtube-dl/issues/8300).
+            if sys.platform != 'win32':
+                proc.communicate(b'q')
+            raise
+        return retval
+
+
+class AVconvFD(FFmpegFD):
+    pass
+
  _BY_NAME = dict(
      (klass.get_basename(), klass)
      for name, klass in globals().items()
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py

index 5bc99492bc7b90abdc5c133b3f6c573d54fe9ed3..a5bae96699e0b0f81fd11deab4900fe5ed8b820d 100644 (file)
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -99,7 +99,8 @@ class FragmentFD(FileDownloader):
                      state['eta'] = self.calc_eta(
                          start, time_now, estimated_size,
                          state['downloaded_bytes'])
-                state['speed'] = s.get('speed')
+                state['speed'] = s.get('speed') or ctx.get('speed')
+                ctx['speed'] = state['speed']
                  ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
              self._hook_progress(state)
  
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py

index 2a775bf0023f7ddc09507d66ab660a8dd97d19b2..a01dac031aa3b0c012a4262d210d16ef2b10a47a 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -1,87 +1,19 @@
  from __future__ import unicode_literals
  
-import os
+import os.path
  import re
-import subprocess
-import sys
  
-from .common import FileDownloader
  from .fragment import FragmentFD
  
  from ..compat import compat_urlparse
-from ..postprocessor.ffmpeg import FFmpegPostProcessor
  from ..utils import (
-    encodeArgument,
      encodeFilename,
      sanitize_open,
-    handle_youtubedl_headers,
  )
  
  
-class HlsFD(FileDownloader):
-    def real_download(self, filename, info_dict):
-        url = info_dict['url']
-        self.report_destination(filename)
-        tmpfilename = self.temp_name(filename)
-
-        ffpp = FFmpegPostProcessor(downloader=self)
-        if not ffpp.available:
-            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
-            return False
-        ffpp.check_version()
-
-        args = [ffpp.executable, '-y']
-
-        if info_dict['http_headers'] and re.match(r'^https?://', url):
-            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
-            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
-            headers = handle_youtubedl_headers(info_dict['http_headers'])
-            args += [
-                '-headers',
-                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
-
-        args += ['-i', url, '-c', 'copy']
-        if self.params.get('hls_use_mpegts', False):
-            args += ['-f', 'mpegts']
-        else:
-            args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
-
-        args = [encodeArgument(opt) for opt in args]
-        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
-        self._debug_cmd(args)
-
-        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
-        try:
-            retval = proc.wait()
-        except KeyboardInterrupt:
-            # subprocces.run would send the SIGKILL signal to ffmpeg and the
-            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
-            # produces a file that is playable (this is mostly useful for live
-            # streams). Note that Windows is not affected and produces playable
-            # files (see https://github.com/rg3/youtube-dl/issues/8300).
-            if sys.platform != 'win32':
-                proc.communicate(b'q')
-            raise
-        if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
-            self.try_rename(tmpfilename, filename)
-            self._hook_progress({
-                'downloaded_bytes': fsize,
-                'total_bytes': fsize,
-                'filename': filename,
-                'status': 'finished',
-            })
-            return True
-        else:
-            self.to_stderr('\n')
-            self.report_error('%s exited with code %d' % (ffpp.basename, retval))
-            return False
-
-
-class NativeHlsFD(FragmentFD):
-    """ A more limited implementation that does not require ffmpeg """
+class HlsFD(FragmentFD):
+    """ A limited implementation that does not require ffmpeg """
  
      FD_NAME = 'hlsnative'
  
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 037654a23b538e01c895e917fb6b2c050adc20bf..9502d07a4cd1e5a462f430ad415be97d3cf0ea00 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
  from .animeondemand import AnimeOnDemandIE
  from .anitube import AnitubeIE
  from .anysex import AnySexIE
-from .aol import AolIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
  from .allocine import AllocineIE
  from .aparat import AparatIE
  from .appleconnect import AppleConnectIE
@@ -51,6 +54,7 @@ from .arte import (
  from .atresplayer import AtresPlayerIE
  from .atttechchannel import ATTTechChannelIE
  from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
  from .audiomack import AudiomackIE, AudiomackAlbumIE
  from .azubu import AzubuIE, AzubuLiveIE
  from .baidu import BaiduVideoIE
@@ -185,6 +189,10 @@ from .dumpert import DumpertIE
  from .defense import DefenseGouvFrIE
  from .discovery import DiscoveryIE
  from .dropbox import DropboxIE
+from .dw import (
+    DWIE,
+    DWArticleIE,
+)
  from .eagleplatform import EaglePlatformIE
  from .ebaumsworld import EbaumsWorldIE
  from .echomsk import EchoMskIE
@@ -209,10 +217,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
  from .exfm import ExfmIE
  from .expotv import ExpoTVIE
  from .extremetube import ExtremeTubeIE
-from .facebook import (
-    FacebookIE,
-    FacebookPostIE,
-)
+from .facebook import FacebookIE
  from .faz import FazIE
  from .fc2 import FC2IE
  from .fczenit import FczenitIE
@@ -340,6 +345,7 @@ from .konserthusetplay import KonserthusetPlayIE
  from .kontrtube import KontrTubeIE
  from .krasview import KrasViewIE
  from .ku6 import Ku6IE
+from .kusi import KUSIIE
  from .kuwo import (
      KuwoIE,
      KuwoAlbumIE,
@@ -383,6 +389,7 @@ from .lynda import (
  from .m6 import M6IE
  from .macgamestore import MacGameStoreIE
  from .mailru import MailRuIE
+from .makerschannel import MakersChannelIE
  from .makertv import MakerTVIE
  from .malemotion import MalemotionIE
  from .matchtv import MatchTVIE
@@ -392,6 +399,7 @@ from .metacritic import MetacriticIE
  from .mgoon import MgoonIE
  from .minhateca import MinhatecaIE
  from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
  from .miomio import MioMioIE
  from .mit import TechTVMITIE, MITIE, OCWMITIE
  from .mitele import MiTeleIE
@@ -590,6 +598,7 @@ from .regiotv import RegioTVIE
  from .restudy import RestudyIE
  from .reverbnation import ReverbNationIE
  from .revision3 import Revision3IE
+from .rice import RICEIE
  from .ringtv import RingTVIE
  from .ro220 import Ro220IE
  from .rottentomatoes import RottenTomatoesIE
@@ -728,6 +737,7 @@ from .theplatform import (
      ThePlatformFeedIE,
  )
  from .thesixtyone import TheSixtyOneIE
+from .thestar import TheStarIE
  from .thisamericanlife import ThisAmericanLifeIE
  from .thisav import ThisAVIE
  from .tinypic import TinyPicIE
@@ -774,6 +784,7 @@ from .tv2 import (
      TV2IE,
      TV2ArticleIE,
  )
+from .tv3 import TV3IE
  from .tv4 import TV4IE
  from .tvc import (
      TVCIE,
@@ -813,6 +824,7 @@ from .udn import UDNEmbedIE
  from .digiteka import DigitekaIE
  from .unistra import UnistraIE
  from .urort import UrortIE
+from .usatoday import USATodayIE
  from .ustream import UstreamIE, UstreamChannelIE
  from .ustudio import UstudioIE
  from .varzesh3 import Varzesh3IE
@@ -828,7 +840,10 @@ from .vgtv import (
      VGTVIE,
  )
  from .vh1 import VH1IE
-from .vice import ViceIE
+from .vice import (
+    ViceIE,
+    ViceShowIE,
+)
  from .viddler import ViddlerIE
  from .videodetective import VideoDetectiveIE
  from .videofyme import VideofyMeIE
@@ -855,6 +870,7 @@ from .vimeo import (
      VimeoChannelIE,
      VimeoGroupsIE,
      VimeoLikesIE,
+    VimeoOndemandIE,
      VimeoReviewIE,
      VimeoUserIE,
      VimeoWatchLaterIE,
diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py

index 5b2c0dc9ac10aa826d5757f2fc75738c376219a3..cddcaa489791eb9393d7832350935f2cab8b7c79 100644 (file)
--- a/youtube_dl/extractor/aljazeera.py
+++ b/youtube_dl/extractor/aljazeera.py
@@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'The Slum - Episode 1: Deliverance',
              'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
-            'uploader': 'Al Jazeera English',
+            'uploader_id': '665003303001',
+            'timestamp': 1411116829,
+            'upload_date': '20140919',
          },
-        'add_ie': ['BrightcoveLegacy'],
+        'add_ie': ['BrightcoveNew'],
          'skip': 'Not accessible from Travis CI server',
      }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
  
      def _real_extract(self, url):
          program_name = self._match_id(url)
          webpage = self._download_webpage(url, program_name)
          brightcove_id = self._search_regex(
              r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
-
-        return {
-            '_type': 'url',
-            'url': (
-                'brightcove:'
-                'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
-                '&%40videoPlayer={0}'.format(brightcove_id)
-            ),
-            'ie_key': 'BrightcoveLegacy',
-        }
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py

index b51eafc45928f8e6ff4ce571763593f71b715583..b761b2cc4c5d3d4b70766ed56ff5c3529dd39e6b 100644 (file)
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -1,24 +1,11 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  
  
  class AolIE(InfoExtractor):
      IE_NAME = 'on.aol.com'
-    _VALID_URL = r'''(?x)
-        (?:
-            aol-video:|
-            http://on\.aol\.com/
-            (?:
-                video/.*-|
-                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
-            )
-        )
-        (?P<id>[0-9]+)
-        (?:$|\?)
-    '''
+    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
  
      _TESTS = [{
          'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
              'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
          },
          'add_ie': ['FiveMin'],
-    }, {
-        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
-        'info_dict': {
-            'id': '152147',
-            'title': 'Brace Yourself - Today\'s Weirdest News',
-        },
-        'playlist_mincount': 10,
      }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        playlist_id = mobj.group('playlist_id')
-        if not playlist_id or self._downloader.params.get('noplaylist'):
-            return self.url_result('5min:%s' % video_id)
+        video_id = self._match_id(url)
+        return self.url_result('5min:%s' % video_id)
  
-        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._html_search_regex(
-            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
-        playlist_html = self._search_regex(
-            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
-            'playlist HTML')
-        entries = [{
-            '_type': 'url',
-            'url': 'aol-video:%s' % m.group('id'),
-            'ie_key': 'Aol',
-        } for m in re.finditer(
-            r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
-            playlist_html)]
+class AolFeaturesIE(InfoExtractor):
+    IE_NAME = 'features.aol.com'
+    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
  
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'display_id': mobj.group('playlist_display_id'),
-            'title': title,
-            'entries': entries,
-        }
+    _TESTS = [{
+        'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
+        'md5': '7db483bb0c09c85e241f84a34238cc75',
+        'info_dict': {
+            'id': '519507715',
+            'ext': 'mp4',
+            'title': 'What To Watch - February 17, 2016',
+        },
+        'add_ie': ['FiveMin'],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return self.url_result(self._search_regex(
+            r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
+            webpage, '5min embed url'), 'FiveMin')
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index efde7e207bc8d166e80f2a26429797684535d114..3e119e21b39ba2ab6bc504cf1d19a90008bfbd24 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -121,15 +121,18 @@ class ArteTVPlus7IE(InfoExtractor):
                  json_url = compat_parse_qs(
                      compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
          if json_url:
-            return self._extract_from_json_url(json_url, video_id, lang)
-        # Differend kind of embed URL (e.g.
+            title = self._search_regex(
+                r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
+                webpage, 'title', default=None, group='title')
+            return self._extract_from_json_url(json_url, video_id, lang, title=title)
+        # Different kind of embed URL (e.g.
          # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
          embed_url = self._search_regex(
              r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
              webpage, 'embed url', group='url')
          return self.url_result(embed_url)
  
-    def _extract_from_json_url(self, json_url, video_id, lang):
+    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
          info = self._download_json(json_url, video_id)
          player_info = info['videoJsonPlayer']
  
@@ -137,7 +140,7 @@ class ArteTVPlus7IE(InfoExtractor):
          if not upload_date_str:
              upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
  
-        title = player_info['VTI'].strip()
+        title = (player_info.get('VTI') or title or player_info['VID']).strip()
          subtitle = player_info.get('VSU', '').strip()
          if subtitle:
              title += ' - %s' % subtitle
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py

index 3b2effa15fe15a5527644349d785b452540c7568..aa6925623140f08090515fda2f42a7debd5545ac 100644 (file)
--- a/youtube_dl/extractor/audimedia.py
+++ b/youtube_dl/extractor/audimedia.py
@@ -10,9 +10,9 @@ from ..utils import (
  
  
  class AudiMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
      _TEST = {
-        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
          'md5': '79a8b71c46d49042609795ab59779b66',
          'info_dict': {
              'id': '1565',
@@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)
  
-        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
+        raw_payload = self._search_regex([
+            r'class="amtv-embed"[^>]+id="([^"]+)"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+        ], webpage, 'raw payload')
          _, stage_mode, video_id, lang = raw_payload.split('-')
  
          # TODO: handle s and e stage_mode (live streams and ended live streams)
@@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
                  video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                  if not video_version_url:
                      continue
-                formats.append({
+                f = {
                      'url': video_version_url,
                      'width': int_or_none(video_version.get('width')),
                      'height': int_or_none(video_version.get('height')),
                      'abr': int_or_none(video_version.get('audio_bitrate')),
                      'vbr': int_or_none(video_version.get('video_bitrate')),
-                })
+                }
+                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+                if bitrate:
+                    f.update({
+                        'format_id': 'http-%s' % bitrate,
+                    })
+                formats.append(f)
              self._sort_formats(formats)
  
              return {
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py

new file mode 100644 (file)

index 0000000..2ec2d70
--- /dev/null
+++ b/youtube_dl/extractor/audioboom.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class AudioBoomIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
+        'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
+        'info_dict': {
+            'id': '4279833',
+            'ext': 'mp3',
+            'title': '3/09/2016 Czaban Hour 3',
+            'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',
+            'duration': 2245.72,
+            'uploader': 'Steve Czaban',
+            'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        clip = None
+
+        clip_store = self._parse_json(
+            self._search_regex(
+                r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
+                webpage, 'clip store', default='{}', group='json'),
+            video_id, fatal=False)
+        if clip_store:
+            clips = clip_store.get('clips')
+            if clips and isinstance(clips, list) and isinstance(clips[0], dict):
+                clip = clips[0]
+
+        def from_clip(field):
+            # Look the field up in the parsed clip; None when no clip was found.
+            return clip.get(field) if clip else None
+
+        audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
+            'audio', webpage, 'audio url')
+        title = from_clip('title') or self._og_search_title(webpage)
+        description = from_clip('description') or self._og_search_description(webpage)
+
+        duration = float_or_none(from_clip('duration') or self._html_search_meta(
+            'weibo:audio:duration', webpage))
+
+        uploader = from_clip('author') or self._og_search_property(
+            'audio:artist', webpage, 'uploader', fatal=False)
+        uploader_url = from_clip('author_url') or self._html_search_meta(
+            'audioboo:channel', webpage, 'uploader url')
+
+        return {
+            'id': video_id,
+            'url': audio_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_url': uploader_url,
+        }
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py

index 9d0dfb9611687b15075d0e6fc7d57dfa0244c60a..e62b3860e99b106d08ef79cf593e180fe8c9496c 100644 (file)
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -10,7 +10,6 @@ from ..utils import (
      int_or_none,
      parse_duration,
      parse_iso8601,
-    remove_end,
      unescapeHTML,
  )
  from ..compat import (
@@ -561,7 +560,7 @@ class BBCIE(BBCCoUkIE):
          'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
          'info_dict': {
              'id': '3662a707-0af9-3149-963f-47bea720b460',
-            'title': 'BBC Blogs - Adam Curtis - BUGGER',
+            'title': 'BUGGER',
          },
          'playlist_count': 18,
      }, {
@@ -670,9 +669,17 @@ class BBCIE(BBCCoUkIE):
          'url': 'http://www.bbc.com/sport/0/football/34475836',
          'info_dict': {
              'id': '34475836',
-            'title': 'What Liverpool can expect from Klopp',
+            'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
          },
          'playlist_count': 3,
+    }, {
+        # school report article with single video
+        'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+        'info_dict': {
+            'id': '35744779',
+            'title': 'School which breaks down barriers in Jerusalem',
+        },
+        'playlist_count': 1,
      }, {
          # single video with playlist URL from weather section
          'url': 'http://www.bbc.com/weather/features/33601775',
@@ -735,8 +742,17 @@ class BBCIE(BBCCoUkIE):
  
          json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
          timestamp = json_ld_info.get('timestamp')
+
          playlist_title = json_ld_info.get('title')
-        playlist_description = json_ld_info.get('description')
+        if not playlist_title:
+            playlist_title = self._og_search_title(
+                webpage, default=None) or self._html_search_regex(
+                r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+            if playlist_title:
+                playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+        playlist_description = json_ld_info.get(
+            'description') or self._og_search_description(webpage, default=None)
  
          if not timestamp:
              timestamp = parse_iso8601(self._search_regex(
@@ -797,8 +813,6 @@ class BBCIE(BBCCoUkIE):
                                  playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
  
          if entries:
-            playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
-            playlist_description = playlist_description or self._og_search_description(webpage, default=None)
              return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
  
          # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
@@ -829,10 +843,6 @@ class BBCIE(BBCCoUkIE):
                  'subtitles': subtitles,
              }
  
-        playlist_title = self._html_search_regex(
-            r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
-        playlist_description = self._og_search_description(webpage, default=None)
-
          def extract_all(pattern):
              return list(filter(None, map(
                  lambda s: self._parse_json(s, playlist_id, fatal=False),
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py

index 38bda3af5a189cc7a2c8d65937a7d710edd0211f..7a8e1f60b82923b643918e43924fa64a5250cb83 100644 (file)
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
          'add_ie': ['Ooyala'],
      }, {
          'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
-        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+        'md5': '6a5cd403418c7b01719248ca97fb0692',
          'info_dict': {
              'id': '2586817',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
              'timestamp': 1446839961,
              'uploader': 'Sean Fay',
@@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
          'md5': '8c2c12e3af7805152675446c905d159b',
          'info_dict': {
              'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
              'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
          },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index c947337f9f3d54c730487c3e050c00a029bb2d1b..3ab383461e463a6160f743f6a649fb00aea4eba0 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -13,6 +13,7 @@ from ..compat import (
      compat_urllib_parse_urlparse,
      compat_urlparse,
      compat_xml_parse_error,
+    compat_HTTPError,
  )
  from ..utils import (
      determine_ext,
@@ -23,16 +24,16 @@ from ..utils import (
      js_to_json,
      int_or_none,
      parse_iso8601,
-    sanitized_Request,
      unescapeHTML,
      unsmuggle_url,
+    update_url_query,
  )
  
  
  class BrightcoveLegacyIE(InfoExtractor):
      IE_NAME = 'brightcove:legacy'
      _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
  
      _TESTS = [
          {
@@ -155,8 +156,8 @@ class BrightcoveLegacyIE(InfoExtractor):
          # Not all pages define this value
          if playerKey is not None:
              params['playerKey'] = playerKey
-        # The three fields hold the id of the video
-        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
+        # These fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
          if videoPlayer is not None:
              params['@videoPlayer'] = videoPlayer
          linkBase = find_param('linkBaseURL')
@@ -184,8 +185,7 @@ class BrightcoveLegacyIE(InfoExtractor):
  
      @classmethod
      def _make_brightcove_url(cls, params):
-        data = compat_urllib_parse.urlencode(params)
-        return cls._FEDERATED_URL_TEMPLATE % data
+        return update_url_query(cls._FEDERATED_URL, params)
  
      @classmethod
      def _extract_brightcove_url(cls, webpage):
@@ -239,7 +239,7 @@ class BrightcoveLegacyIE(InfoExtractor):
              # We set the original url as the default 'Referer' header
              referer = smuggled_data.get('Referer', url)
              return self._get_video_info(
-                videoPlayer[0], query_str, query, referer=referer)
+                videoPlayer[0], query, referer=referer)
          elif 'playerKey' in query:
              player_key = query['playerKey']
              return self._get_playlist_info(player_key[0])
@@ -248,15 +248,14 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                  expected=True)
  
-    def _get_video_info(self, video_id, query_str, query, referer=None):
-        request_url = self._FEDERATED_URL_TEMPLATE % query_str
-        req = sanitized_Request(request_url)
+    def _get_video_info(self, video_id, query, referer=None):
+        headers = {}
          linkBase = query.get('linkBaseURL')
          if linkBase is not None:
              referer = linkBase[0]
          if referer is not None:
-            req.add_header('Referer', referer)
-        webpage = self._download_webpage(req, video_id)
+            headers['Referer'] = referer
+        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
  
          error_msg = self._html_search_regex(
              r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
@@ -355,7 +354,7 @@ class BrightcoveLegacyIE(InfoExtractor):
  
  class BrightcoveNewIE(InfoExtractor):
      IE_NAME = 'brightcove:new'
-    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
+    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
      _TESTS = [{
          'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
          'md5': 'c8100925723840d4b0d243f7025703be',
@@ -391,6 +390,10 @@ class BrightcoveNewIE(InfoExtractor):
          # ref: prefixed video id
          'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
          'only_matching': True,
+    }, {
+        # non numeric ref: prefixed video id
+        'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
+        'only_matching': True,
      }]
  
      @staticmethod
@@ -410,8 +413,8 @@ class BrightcoveNewIE(InfoExtractor):
  
          # Look for iframe embeds [1]
          for _, url in re.findall(
-                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url)
+                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url if url.startswith('http') else 'http:' + url)
  
          # Look for embed_in_page embeds [2]
          for video_id, account_id, player_id, embed in re.findall(
@@ -420,11 +423,11 @@ class BrightcoveNewIE(InfoExtractor):
                  # According to [4] data-video-id may be prefixed with ref:
                  r'''(?sx)
                      <video[^>]+
-                        data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
+                        data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
                      </video>.*?
                      <script[^>]+
                          src=["\'](?:https?:)?//players\.brightcove\.net/
-                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
+                        (\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
                  ''', webpage):
              entries.append(
                  'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
@@ -454,24 +457,33 @@ class BrightcoveNewIE(InfoExtractor):
                  r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                  webpage, 'policy key', group='pk')
  
-        req = sanitized_Request(
-            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
-            % (account_id, video_id),
-            headers={'Accept': 'application/json;pk=%s' % policy_key})
-        json_data = self._download_json(req, video_id)
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
+        try:
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                json_data = self._parse_json(e.cause.read().decode(), video_id)
+                raise ExtractorError(json_data[0]['message'], expected=True)
+            raise
  
          title = json_data['name']
  
          formats = []
          for source in json_data.get('sources', []):
+            container = source.get('container')
              source_type = source.get('type')
              src = source.get('src')
-            if source_type == 'application/x-mpegURL':
+            if source_type == 'application/x-mpegURL' or container == 'M2TS':
                  if not src:
                      continue
                  formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    src, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif source_type == 'application/dash+xml':
+                if not src:
+                    continue
+                formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
              else:
                  streaming_src = source.get('streaming_src')
                  stream_name, app_name = source.get('stream_name'), source.get('app_name')
@@ -479,15 +491,23 @@ class BrightcoveNewIE(InfoExtractor):
                      continue
                  tbr = float_or_none(source.get('avg_bitrate'), 1000)
                  height = int_or_none(source.get('height'))
+                width = int_or_none(source.get('width'))
                  f = {
                      'tbr': tbr,
-                    'width': int_or_none(source.get('width')),
-                    'height': height,
                      'filesize': int_or_none(source.get('size')),
-                    'container': source.get('container'),
-                    'vcodec': source.get('codec'),
-                    'ext': source.get('container').lower(),
+                    'container': container,
+                    'ext': container.lower(),
                  }
+                if width == 0 and height == 0:
+                    f.update({
+                        'vcodec': 'none',
+                    })
+                else:
+                    f.update({
+                        'width': width,
+                        'height': height,
+                        'vcodec': source.get('codec'),
+                    })
  
                  def build_format_id(kind):
                      format_id = kind
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py

index 6d9cd8abd1545ff09d27c991a7dc7c5d2cc2a872..042c4f2f13757ab3f0c942932ada8e7a01160055 100644 (file)
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
                  'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                  'description': 'md5:fb87405fcb42a331742a0dce2708560b',
              },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
          },
          {
              'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
@@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
                  'upload_date': '20131002',
                  'title': 'The Mummy’s Hand (1940)',
              },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
          },
          {
              # Youtube embedded video
              'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
-            'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
+            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
              'info_dict': {
                  'id': 'OEVzPCY2T-g',
-                'ext': 'mp4',
+                'ext': 'webm',
                  'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
                  'upload_date': '20061207',
                  'uploader': 'Cinemassacre',
@@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
          {
              # Youtube embedded video
              'url': 'http://cinemassacre.com/2006/09/01/mckids/',
-            'md5': '6eb30961fa795fedc750eac4881ad2e1',
+            'md5': '7393c4e0f54602ad110c793eb7a6513a',
              'info_dict': {
                  'id': 'FnxsNhuikpo',
-                'ext': 'mp4',
+                'ext': 'webm',
                  'upload_date': '20060901',
-                'uploader': 'Cinemassacre Extras',
+                'uploader': 'Cinemassacre Extra',
                  'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
                  'uploader_id': 'Cinemassacre',
                  'title': 'AVGN: McKids',
@@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
                  'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
                  'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
                  'upload_date': '20150525',
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
          }
      ]
  
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py

index 5c3908f72b2f94c5557aaeaaa2e19ec78716c0f0..3cf0bf95b4386b393df28d46367887c2e7952ae9 100644 (file)
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
              uploader = None
              uploader_id = None
  
-        mpx_account = data['config']['uvpConfig']['default']['mpx_account']
-
-        metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
+        metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
          description = vdata.get('description') or metadata.get('description')
          duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
  
@@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
          for (fkey, vid) in vdata['files'].items():
              if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                  continue
-            release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
+            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
              if fkey == 'hds':
                  release_url += '&manifest=f4m'
              tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 402f2f436dcd59bf00821166625eb72144b16a07..770105a5b58013bbcf76e342280b76827724dae4 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,13 +15,14 @@ import math
  from ..compat import (
      compat_cookiejar,
      compat_cookies,
+    compat_etree_fromstring,
      compat_getpass,
      compat_http_client,
+    compat_os_name,
+    compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urlparse,
-    compat_str,
-    compat_etree_fromstring,
  )
  from ..utils import (
      NO_DEFAULT,
@@ -47,6 +48,7 @@ from ..utils import (
      determine_protocol,
      parse_duration,
      mimetype2ext,
+    update_url_query,
  )
  
  
@@ -104,7 +106,7 @@ class InfoExtractor(object):
                      * protocol   The protocol that will be used for the actual
                                   download, lower-case.
                                   "http", "https", "rtsp", "rtmp", "rtmpe",
-                                 "m3u8", or "m3u8_native".
+                                 "m3u8", "m3u8_native" or "http_dash_segments".
                      * preference Order number of this format. If this field is
                                   present and not None, the formats get sorted
                                   by this field, regardless of all other values.
@@ -344,7 +346,7 @@ class InfoExtractor(object):
      def IE_NAME(self):
          return compat_str(type(self).__name__[:-2])
  
-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
          """ Returns the response handle """
          if note is None:
              self.report_download_webpage(video_id)
@@ -353,6 +355,12 @@ class InfoExtractor(object):
                  self.to_screen('%s' % (note,))
              else:
                  self.to_screen('%s: %s' % (video_id, note))
+        # data, headers and query params will be ignored for `Request` objects
+        if isinstance(url_or_request, compat_str):
+            if query:
+                url_or_request = update_url_query(url_or_request, query)
+            if data or headers:
+                url_or_request = sanitized_Request(url_or_request, data, headers or {})
          try:
              return self._downloader.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -368,13 +376,13 @@ class InfoExtractor(object):
                  self._downloader.report_warning(errmsg)
                  return False
  
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
          """ Returns a tuple (page content as string, URL handle) """
          # Strip hashes from the URL (#1038)
          if isinstance(url_or_request, (compat_str, str)):
              url_or_request = url_or_request.partition('#')[0]
  
-        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
          if urlh is False:
              assert not fatal
              return False
@@ -427,7 +435,7 @@ class InfoExtractor(object):
              self.to_screen('Saving request to ' + filename)
              # Working around MAX_PATH limitation on Windows (see
              # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                  absfilepath = os.path.abspath(filename)
                  if len(absfilepath) > 259:
                      filename = '\\\\?\\' + absfilepath
@@ -461,13 +469,13 @@ class InfoExtractor(object):
  
          return content
  
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
          """ Returns the data of the page as a string """
          success = False
          try_count = 0
          while success is False:
              try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
                  success = True
              except compat_http_client.IncompleteRead as e:
                  try_count += 1
@@ -482,10 +490,10 @@ class InfoExtractor(object):
  
      def _download_xml(self, url_or_request, video_id,
                        note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None):
+                      transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
          """Return the xml as an xml.etree.ElementTree.Element"""
          xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
          if xml_string is False:
              return xml_string
          if transform_source:
@@ -496,10 +504,10 @@ class InfoExtractor(object):
                         note='Downloading JSON metadata',
                         errnote='Unable to download JSON metadata',
                         transform_source=None,
-                       fatal=True, encoding=None):
+                       fatal=True, encoding=None, data=None, headers=None, query=None):
          json_string = self._download_webpage(
              url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding)
+            encoding=encoding, data=data, headers=headers, query=query)
          if (not fatal) and json_string is False:
              return None
          return self._parse_json(
@@ -596,7 +604,7 @@ class InfoExtractor(object):
                  if mobj:
                      break
  
-        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
              _name = '\033[0;34m%s\033[0m' % name
          else:
              _name = name
@@ -854,6 +862,7 @@ class InfoExtractor(object):
              proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
  
              if f.get('vcodec') == 'none':  # audio only
+                preference -= 50
                  if self._downloader.params.get('prefer_free_formats'):
                      ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
                  else:
@@ -864,6 +873,8 @@ class InfoExtractor(object):
                  except ValueError:
                      audio_ext_preference = -1
              else:
+                if f.get('acodec') == 'none':  # video only
+                    preference -= 40
                  if self._downloader.params.get('prefer_free_formats'):
                      ORDER = ['flv', 'mp4', 'webm']
                  else:
@@ -965,6 +976,13 @@ class InfoExtractor(object):
          if manifest is False:
              return []
  
+        return self._parse_f4m_formats(
+            manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+            transform_source=transform_source, fatal=fatal)
+
+    def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+                           transform_source=lambda s: fix_xml_ampersands(s).strip(),
+                           fatal=True):
          formats = []
          manifest_version = '1.0'
          media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
@@ -990,7 +1008,8 @@ class InfoExtractor(object):
                  # bitrate in f4m downloader
                  if determine_ext(manifest_url) == 'f4m':
                      formats.extend(self._extract_f4m_formats(
-                        manifest_url, video_id, preference, f4m_id, fatal=fatal))
+                        manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+                        transform_source=transform_source, fatal=fatal))
                      continue
              tbr = int_or_none(media_el.attrib.get('bitrate'))
              formats.append({
@@ -1139,8 +1158,8 @@ class InfoExtractor(object):
                  out.append('{%s}%s' % (namespace, c))
          return '/'.join(out)
  
-    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
-        smil = self._download_smil(smil_url, video_id, fatal=fatal)
+    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+        smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
  
          if smil is False:
              assert not fatal
@@ -1157,10 +1176,10 @@ class InfoExtractor(object):
              return {}
          return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
  
-    def _download_smil(self, smil_url, video_id, fatal=True):
+    def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
          return self._download_xml(
              smil_url, video_id, 'Downloading SMIL file',
-            'Unable to download SMIL file', fatal=fatal)
+            'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
  
      def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
          namespace = self._parse_smil_namespace(smil)
@@ -1446,8 +1465,9 @@ class InfoExtractor(object):
                          continue
                      representation_attrib = adaptation_set.attrib.copy()
                      representation_attrib.update(representation.attrib)
-                    mime_type = representation_attrib.get('mimeType')
-                    content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
+                    # According to page 41 of ISO/IEC 23009-1:2014, @mimeType is mandatory
+                    mime_type = representation_attrib['mimeType']
+                    content_type = mime_type.split('/')[0]
                      if content_type == 'text':
                          # TODO implement WebVTT downloading
                          pass
@@ -1470,6 +1490,7 @@ class InfoExtractor(object):
                          f = {
                              'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                              'url': base_url,
+                            'ext': mimetype2ext(mime_type),
                              'width': int_or_none(representation_attrib.get('width')),
                              'height': int_or_none(representation_attrib.get('height')),
                              'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py

index 373b3b4b4735d8544128c48a10037eed3c570e5d..bdc768c783b9b3213badc5cf4b354f6159142f9f 100644 (file)
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
              'display_id': 'iseven',
              'ext': 'flv',
              'title': 're:^清晨醒脑！T-ara根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
              'thumbnail': 're:^https?://.*\.jpg$',
              'uploader': '7师傅',
              'uploader_id': '431925',
@@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
          },
          'params': {
              'skip_download': True,
-        }
+        },
      }, {
          'url': 'http://www.douyutv.com/85982',
          'info_dict': {
@@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):
          },
          'params': {
              'skip_download': True,
-        }
+        },
+        'skip': 'Romm not found',
+    }, {
+        'url': 'http://www.douyutv.com/17732',
+        'info_dict': {
+            'id': '17732',
+            'display_id': '17732',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑！T-ara根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py

new file mode 100644 (file)

index 0000000..b6c9855
--- /dev/null
+++ b/youtube_dl/extractor/dw.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+from ..compat import compat_urlparse
+
+
+class DWIE(InfoExtractor):
+    IE_NAME = 'dw'
+    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
+    _TESTS = [{
+        # video
+        'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
+        'md5': '7372046e1815c5a534b43f3c3c36e6e9',
+        'info_dict': {
+            'id': '19112290',
+            'ext': 'mp4',
+            'title': 'Intelligent light',
+            'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
+            'upload_date': '20160311',
+        }
+    }, {
+        # audio
+        'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
+        'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
+        'info_dict': {
+            'id': '19111941',
+            'ext': 'mp3',
+            'title': 'WorldLink: My business',
+            'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
+            'upload_date': '20160311',
+        }
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        title = hidden_inputs['media_title']
+
+        formats = []
+        if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
+            formats = self._extract_smil_formats(
+                'http://www.dw.com/smil/v-%s' % media_id, media_id,
+                transform_source=lambda s: s.replace(
+                    'rtmp://tv-od.dw.de/flash/',
+                    'http://tv-download.dw.de/dwtv_video/flv/'))
+        else:
+            formats = [{'url': hidden_inputs['file_name']}]
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': hidden_inputs.get('preview_image'),
+            'duration': int_or_none(hidden_inputs.get('file_duration')),
+            'upload_date': hidden_inputs.get('display_date'),
+            'formats': formats,
+        }
+
+
+class DWArticleIE(InfoExtractor):
+    IE_NAME = 'dw:article'
+    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
+        'md5': '8ca657f9d068bbef74d6fc38b97fc869',
+        'info_dict': {
+            'id': '19105868',
+            'ext': 'mp4',
+            'title': 'The harsh life of refugees in Idomeni',
+            'description': 'md5:196015cc7e48ebf474db9399420043c7',
+            'upload_date': '20160310',
+        }
+    }
+
+    def _real_extract(self, url):
+        article_id = self._match_id(url)
+        webpage = self._download_webpage(url, article_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        media_id = hidden_inputs['media_id']
+        media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
+        media_url = compat_urlparse.urljoin(url, media_path)
+        return self.url_result(media_url, 'DW', media_id)
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py

index 00a69e6312aede6069e062c6abff29137939daa9..8c725a4e631860584781b116e72b02dd05813fc2 100644 (file)
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
      _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
      IE_DESC = 'El País'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
          'md5': '98406f301f19562170ec071b83433d55',
          'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
              'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
              'upload_date': '20140206',
          }
-    }
+    }, {
+        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+        'md5': '3bd5b09509f3519d7d9e763179b013de',
+        'info_dict': {
+            'id': '1456340311_668921',
+            'ext': 'mp4',
+            'title': 'Cómo hacer el mejor café con cafetera italiana',
+            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+            'upload_date': '20160303',
+        }
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
          prefix = self._html_search_regex(
-            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
          video_suffix = self._search_regex(
-            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
          video_url = prefix + video_suffix
          thumbnail_suffix = self._search_regex(
-            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
-            fatal=False)
+            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+            webpage, 'thumbnail URL', fatal=False)
          thumbnail = (
              None if thumbnail_suffix is None
              else prefix + thumbnail_suffix)
          title = self._html_search_regex(
-            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            (r"tituloVideo\s*=\s*'([^']+)'",
+             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
              webpage, 'title')
-        date_str = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
              r'<p class="date-header date-int updated"\s+title="([^"]+)">',
-            webpage, 'upload date', fatal=False)
-        upload_date = (None if date_str is None else unified_strdate(date_str))
+            webpage, 'upload date', default=None) or self._html_search_meta(
+            'datePublished', webpage, 'timestamp'))
  
          return {
              'id': video_id,
diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py

index e4180701d7d5fe7f538d029e8ffb27235b6135df..e5e57d48518d3dd3999dad650d0c32406079ce33 100644 (file)
--- a/youtube_dl/extractor/engadget.py
+++ b/youtube_dl/extractor/engadget.py
@@ -1,21 +1,13 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
-from ..utils import (
-    url_basename,
-)
  
  
  class EngadgetIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www.engadget.com/
-        (?:video(?:/5min)?/(?P<id>\d+)|
-            [\d/]+/.*?)
-        '''
+    _VALID_URL = r'https?://www\.engadget\.com/video/(?P<id>\d+)'
  
      _TEST = {
-        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'url': 'http://www.engadget.com/video/518153925/',
          'md5': 'c6820d4828a5064447a4d9fc73f312c9',
          'info_dict': {
              'id': '518153925',
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-
-        if video_id is not None:
-            return self.url_result('5min:%s' % video_id)
-        else:
-            title = url_basename(url)
-            webpage = self._download_webpage(url, title)
-            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
-            return {
-                '_type': 'playlist',
-                'title': title,
-                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
-            }
+        return self.url_result('5min:%s' % video_id)
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py

index 6c6c3b1bd460407322aab1f35ddd8e55cefaad17..f5bbd39d2d0e90996c118e3fae325034fc2bbb6d 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -37,7 +37,9 @@ class FacebookIE(InfoExtractor):
                                  video/embed|
                                  story\.php
                              )\?(?:.*?)(?:v|video_id|story_fbid)=|
-                            [^/]+/videos/(?:[^/]+/)?
+                            [^/]+/videos/(?:[^/]+/)?|
+                            [^/]+/posts/|
+                            groups/[^/]+/permalink/
                          )|
                      facebook:
                  )
@@ -50,6 +52,8 @@ class FacebookIE(InfoExtractor):
  
      _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
  
+    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+
      _TESTS = [{
          'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
          'md5': '6a40d33c0eccbb1af76cf0485a052659',
@@ -81,6 +85,33 @@ class FacebookIE(InfoExtractor):
              'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
              'uploader': 'Demy de Zeeuw',
          },
+    }, {
+        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+        'info_dict': {
+            'id': '544765982287235',
+            'ext': 'mp4',
+            'title': '"What are you doing running in the snow?"',
+            'uploader': 'FailArmy',
+        }
+    }, {
+        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+        'info_dict': {
+            'id': '1035862816472149',
+            'ext': 'mp4',
+            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
+            'uploader': 'S. Saint',
+        },
+    }, {
+        'note': 'swf params escaped',
+        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+        'md5': '97ba073838964d12c70566e0085c2b91',
+        'info_dict': {
+            'id': '10153664894881749',
+            'ext': 'mp4',
+            'title': 'Facebook video #10153664894881749',
+        },
      }, {
          'url': 'https://www.facebook.com/video.php?v=10204634152394104',
          'only_matching': True,
@@ -94,7 +125,7 @@ class FacebookIE(InfoExtractor):
          'url': 'facebook:544765982287235',
          'only_matching': True,
      }, {
-        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
          'only_matching': True,
      }]
  
@@ -164,19 +195,19 @@ class FacebookIE(InfoExtractor):
      def _real_initialize(self):
          self._login()
  
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+    def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+        req = sanitized_Request(url)
          req.add_header('User-Agent', self._CHROME_USER_AGENT)
          webpage = self._download_webpage(req, video_id)
  
          video_data = None
  
-        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
+        BEFORE = '{swf.addParam(param[0], param[1]);});'
          AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
-        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
          if m:
-            data = dict(json.loads(m.group(1)))
+            swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
+            data = dict(json.loads(swf_params))
              params_raw = compat_urllib_parse_unquote(data['params'])
              video_data = json.loads(params_raw)['video_data']
  
@@ -189,13 +220,15 @@ class FacebookIE(InfoExtractor):
  
          if not video_data:
              server_js_data = self._parse_json(self._search_regex(
-                r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
+                r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
              for item in server_js_data.get('instances', []):
                  if item[1][0] == 'VideoConfig':
                      video_data = video_data_list2dict(item[2][0]['videoData'])
                      break
  
          if not video_data:
+            if not fatal_if_no_video:
+                return webpage, False
              m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
              if m_msg is not None:
                  raise ExtractorError(
@@ -241,39 +274,36 @@ class FacebookIE(InfoExtractor):
              video_title = 'Facebook video #%s' % video_id
          uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
  
-        return {
+        info_dict = {
              'id': video_id,
              'title': video_title,
              'formats': formats,
              'uploader': uploader,
          }
  
-
-class FacebookPostIE(InfoExtractor):
-    IE_NAME = 'facebook:post'
-    _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
-        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
-        'info_dict': {
-            'id': '544765982287235',
-            'ext': 'mp4',
-            'title': '"What are you doing running in the snow?"',
-            'uploader': 'FailArmy',
-        }
-    }
+        return webpage, info_dict
  
      def _real_extract(self, url):
-        post_id = self._match_id(url)
+        video_id = self._match_id(url)
+
+        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+        webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
  
-        webpage = self._download_webpage(url, post_id)
+        if info_dict:
+            return info_dict
  
-        entries = [
-            self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
-            for video_id in self._parse_json(
-                self._search_regex(
-                    r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
-                    webpage, 'video ids', group='ids'),
-                post_id)]
+        if '/posts/' in url:
+            entries = [
+                self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+                for vid in self._parse_json(
+                    self._search_regex(
+                        r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
+                        webpage, 'video ids', group='ids'),
+                    video_id)]
  
-        return self.playlist_result(entries, post_id)
+            return self.playlist_result(entries, video_id)
+        else:
+            _, info_dict = self._extract_from_url(
+                self._VIDEO_PAGE_TEMPLATE % video_id,
+                video_id, fatal_if_no_video=True)
+            return info_dict
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py

index 2955965d908c15f21b3f8880993530779f97ec15..67d50a386ce812018047f711205b8619d75c1bf8 100644 (file)
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@@ -1,5 +1,7 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
@@ -16,12 +18,7 @@ from ..utils import (
  
  class FiveMinIE(InfoExtractor):
      IE_NAME = '5min'
-    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
-            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
-            5min:)
-        (?P<id>\d+)
-        '''
+    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
  
      _TESTS = [
          {
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
                  'title': 'How to Make a Next-Level Fruit Salad',
                  'duration': 184,
              },
+            'skip': 'no longer available',
          },
      ]
      _ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
      }
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sid = mobj.group('sid')
+
+        if mobj.group('query'):
+            qs = compat_parse_qs(mobj.group('query'))
+            if not qs.get('playList'):
+                raise ExtractorError('Invalid URL', expected=True)
+            video_id = qs['playList'][0]
+            if qs.get('sid'):
+                sid = qs['sid'][0]
+
          embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
-        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
-        query = compat_urllib_parse.urlencode({
-            'func': 'GetResults',
-            'playlist': video_id,
-            'sid': sid,
-            'isPlayerSeed': 'true',
-            'url': embed_url,
-        })
+        if not sid:
+            embed_page = self._download_webpage(embed_url, video_id,
+                                                'Downloading embed page')
+            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
          response = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+            compat_urllib_parse.urlencode({
+                'func': 'GetResults',
+                'playlist': video_id,
+                'sid': sid,
+                'isPlayerSeed': 'true',
+                'url': embed_url,
+            }),
              video_id)
          if not response['success']:
              raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
          parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
              compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
          for rendition in info['Renditions']:
-            if rendition['RenditionType'] == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
-            elif rendition['RenditionType'] == 'aac':
+            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
                  continue
              else:
                  rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py

index 318ac013d44b9ca8ce9de5c77d67b2cd3c9bb1e1..1dc50318ce81feb2604cfef943ddfb90e0a7641b 100644 (file)
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
                  # 'upload_date': '20141204',
                  'thumbnail': 're:^https?://.*\.jpg$',
              },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
          },
          {
              'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py

index c210177f7297e174d38988a2e62f379a9a478305..1477708bbec14c38bf0db7801d09d68a22ff1546 100644 (file)
--- a/youtube_dl/extractor/freespeech.py
+++ b/youtube_dl/extractor/freespeech.py
@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
          'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
          'info_dict': {
              'id': 'poKsVCZ64uU',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
              'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
              'uploader': 'freespeechtv',
diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py

index 25870c131534dce64cda034a424cdd1aff3a033d..a66e309de6993210052d09c4107fe26a168f1b33 100644 (file)
--- a/youtube_dl/extractor/gameinformer.py
+++ b/youtube_dl/extractor/gameinformer.py
@@ -2,42 +2,27 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none
  
  
  class GameInformerIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
      _TEST = {
          'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
+        'md5': '292f26da1ab4beb4c9099f1304d2b071',
          'info_dict': {
              'id': '4515472681001',
-            'ext': 'm3u8',
+            'ext': 'mp4',
              'title': 'Replay - Animal Crossing',
              'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
-            'timestamp': 1443457610706,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'timestamp': 1443457610,
+            'upload_date': '20150928',
+            'uploader_id': '694940074001',
          },
      }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)
-
-        bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
-        json_data = self._download_json(
-            bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
-            display_id)
-
-        return {
-            'id': compat_str(json_data['id']),
-            'display_id': display_id,
-            'url': json_data['IOSRenditions'][0]['url'],
-            'title': json_data['name'],
-            'description': json_data.get('shortDescription'),
-            'timestamp': int_or_none(json_data.get('publishedDate')),
-            'duration': int_or_none(json_data.get('length')),
-        }
+        brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index ca745ae41ecf46dd4b0a6b5dbd80a3e836c5f794..8121f04a5e02cf672dc6ab0f152d21df8b98034e 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1242,28 +1242,34 @@ class GenericIE(InfoExtractor):
              full_response = self._request_webpage(request, video_id)
              head_response = full_response
  
+        info_dict = {
+            'id': video_id,
+            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+        }
+
          # Check for direct link to a video
          content_type = head_response.headers.get('Content-Type', '')
          m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
          if m:
              upload_date = unified_strdate(
                  head_response.headers.get('Last-Modified'))
-            formats = []
-            if m.group('format_id').endswith('mpegurl'):
+            format_id = m.group('format_id')
+            if format_id.endswith('mpegurl'):
                  formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+            elif format_id == 'f4m':
+                formats = self._extract_f4m_formats(url, video_id)
              else:
                  formats = [{
                      'format_id': m.group('format_id'),
                      'url': url,
                      'vcodec': 'none' if m.group('type') == 'audio' else None
                  }]
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            info_dict.update({
                  'direct': True,
                  'formats': formats,
                  'upload_date': upload_date,
-            }
+            })
+            return info_dict
  
          if not self._downloader.params.get('test', False) and not is_intentional:
              force = self._downloader.params.get('force_generic_extractor', False)
@@ -1291,13 +1297,12 @@ class GenericIE(InfoExtractor):
                  'URL could be a direct video link, returning it as such.')
              upload_date = unified_strdate(
                  head_response.headers.get('Last-Modified'))
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            info_dict.update({
                  'direct': True,
                  'url': url,
                  'upload_date': upload_date,
-            }
+            })
+            return info_dict
  
          webpage = self._webpage_read_content(
              full_response, url, video_id, prefix=first_bytes)
@@ -1314,12 +1319,12 @@ class GenericIE(InfoExtractor):
              elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                  return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-                return {
-                    'id': video_id,
-                    'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                    'formats': self._parse_mpd_formats(
-                        doc, video_id, mpd_base_url=url.rpartition('/')[0]),
-                }
+                info_dict['formats'] = self._parse_mpd_formats(
+                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                return info_dict
+            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                return info_dict
          except compat_xml_parse_error:
              pass
  
@@ -1985,6 +1990,8 @@ class GenericIE(InfoExtractor):
                  entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
              elif ext == 'mpd':
                  entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+            elif ext == 'f4m':
+                entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
              else:
                  entry_info_dict['url'] = video_url
  
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py

index 37be34091895392b014a8fe2167d9c708ca60e71..766fc26d0f01145bdd2456a221940fa60ece6953 100644 (file)
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -10,8 +10,8 @@ from ..utils import (
  
  
  class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _TESTS = [{
          'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
          'md5': '881f7700aec4f538571fa1e0eed4a7b6',
          'info_dict': {
@@ -20,7 +20,11 @@ class GoogleDriveIE(InfoExtractor):
              'title': 'Big Buck Bunny.mp4',
              'duration': 46,
          }
-    }
+    }, {
+        # video id is longer than 28 characters
+        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+        'only_matching': True,
+    }]
      _FORMATS_EXT = {
          '5': 'flv',
          '6': 'flv',
@@ -43,7 +47,7 @@ class GoogleDriveIE(InfoExtractor):
      @staticmethod
      def _extract_url(webpage):
          mobj = re.search(
-            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
              webpage)
          if mobj:
              return 'https://drive.google.com/file/d/%s' % mobj.group('id')
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py

index 12fb5e8e1dcb1e5dfba8057a1496edcbe0f61f82..9622f198aa6aaf99094a9b85c5a914d4f0c07d46 100644 (file)
--- a/youtube_dl/extractor/indavideo.py
+++ b/youtube_dl/extractor/indavideo.py
@@ -73,7 +73,7 @@ class IndavideoEmbedIE(InfoExtractor):
              'url': self._proto_relative_url(thumbnail)
          } for thumbnail in video.get('thumbnails', [])]
  
-        tags = [tag['title'] for tag in video.get('tags', [])]
+        tags = [tag['title'] for tag in video.get('tags') or []]
  
          return {
              'id': video.get('id') or video_id,
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py

index d3bee3a1902c783543831fa508d30ce734a140f0..e7c0cb3f66ab542e79f86238d2db991047d6d453 100644 (file)
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor):
      def get_enc_key(self, video_id):
          # TODO: automatic key extraction
          # last update at 2016-01-22 for Zombie::bite
-        enc_key = '6ab6d0280511493ba85594779759d4ed'
+        enc_key = '8ed797d224d043e7ac23d95b70227d32'
          return enc_key
  
      def _extract_playlist(self, webpage):
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py

index eef7daa299813219c5211aefe2051a1160238319..137db873cc09f7e57b258bcf65b8331d8b36b8c0 100644 (file)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
          webpage = self._download_webpage(url, title)
          title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
          config_url = self._html_search_regex(
-            r'data-src="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
              webpage, 'config URL')
          config_url = 'http://www.jeuxvideo.com' + config_url
  
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py

index ccbc39c665412980e6b6104e83ffaf2e8574517f..44d7c84a13f9bef9aa1d68dc5d38fe81b0af4a5f 100644 (file)
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -8,6 +8,7 @@ from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
      compat_urlparse,
+    compat_parse_qs,
  )
  from ..utils import (
      clean_html,
@@ -20,21 +21,17 @@ from ..utils import (
  class KalturaIE(InfoExtractor):
      _VALID_URL = r'''(?x)
                  (?:
-                    kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
+                    kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
                      https?://
                          (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
                          (?:
                              (?:
                                  # flash player
-                                index\.php/kwidget/
-                                (?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
-                                (?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
+                                index\.php/kwidget|
                                  # html5 player
-                                html5/html5lib/
-                                (?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
-                                .*\?.*\bwid=_(?P<partner_id_html5>\d+)
+                                html5/html5lib/[^/]+/mwEmbedFrame\.php
                              )
-                        )
+                        )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
                  )
                  '''
      _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
@@ -127,10 +124,41 @@ class KalturaIE(InfoExtractor):
          url, smuggled_data = unsmuggle_url(url, {})
  
          mobj = re.match(self._VALID_URL, url)
-        partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
-        entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
-
-        info, flavor_assets = self._get_video_info(entry_id, partner_id)
+        partner_id, entry_id = mobj.group('partner_id', 'id')
+        ks = None
+        if partner_id and entry_id:
+            info, flavor_assets = self._get_video_info(entry_id, partner_id)
+        else:
+            path, query = mobj.group('path', 'query')
+            if not path and not query:
+                raise ExtractorError('Invalid URL', expected=True)
+            params = {}
+            if query:
+                params = compat_parse_qs(query)
+            if path:
+                splitted_path = path.split('/')
+                params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))))
+            if 'wid' in params:
+                partner_id = params['wid'][0][1:]
+            elif 'p' in params:
+                partner_id = params['p'][0]
+            else:
+                raise ExtractorError('Invalid URL', expected=True)
+            if 'entry_id' in params:
+                entry_id = params['entry_id'][0]
+                info, flavor_assets = self._get_video_info(entry_id, partner_id)
+            elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
+                reference_id = params['flashvars[referenceId]'][0]
+                webpage = self._download_webpage(url, reference_id)
+                entry_data = self._parse_json(self._search_regex(
+                    r'window\.kalturaIframePackageData\s*=\s*({.*});',
+                    webpage, 'kalturaIframePackageData'),
+                    reference_id)['entryResult']
+                info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
+                entry_id = info['id']
+            else:
+                raise ExtractorError('Invalid URL', expected=True)
+            ks = params.get('flashvars[ks]', [None])[0]
  
          source_url = smuggled_data.get('source_url')
          if source_url:
@@ -140,14 +168,19 @@ class KalturaIE(InfoExtractor):
          else:
              referrer = None
  
+        def sign_url(unsigned_url):
+            if ks:
+                unsigned_url += '/ks/%s' % ks
+            if referrer:
+                unsigned_url += '?referrer=%s' % referrer
+            return unsigned_url
+
          formats = []
          for f in flavor_assets:
              # Continue if asset is not ready
              if f['status'] != 2:
                  continue
-            video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
-            if referrer:
-                video_url += '?referrer=%s' % referrer
+            video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
              formats.append({
                  'format_id': '%(fileExt)s-%(bitrate)s' % f,
                  'ext': f.get('fileExt'),
@@ -160,9 +193,7 @@ class KalturaIE(InfoExtractor):
                  'width': int_or_none(f.get('width')),
                  'url': video_url,
              })
-        m3u8_url = info['dataUrl'].replace('format/url', 'format/applehttp')
-        if referrer:
-            m3u8_url += '?referrer=%s' % referrer
+        m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
          formats.extend(self._extract_m3u8_formats(
              m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
  
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py

index 08a671fa86a007d3327ef03c257f1b943bd425db..61739efa7a4c3b84892083eab10237c23eb69e3d 100644 (file)
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
  
      _TESTS = [{
          'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
+        'md5': '7b391cce85e758fb94f763ddc1bbb979',
          'info_dict': {
              'id': 'one-time-pad',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'The one-time pad',
              'description': 'The perfect cipher',
              'duration': 176,
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py

new file mode 100644 (file)

index 0000000..931f34c
--- /dev/null
+++ b/youtube_dl/extractor/kusi.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    timeconvert,
+    update_url_query,
+    xpath_text,
+)
+
+
+class KUSIIE(InfoExtractor):
+    """Extractor for kusi.com story pages and direct ``video?clipId=`` URLs."""
+
+    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+    _TESTS = [{
+        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
+        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+    }, {
+        'url': 'http://kusi.com/video?clipId=12203019',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # Same as previous one
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for a KUSI clip.
+
+        The clip id comes from the URL when it has one, otherwise it is
+        scraped from the story page; metadata and formats are then read
+        from the XML document served by build.asp.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        clip_id = mobj.group('clipId')
+        video_id = clip_id or mobj.group('path')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # Story URLs carry no clip id, so take it from the page itself
+        if clip_id is None:
+            video_id = clip_id = self._html_search_regex(
+                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
+
+        affiliate_id = self._search_regex(
+            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
+
+        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
+        xml_url = update_url_query('http://www.kusi.com/build.asp', {
+            'buildtype': 'buildfeaturexmlrequest',
+            'featureType': 'Clip',
+            'featureid': clip_id,
+            'affiliateno': affiliate_id,
+            'clientgroupid': '1',
+            'rnd': int(round(random.random() * 1000000)),
+        })
+
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
+        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
+        description = xpath_text(doc, 'ABSTRACT')
+        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
+        creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+
+        # doc.find() returns None when the media group is absent; treat that
+        # as "no renditions" rather than crashing with AttributeError
+        mrss_ns = '{http://search.yahoo.com/mrss/}'
+        media_group = doc.find(mrss_ns + 'group')
+        quality_options = (
+            media_group.findall(mrss_ns + 'content')
+            if media_group is not None else [])
+
+        formats = []
+        for quality in quality_options:
+            quality_url = quality.attrib.get('url')
+            if not quality_url:
+                # A rendition without a URL cannot be downloaded
+                continue
+            formats.append({
+                'url': compat_urllib_parse_unquote_plus(quality_url),
+                'height': int_or_none(quality.attrib.get('height')),
+                'width': int_or_none(quality.attrib.get('width')),
+                'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'timestamp': creation_time,
+        }
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py

index 700e44b639a216d63921f607d980a53c3dc4f7cd..f94804d06865c9e5b72c936818dbce5f4bf09229 100644 (file)
--- a/youtube_dl/extractor/kuwo.py
+++ b/youtube_dl/extractor/kuwo.py
@@ -23,7 +23,7 @@ class KuwoBaseIE(InfoExtractor):
          {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
      ]
  
-    def _get_formats(self, song_id):
+    def _get_formats(self, song_id, tolerate_ip_deny=False):
          formats = []
          for file_format in self._FORMATS:
              song_url = self._download_webpage(
@@ -32,7 +32,7 @@ class KuwoBaseIE(InfoExtractor):
                  song_id, note='Download %s url info' % file_format['format'],
              )
  
-            if song_url == 'IPDeny':
+            if song_url == 'IPDeny' and not tolerate_ip_deny:
                  raise ExtractorError('This song is blocked in this region', expected=True)
  
              if song_url.startswith('http://') or song_url.startswith('https://'):
@@ -43,7 +43,12 @@ class KuwoBaseIE(InfoExtractor):
                      'preference': file_format['preference'],
                      'abr': file_format.get('abr'),
                  })
-        self._sort_formats(formats)
+
+        # XXX _sort_formats fails if there are no formats, while it's not the
+        # desired behavior if 'IPDeny' is ignored
+        # This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged
+        if not tolerate_ip_deny:
+            self._sort_formats(formats)
          return formats
  
  
@@ -288,10 +293,16 @@ class KuwoMvIE(KuwoBaseIE):
          'url': 'http://www.kuwo.cn/mv/6480076/',
          'info_dict': {
              'id': '6480076',
-            'ext': 'mkv',
-            'title': '我们家MV',
+            'ext': 'mp4',
+            'title': 'My HouseMV',
              'creator': '2PM',
          },
+        # In this video, music URLs (anti.s) are blocked outside China and
+        # USA, while the MV URL (mvurl) is available globally, so force the MV
+        # URL for consistent results in different countries
+        'params': {
+            'format': 'mv',
+        },
      }
      _FORMATS = KuwoBaseIE._FORMATS + [
          {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
@@ -313,7 +324,17 @@ class KuwoMvIE(KuwoBaseIE):
          else:
              raise ExtractorError('Unable to find song or singer names')
  
-        formats = self._get_formats(song_id)
+        formats = self._get_formats(song_id, tolerate_ip_deny=True)
+
+        mv_url = self._download_webpage(
+            'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
+            song_id, note='Download %s MV URL' % song_id)
+        formats.append({
+            'url': mv_url,
+            'format_id': 'mv',
+        })
+
+        self._sort_formats(formats)
  
          return {
              'id': song_id,
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py

index d0cd3f5919350117a1babe44df1e1ea6ff4bb3c5..df47e88ba98a8d69d2dca43cf20ccb7366232f57 100644 (file)
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -217,14 +217,8 @@ class LePlaylistIE(InfoExtractor):
          'playlist_mincount': 96
      }, {
          'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
-        'info_dict': {
-            'id': 'lswjzzjc',
-            # The title should be "劲舞青春", but I can't find a simple way to
-            # determine the playlist title
-            'title': '乐视午间自制剧场',
-            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
-        },
-        'playlist_mincount': 7
+        # This series is moved to http://www.le.com/tv/10005297.html
+        'only_matching': True,
      }, {
          'url': 'http://www.le.com/comic/92063.html',
          'only_matching': True,
@@ -338,7 +332,7 @@ class LetvCloudIE(InfoExtractor):
              formats.append({
                  'url': url,
                  'ext': determine_ext(decoded_url),
-                'format_id': int_or_none(play_url.get('vtype')),
+                'format_id': str_or_none(play_url.get('vtype')),
                  'format_note': str_or_none(play_url.get('definition')),
                  'width': int_or_none(play_url.get('vwidth')),
                  'height': int_or_none(play_url.get('vheight')),
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py

index 38fb3d9e4166f5f4a188ab2436c27240a8b04283..eada7c299238953baa9fd3d8219b2754aa7f9356 100644 (file)
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -14,6 +14,7 @@ from ..utils import (
      xpath_with_ns,
      xpath_text,
      orderedSet,
+    update_url_query,
      int_or_none,
      float_or_none,
      parse_iso8601,
@@ -64,7 +65,7 @@ class LivestreamIE(InfoExtractor):
      def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
          base_ele = find_xpath_attr(
              smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
-        base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/'
+        base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
  
          formats = []
          video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
@@ -72,7 +73,10 @@ class LivestreamIE(InfoExtractor):
          for vn in video_nodes:
              tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
              furl = (
-                '%s%s?v=3.0.3&fp=WIN%%2014,0,0,145' % (base, vn.attrib['src']))
+                update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
+                    'v': '3.0.3',
+                    'fp': 'WIN% 14,0,0,145',
+                }))
              if 'clipBegin' in vn.attrib:
                  furl += '&ssek=' + vn.attrib['clipBegin']
              formats.append({
diff --git a/youtube_dl/extractor/makerschannel.py b/youtube_dl/extractor/makerschannel.py

new file mode 100644 (file)

index 0000000..f5d00e6
--- /dev/null
+++ b/youtube_dl/extractor/makerschannel.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+# Extractor for makerschannel.com pages addressed by a video_id or
+# production_id query parameter. Metadata is read from the data-* attributes
+# of the matching <div>; the media itself is handed off as a "minoto:" URL.
+class MakersChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
+        'md5': '624a512c6969236b5967bf9286345ad1',
+        'info_dict': {
+            'id': '849',
+            'ext': 'mp4',
+            'title': 'Landing a bus on a plane is an epic win',
+            'uploader': 'ZoomIn',
+            'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id_type, url_id = mobj.group('id_type'), mobj.group('id')
+        webpage = self._download_webpage(url, url_id)
+
+        # Raw attribute string of the <div> whose data-<id_type>-id equals url_id
+        div_attrs = self._html_search_regex(
+            r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id),
+            webpage, 'video data')
+
+        def data_attr(name, fatal=False):
+            # Value of a single data-<name> attribute found inside div_attrs
+            return self._html_search_regex(
+                r'data-%s\s*=\s*"([^"]+)"' % name, div_attrs, name, fatal=fatal)
+
+        video_src = data_attr('video-src', True)
+        minoto_id = self._search_regex(
+            r'/id/([a-zA-Z0-9]+)', video_src, 'minoto id')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'minoto:%s' % minoto_id,
+            'id': data_attr('video-id', True),
+            'title': data_attr('title', True),
+            'description': data_attr('description'),
+            'thumbnail': data_attr('image'),
+            'uploader': data_attr('channel'),
+        }
diff --git a/youtube_dl/extractor/minoto.py b/youtube_dl/extractor/minoto.py

new file mode 100644 (file)

index 0000000..959a105
--- /dev/null
+++ b/youtube_dl/extractor/minoto.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+# Extractor for Minoto-hosted videos. Accepts either a player URL on
+# (play|iframe|embed).minoto-video.com or the internal "minoto:<id>" form,
+# and builds the format list from the player's JSON description.
+class MinotoIE(InfoExtractor):
+    _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        # The "minoto:" form carries no player id, so fall back to '1'
+        player_id = mobj.group('player_id') or '1'
+        video_id = mobj.group('id')
+        video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
+        video_metadata = video_data['video-metadata']
+        formats = []
+        for fmt in video_data['video-files']:
+            fmt_url = fmt.get('url')
+            if not fmt_url:
+                continue
+            container = fmt.get('container')
+            if container == 'hls':
+                # The HLS URL is a manifest: expand it via _extract_m3u8_formats.
+                # list.extend() itself accepts only a single iterable argument.
+                formats.extend(self._extract_m3u8_formats(
+                    fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            else:
+                fmt_profile = fmt.get('profile') or {}
+                f = {
+                    'format_id': fmt_profile.get('name-short'),
+                    'format_note': fmt_profile.get('name'),
+                    'url': fmt_url,
+                    'container': container,
+                    'tbr': int_or_none(fmt.get('bitrate')),
+                    'filesize': int_or_none(fmt.get('filesize')),
+                    'width': int_or_none(fmt.get('width')),
+                    'height': int_or_none(fmt.get('height')),
+                }
+                codecs = fmt.get('codecs')
+                if codecs:
+                    # Only a two-item list is used: first as vcodec, second as acodec
+                    codecs = codecs.split(',')
+                    if len(codecs) == 2:
+                        f.update({
+                            'vcodec': codecs[0],
+                            'acodec': codecs[1],
+                        })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_metadata['title'],
+            'description': video_metadata.get('description'),
+            # Tolerate a missing or null 'video-poster' entry
+            'thumbnail': (video_metadata.get('video-poster') or {}).get('url'),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py

index 29ca45778a17654c4d2125ceda177b71cffca8a8..819c1b90bb755c873b3f7f1b64e07dc97126a9b9 100644 (file)
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
              'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
              'info_dict': {
                  'id': 'EObHWIEKGjA',
-                'ext': 'mp4',
+                'ext': 'webm',
                  'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                  'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
                  'upload_date': '20121109',
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py

index c2b7ed9abbd27a2b2c8e0d9d95c59e387630180a..101497118275b7f1b5bf0564048f1dc9fc4b878b 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse_unquote
  from ..utils import (
      ExtractorError,
      HEADRequest,
+    parse_count,
      str_to_int,
  )
  
@@ -85,8 +86,8 @@ class MixcloudIE(InfoExtractor):
          uploader_id = self._search_regex(
              r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
          description = self._og_search_description(webpage)
-        like_count = str_to_int(self._search_regex(
-            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
+        like_count = parse_count(self._search_regex(
+            r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
              webpage, 'like count', fatal=False))
          view_count = str_to_int(self._search_regex(
              [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
diff --git a/youtube_dl/extractor/noz.py b/youtube_dl/extractor/noz.py

index 0ffb44b47eb673821f013e6696d495226711c2dd..656443c495f2268084acbabeb6623a69b02afcba 100644 (file)
--- a/youtube_dl/extractor/noz.py
+++ b/youtube_dl/extractor/noz.py
@@ -5,7 +5,9 @@ from .common import InfoExtractor
  from ..compat import compat_urllib_parse_unquote
  from ..utils import (
      int_or_none,
+    find_xpath_attr,
      xpath_text,
+    update_url_query,
  )
  
  
@@ -46,17 +48,32 @@ class NozIE(InfoExtractor):
              doc, './/article/movie/file/duration'))
          formats = []
          for qnode in doc.findall('.//article/movie/file/qualities/qual'):
-            video_node = qnode.find('./html_urls/video_url[@format="video/mp4"]')
-            if video_node is None:
-                continue  # auto
-            formats.append({
-                'url': video_node.text,
-                'format_name': xpath_text(qnode, './name'),
-                'format_id': xpath_text(qnode, './id'),
-                'height': int_or_none(xpath_text(qnode, './height')),
-                'width': int_or_none(xpath_text(qnode, './width')),
-                'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
-            })
+            http_url_ele = find_xpath_attr(
+                qnode, './html_urls/video_url', 'format', 'video/mp4')
+            http_url = http_url_ele.text if http_url_ele is not None else None
+            if http_url:
+                formats.append({
+                    'url': http_url,
+                    'format_name': xpath_text(qnode, './name'),
+                    'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
+                    'height': int_or_none(xpath_text(qnode, './height')),
+                    'width': int_or_none(xpath_text(qnode, './width')),
+                    'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
+                })
+            else:
+                f4m_url = xpath_text(qnode, 'url_hd2')
+                if f4m_url:
+                    formats.extend(self._extract_f4m_formats(
+                        update_url_query(f4m_url, {'hdcore': '3.4.0'}),
+                        video_id, f4m_id='hds', fatal=False))
+                m3u8_url_ele = find_xpath_attr(
+                    qnode, './html_urls/video_url',
+                    'format', 'application/vnd.apple.mpegurl')
+                m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
+                if m3u8_url:
+                    formats.extend(self._extract_m3u8_formats(
+                        m3u8_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
          self._sort_formats(formats)
  
          return {
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py

index 6d5732d45c3d3e22d085319ff45449881ac73ad2..30a5f2de4475a934cfa467764d0ce559d3e68a74 100644 (file)
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -12,14 +12,14 @@ class PyvideoIE(InfoExtractor):
      _TESTS = [
          {
              'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'md5': '520915673e53a5c5d487c36e0c4d85b5',
              'info_dict': {
                  'id': '24_4WWkSmNo',
-                'ext': 'mp4',
+                'ext': 'webm',
                  'title': 'Become a logging expert in 30 minutes',
                  'description': 'md5:9665350d466c67fb5b1598de379021f7',
                  'upload_date': '20130320',
-                'uploader': 'NextDayVideo',
+                'uploader': 'Next Day Video',
                  'uploader_id': 'NextDayVideo',
              },
              'add_ie': ['Youtube'],
diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py

index b1b8800b97c9eb8caad2c03f999f1bc8f304c4da..99979ebe1a9fe82099076b46b576ef38a58bca8c 100644 (file)
--- a/youtube_dl/extractor/revision3.py
+++ b/youtube_dl/extractor/revision3.py
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
          'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
          'md5': 'd94a72d85d0a829766de4deb8daaf7df',
          'info_dict': {
-            'id': '73034',
+            'id': '71089',
              'display_id': 'technobuffalo/5-google-predictions-for-2016',
              'ext': 'webm',
              'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
              'uploader_id': 'technobuffalo',
          }
      }, {
+        # Show
          'url': 'http://testtube.com/brainstuff',
          'info_dict': {
              'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
      }, {
          'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
          'info_dict': {
-            'id': '60163',
+            'id': '58227',
              'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
              'duration': 275,
              'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
              'uploader': 'DNews',
              'uploader_id': 'dnews',
          },
+    }, {
+        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+        'info_dict': {
+            'id': '71618',
+            'ext': 'mp4',
+            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
+            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
+            'uploader': 'Editors\' Picks',
+            'uploader_id': 'tt-editors-picks',
+            'timestamp': 1453309200,
+            'upload_date': '20160120',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # Tag
+        'url': 'http://testtube.com/tech-news',
+        'info_dict': {
+            'id': '21018',
+            'title': 'tech news',
+        },
+        'playlist_mincount': 9,
      }]
      _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
      _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
  
      def _real_extract(self, url):
          domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[0]
          page_info = self._download_json(
              self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
  
-        if page_info['data']['type'] == 'episode':
-            episode_data = page_info['data']
-            video_id = compat_str(episode_data['video']['data']['id'])
+        page_data = page_info['data']
+        page_type = page_data['type']
+        if page_type in ('episode', 'embed'):
+            show_data = page_data['show']['data']
+            page_id = compat_str(page_data['id'])
+            video_id = compat_str(page_data['video']['data']['id'])
+
+            preference = qualities(['mini', 'small', 'medium', 'large'])
+            thumbnails = [{
+                'url': image_url,
+                'id': image_id,
+                'preference': preference(image_id)
+            } for image_id, image_url in page_data.get('images', {}).items()]
+
+            info = {
+                'id': page_id,
+                'display_id': display_id,
+                'title': unescapeHTML(page_data['name']),
+                'description': unescapeHTML(page_data.get('summary')),
+                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+                'author': page_data.get('author'),
+                'uploader': show_data.get('name'),
+                'uploader_id': show_data.get('slug'),
+                'thumbnails': thumbnails,
+                'extractor_key': site,
+            }
+
+            if page_type == 'embed':
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': page_data['video']['data']['embed'],
+                })
+                return info
+
              video_data = self._download_json(
                  'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
                  video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
                          })
              self._sort_formats(formats)
  
-            preference = qualities(['mini', 'small', 'medium', 'large'])
-            thumbnails = [{
-                'url': image_url,
-                'id': image_id,
-                'preference': preference(image_id)
-            } for image_id, image_url in video_data.get('images', {}).items()]
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
+            info.update({
                  'title': unescapeHTML(video_data['title']),
                  'description': unescapeHTML(video_data.get('summary')),
-                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
-                'author': episode_data.get('author'),
                  'uploader': video_data.get('show', {}).get('name'),
                  'uploader_id': video_data.get('show', {}).get('slug'),
                  'duration': int_or_none(video_data.get('duration')),
-                'thumbnails': thumbnails,
                  'formats': formats,
-            }
+            })
+            return info
          else:
-            show_data = page_info['show']['data']
+            list_data = page_info[page_type]['data']
              episodes_data = page_info['episodes']['data']
              num_episodes = page_info['meta']['totalEpisodes']
              processed_episodes = 0
              entries = []
              page_num = 1
              while True:
-                entries.extend([self.url_result(
-                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+                entries.extend([{
+                    '_type': 'url',
+                    'url': 'http://%s%s' % (domain, episode['path']),
+                    'id': compat_str(episode['id']),
+                    'ie_key': 'Revision3',
+                    'extractor_key': site,
+                } for episode in episodes_data])
                  processed_episodes += len(episodes_data)
                  if processed_episodes == num_episodes:
                      break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
                      display_id)['episodes']['data']
  
              return self.playlist_result(
-                entries, compat_str(show_data['id']),
-                show_data.get('name'), show_data.get('summary'))
+                entries, compat_str(list_data['id']),
+                list_data.get('name'), list_data.get('summary'))
diff --git a/youtube_dl/extractor/rice.py b/youtube_dl/extractor/rice.py

new file mode 100644 (file)

index 0000000..f855719
--- /dev/null
+++ b/youtube_dl/extractor/rice.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    xpath_text,
+    xpath_element,
+    int_or_none,
+    parse_iso8601,
+    ExtractorError,
+)
+
+
+# Extractor for mediahub.rice.edu portal video pages. Metadata comes from the
+# portal "GetContentTitle" XML API and the media sources from the player
+# "GetPlayerConfig" XML API.
+class RICEIE(InfoExtractor):
+    _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
+    _TEST = {
+        'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
+        'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
+        'info_dict': {
+            'id': 'YEWIvbhb40aqdjMD1ALSqw',
+            'ext': 'mp4',
+            'title': 'Active Learning in Archeology',
+            'upload_date': '20140616',
+            'timestamp': 1402926346,
+        }
+    }
+    _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
+
+    def _real_extract(self, url):
+        query_string = re.match(self._VALID_URL, url).group('query')
+        qs = compat_parse_qs(query_string)
+        # All three identifiers are needed for the API requests below
+        if not all(qs.get(key) for key in ('PortalID', 'DestinationID', 'ContentID')):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        portal_id = qs['PortalID'][0]
+        playlist_id = qs['DestinationID'][0]
+        content_id = qs['ContentID'][0]
+
+        content_data = self._download_xml(
+            'https://mediahub.rice.edu/api/portal/GetContentTitle', content_id,
+            query={
+                'portalId': portal_id,
+                'playlistId': playlist_id,
+                'contentId': content_id
+            })
+        metadata = xpath_element(content_data, './/metaData', fatal=True)
+        title = xpath_text(metadata, 'primaryTitle', fatal=True)
+        encodings = xpath_element(content_data, './/encodings', fatal=True)
+        temporary_link_id = xpath_text(encodings, 'temporaryLinkId', fatal=True)
+        player_data = self._download_xml(
+            'https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id,
+            query={
+                'temporaryLinkId': temporary_link_id,
+                'contentId': content_id,
+            })
+
+        # Width/height applied to every format built directly here (not HLS)
+        common_fmt = {}
+        dimensions = xpath_text(encodings, 'dimensions')
+        wh = dimensions.split('x') if dimensions else []
+        if len(wh) == 2:
+            common_fmt['width'] = int_or_none(wh[0])
+            common_fmt['height'] = int_or_none(wh[1])
+
+        formats = []
+        rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
+        if rtsp_path:
+            rtsp_fmt = {
+                'url': rtsp_path,
+                'format_id': 'rtsp',
+            }
+            rtsp_fmt.update(common_fmt)
+            formats.append(rtsp_fmt)
+
+        for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
+            video_url = xpath_text(source, self._xpath_ns('File', self._NS))
+            if not video_url:
+                continue
+            if '.m3u8' in video_url:
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, content_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
+            source_fmt = {
+                'url': video_url,
+                # Text before the first ':' (normally the URL scheme) is the id
+                'format_id': video_url.split(':')[0],
+            }
+            source_fmt.update(common_fmt)
+            # RTMP URLs are split into connection URL, app and play path
+            rtmp = re.search(
+                r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
+                video_url)
+            if rtmp:
+                source_fmt.update({
+                    'url': rtmp.group('url'),
+                    'play_path': rtmp.group('playpath'),
+                    'app': rtmp.group('app'),
+                    'ext': 'flv',
+                })
+            formats.append(source_fmt)
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for content_asset in content_data.findall('.//contentAssets'):
+            if xpath_text(content_asset, 'type') != 'image':
+                continue
+            image_url = xpath_text(content_asset, 'httpPath')
+            if not image_url:
+                continue
+            thumbnails.append({
+                'id': xpath_text(content_asset, 'ID'),
+                'url': image_url,
+            })
+
+        return {
+            'id': content_id,
+            'title': title,
+            'description': xpath_text(metadata, 'abstract'),
+            'duration': int_or_none(xpath_text(metadata, 'duration')),
+            'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py

index 7de7b7273523ea8a43a6d22e8ab684afb4fc5875..256396bb8c21174fdaca4524c0b21d1ef7f802a9 100644 (file)
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -4,14 +4,13 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
  
  from ..utils import (
      ExtractorError,
      sanitized_Request,
-    smuggle_url,
      std_headers,
      urlencode_postdata,
+    update_url_query,
  )
  
  
@@ -20,28 +19,30 @@ class SafariBaseIE(InfoExtractor):
      _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
      _NETRC_MACHINE = 'safari'
  
-    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
      _API_FORMAT = 'json'
  
      LOGGED_IN = False
  
      def _real_initialize(self):
-        # We only need to log in once for courses or individual videos
-        if not self.LOGGED_IN:
-            self._login()
-            SafariBaseIE.LOGGED_IN = True
+        self._login()
  
      def _login(self):
+        # We only need to log in once for courses or individual videos
+        if self.LOGGED_IN:
+            return
+
          (username, password) = self._get_login_info()
          if username is None:
-            self.raise_login_required('safaribooksonline.com account is required')
+            return
  
-        headers = std_headers
+        headers = std_headers.copy()
          if 'Referer' not in headers:
              headers['Referer'] = self._LOGIN_URL
+        login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
  
          login_page = self._download_webpage(
-            self._LOGIN_URL, None,
+            login_page_request, None,
              'Downloading login form')
  
          csrf = self._html_search_regex(
@@ -66,6 +67,8 @@ class SafariBaseIE(InfoExtractor):
                  'Login failed; make sure your credentials are correct and try again.',
                  expected=True)
  
+        SafariBaseIE.LOGGED_IN = True
+
          self.to_screen('Login successful')
  
  
@@ -85,13 +88,15 @@ class SafariIE(SafariBaseIE):
  
      _TESTS = [{
          'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
-        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'md5': 'dcc5a425e79f2564148652616af1f2a3',
          'info_dict': {
-            'id': '2842601850001',
+            'id': '0_qbqx90ic',
              'ext': 'mp4',
-            'title': 'Introduction',
+            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+            'timestamp': 1437758058,
+            'upload_date': '20150724',
+            'uploader_id': 'stork',
          },
-        'skip': 'Requires safaribooksonline account credentials',
      }, {
          'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
          'only_matching': True,
@@ -106,15 +111,30 @@ class SafariIE(SafariBaseIE):
          course_id = mobj.group('course_id')
          part = mobj.group('part')
  
-        webpage = self._download_webpage(
-            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
-            part)
+        webpage = self._download_webpage(url, '%s/%s' % (course_id, part))
+        reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id')
+        partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id')
+        ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id')
+
+        query = {
+            'wid': '_%s' % partner_id,
+            'uiconf_id': ui_id,
+            'flashvars[referenceId]': reference_id,
+        }
  
-        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-        if not bc_url:
-            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+        if self.LOGGED_IN:
+            kaltura_session = self._download_json(
+                '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+                course_id, 'Downloading kaltura session JSON',
+                'Unable to download kaltura session JSON', fatal=False)
+            if kaltura_session:
+                session = kaltura_session.get('session')
+                if session:
+                    query['flashvars[ks]'] = session
  
-        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
+        return self.url_result(update_url_query(
+            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+            'Kaltura')
  
  
  class SafariCourseIE(SafariBaseIE):
@@ -140,7 +160,7 @@ class SafariCourseIE(SafariBaseIE):
          course_id = self._match_id(url)
  
          course_json = self._download_json(
-            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
              course_id, 'Downloading course JSON')
  
          if 'chapters' not in course_json:
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py

index 6365a8779d74e2ac9d82ce83c32c404d51e64b2e..a99b2a8e7be1bc9de8a01d6ae2de6fb36055703c 100644 (file)
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dl/extractor/sexu.py
@@ -1,7 +1,5 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  
  
@@ -14,7 +12,7 @@ class SexuIE(InfoExtractor):
              'id': '961791',
              'ext': 'mp4',
              'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
-            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
              'categories': list,  # NSFW
              'thumbnail': 're:https?://.*\.jpg$',
              'age_limit': 18,
@@ -25,13 +23,18 @@ class SexuIE(InfoExtractor):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
-        quality_arr = self._search_regex(
-            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        jwvideo = self._parse_json(
+            self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+            video_id)
+
+        sources = jwvideo['sources']
+
          formats = [{
-            'url': fmt[0].replace('\\', ''),
-            'format_id': fmt[1],
-            'height': int(fmt[1][:3]),
-        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+            'url': source['file'].replace('\\', ''),
+            'format_id': source.get('label'),
+            'height': self._search_regex(
+                r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+        } for source in sources if source.get('file')]
          self._sort_formats(formats)
  
          title = self._html_search_regex(
@@ -40,9 +43,7 @@ class SexuIE(InfoExtractor):
          description = self._html_search_meta(
              'description', webpage, 'description')
  
-        thumbnail = self._html_search_regex(
-            r'image:\s*"([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+        thumbnail = jwvideo.get('image')
  
          categories_str = self._html_search_meta(
              'keywords', webpage, 'categories')
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py

index 399c3b8eedbe57006a07eb187f1a78c4f090e6af..2ab30e45ff7c65ab7dd1d6cff7a1952764799cc0 100644 (file)
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -19,20 +19,25 @@ class SVTBaseIE(InfoExtractor):
          video_info = info['video']
          formats = []
          for vr in video_info['videoReferences']:
+            player_type = vr.get('playerType')
              vurl = vr['url']
              ext = determine_ext(vurl)
              if ext == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      vurl, video_id,
                      ext='mp4', entry_protocol='m3u8_native',
-                    m3u8_id=vr.get('playerType')))
+                    m3u8_id=player_type, fatal=False))
              elif ext == 'f4m':
                  formats.extend(self._extract_f4m_formats(
                      vurl + '?hdcore=3.3.0', video_id,
-                    f4m_id=vr.get('playerType')))
+                    f4m_id=player_type, fatal=False))
+            elif ext == 'mpd':
+                if player_type == 'dashhbbtv':
+                    formats.extend(self._extract_mpd_formats(
+                        vurl, video_id, mpd_id=player_type, fatal=False))
              else:
                  formats.append({
-                    'format_id': vr.get('playerType'),
+                    'format_id': player_type,
                      'url': vurl,
                  })
          self._sort_formats(formats)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index a48d77c309dcd1f9984cd0a6c71b7af574ca5498..cf8851438bb74000abb2692c34607f3137505f1d 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -73,7 +73,7 @@ class TEDIE(InfoExtractor):
          'add_ie': ['Youtube'],
          'info_dict': {
              'id': '_ZG8HBuDjgc',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'Douglas Adams: Parrots the Universe and Everything',
              'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
              'uploader': 'University of California Television (UCTV)',
diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py

new file mode 100644 (file)

index 0000000..b7e9af2
--- /dev/null
+++ b/youtube_dl/extractor/thestar.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import compat_parse_qs
+
+
+class TheStarIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P<id>.+)\.html'
+    _TEST = {
+        'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html',
+        'md5': '2c62dd4db2027e35579fefb97a8b6554',
+        'info_dict': {
+            'id': '4732393888001',
+            'ext': 'mp4',
+            'title': 'Mankind: Why this woman started a men\'s skin care line',
+            'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.',
+            'uploader_id': '794267642001',
+            'timestamp': 1454353482,
+            'upload_date': '20160201',
+        }
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+        brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py

index adc05ed5f077594302482257e35efa764e2d1773..17add954338c1f5d70640577f7e75a8afab5c7e3 100644 (file)
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -4,12 +4,12 @@ import re
  
  from .common import InfoExtractor
  from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_urlparse
+from ..compat import compat_parse_qs
  
  
  class TlcDeIE(InfoExtractor):
      IE_NAME = 'tlc.de'
-    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+    _VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
  
      _TEST = {
          'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -17,32 +17,23 @@ class TlcDeIE(InfoExtractor):
              'id': '3235167922001',
              'ext': 'mp4',
              'title': 'Breaking Amish: Die Welt da draußen',
-            'uploader': 'Discovery Networks - Germany',
              'description': (
                  'Vier Amische und eine Mennonitin wagen in New York'
                  '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                  ' ihrem spannenden Weg.'),
+            'timestamp': 1396598084,
+            'upload_date': '20140404',
+            'uploader_id': '1659832546',
          },
      }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        iframe_url = self._search_regex(
-            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
-            'iframe url')
-        # Otherwise we don't get the correct 'BrightcoveExperience' element,
-        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
-        iframe_url = iframe_url.replace('.htm?', '.php?')
-        url_fragment = compat_urlparse.urlparse(url).fragment
-        if url_fragment:
-            # Since the fragment is not send to the server, we always get the same iframe
-            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
-        iframe = self._download_webpage(iframe_url, title)
-
-        return {
-            '_type': 'url',
-            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
-            'ie': BrightcoveLegacyIE.ie_key(),
-        }
+        brightcove_id = mobj.group('id')
+        if not brightcove_id:
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+            brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py

new file mode 100644 (file)

index 0000000..d3f690d
--- /dev/null
+++ b/youtube_dl/extractor/tv3.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TV3IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
+    _TEST = {
+        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
+        'info_dict': {
+            'id': '4659127992001',
+            'ext': 'mp4',
+            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
+            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
+            'uploader_id': '3812193411001',
+            'upload_date': '20151213',
+            'timestamp': 1449975272,
+        },
+        'expected_warnings': [
+            'Failed to download MPD manifest'
+        ],
+        'params': {
+            'skip_download': True,
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py

index 5b8586097e17a8e17662f3aa62dbd68175605bb8..d4169ec6dce32613243baa9ef553a2683178b7ef 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -17,6 +17,7 @@ from ..utils import (
      encode_dict,
      ExtractorError,
      int_or_none,
+    orderedSet,
      parse_duration,
      parse_iso8601,
      sanitized_Request,
@@ -251,6 +252,7 @@ class TwitchVodIE(TwitchItemBaseIE):
                  self._USHER_BASE, item_id,
                  compat_urllib_parse.urlencode({
                      'allow_source': 'true',
+                    'allow_audio_only': 'true',
                      'allow_spectre': 'true',
                      'player': 'twitchweb',
                      'nauth': access_token['token'],
@@ -281,17 +283,37 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
          entries = []
          offset = 0
          limit = self._PAGE_LIMIT
+        broken_paging_detected = False
+        counter_override = None
          for counter in itertools.count(1):
              response = self._download_json(
                  self._PLAYLIST_URL % (channel_id, offset, limit),
-                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
+                channel_id,
+                'Downloading %s videos JSON page %s'
+                % (self._PLAYLIST_TYPE, counter_override or counter))
              page_entries = self._extract_playlist_page(response)
              if not page_entries:
                  break
+            total = int_or_none(response.get('_total'))
+            # Since the beginning of March 2016 twitch's paging mechanism
+            # is completely broken on the twitch side. It simply ignores
+            # a limit and returns the whole offset number of videos.
+            # Working around by just requesting all videos at once.
+            # Upd: pagination bug was fixed by twitch on 15.03.2016.
+            if not broken_paging_detected and total and len(page_entries) > limit:
+                self.report_warning(
+                    'Twitch pagination is broken on twitch side, requesting all videos at once',
+                    channel_id)
+                broken_paging_detected = True
+                offset = total
+                counter_override = '(all at once)'
+                continue
              entries.extend(page_entries)
+            if broken_paging_detected or total and len(page_entries) >= total:
+                break
              offset += limit
          return self.playlist_result(
-            [self.url_result(entry) for entry in set(entries)],
+            [self.url_result(entry) for entry in orderedSet(entries)],
              channel_id, channel_name)
  
      def _extract_playlist_page(self, response):
@@ -303,7 +325,6 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
  
  
  class TwitchProfileIE(TwitchPlaylistBaseIE):
-    _WORKING = False
      IE_NAME = 'twitch:profile'
      _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
      _PLAYLIST_TYPE = 'profile'
@@ -319,7 +340,6 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
  
  
  class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
-    _WORKING = False
      IE_NAME = 'twitch:past_broadcasts'
      _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
      _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
@@ -336,7 +356,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
  
  
  class TwitchBookmarksIE(TwitchPlaylistBaseIE):
-    _WORKING = False
      IE_NAME = 'twitch:bookmarks'
      _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
      _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
@@ -414,6 +433,7 @@ class TwitchStreamIE(TwitchBaseIE):
  
          query = {
              'allow_source': 'true',
+            'allow_audio_only': 'true',
              'p': random.randint(1000000, 10000000),
              'player': 'twitchweb',
              'segment_preference': '4',
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py

index 67762a003075c71c8067bd763772642d77888d93..e70b2ab3c8d564cd907e8763fd8136e1b3827ac5 100644 (file)
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -102,6 +102,14 @@ class TwitterCardIE(TwitterBaseIE):
              r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
              video_id)
  
+        def _search_dimensions_in_video_url(a_format, video_url):
+            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+            if m:
+                a_format.update({
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+
          playlist = config.get('playlist')
          if playlist:
              video_url = playlist[0]['source']
@@ -110,12 +118,8 @@ class TwitterCardIE(TwitterBaseIE):
                  'url': video_url,
              }
  
-            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
-            if m:
-                f.update({
-                    'width': int(m.group('width')),
-                    'height': int(m.group('height')),
-                })
+            _search_dimensions_in_video_url(f, video_url)
+
              formats.append(f)
  
          vmap_url = config.get('vmapUrl') or config.get('vmap_url')
@@ -148,6 +152,8 @@ class TwitterCardIE(TwitterBaseIE):
                      if not a_format['vbr']:
                          del a_format['vbr']
  
+                    _search_dimensions_in_video_url(a_format, media_url)
+
                      formats.append(a_format)
  
              duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py

new file mode 100644 (file)

index 0000000..e5678dc
--- /dev/null
+++ b/youtube_dl/extractor/usatoday.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_attribute,
+    parse_duration,
+    update_url_query,
+    ExtractorError,
+)
+from ..compat import compat_str
+
+
+class USATodayIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
+    _TEST = {
+        'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
+        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'info_dict': {
+            'id': '81729424',
+            'ext': 'mp4',
+            'title': 'US, France warn Syrian regime ahead of new peace talks',
+            'timestamp': 1457891045,
+            'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
+            'uploader_id': '29906170001',
+            'upload_date': '20160313',
+        }
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
+        ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
+        if not ui_video_data:
+            raise ExtractorError('no video on the webpage', expected=True)
+        video_data = self._parse_json(ui_video_data, display_id)
+
+        return {
+            '_type': 'url_transparent',
+            'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
+            'id': compat_str(video_data['id']),
+            'title': video_data['title'],
+            'thumbnail': video_data.get('thumbnail'),
+            'description': video_data.get('description'),
+            'duration': parse_duration(video_data.get('length')),
+            'ie_key': 'BrightcoveNew',
+        }
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py

index 14e945d494cd2f6e5f3b3e6a03ff6ebb076826dd..e148b1ef513321376efe1795056503ea2a8bcad8 100644 (file)
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
          'aftenbladet.no/tv': 'satv',
          'fvn.no/fvntv': 'fvntv',
          'aftenposten.no/webtv': 'aptv',
+        'ap.vgtv.no/webtv': 'aptv',
      }
  
      _APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
                      (?P<host>
                          %s
                      )
-                    /
+                    /?
                      (?:
                          \#!/(?:video|live)/|
                          embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
              'md5': 'fd828cd29774a729bf4d4425fe192972',
              'info_dict': {
                  'id': '21039',
-                'ext': 'mov',
+                'ext': 'mp4',
                  'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
                  'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
                  'duration': 66,
                  'timestamp': 1417002452,
                  'upload_date': '20141126',
                  'view_count': int,
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
          },
          {
              'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
              'only_matching': True,
          },
+        {
+            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+            'only_matching': True,
+        },
      ]
  
      def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
          if len(video_id) == 5:
              if appname == 'bttv':
                  info = self._extract_video_info('btno', video_id)
-            elif appname == 'aptv':
-                info = self._extract_video_info('ap', video_id)
  
          streams = data['streamUrls']
          stream_type = data.get('streamType')
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py

index 3db6286e48c1402ff210d8f4cc666dac1a918a86..46c785ae183d72207ab12500618f3eb7b765373d 100644 (file)
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,31 +1,37 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from .ooyala import OoyalaIE
  from ..utils import ExtractorError
  
  
  class ViceIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
-            'info_dict': {
-                'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
-                'ext': 'mp4',
-                'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-                'duration': 725.983,
-            },
-            'params': {
-                # Requires ffmpeg (m3u8 manifest)
-                'skip_download': True,
-            },
-        }, {
-            'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-            'only_matching': True,
-        }
-    ]
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'info_dict': {
+            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'ext': 'mp4',
+            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+            'duration': 725.983,
+        },
+        'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
+        'only_matching': True,
+    }, {
+        'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -38,3 +44,35 @@ class ViceIE(InfoExtractor):
          except ExtractorError:
              raise ExtractorError('The page doesn\'t contain a video', expected=True)
          return self.url_result(ooyala_url, ie='Ooyala')
+
+
+class ViceShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
+        'info_dict': {
+            'id': 'fuck-thats-delicious-2',
+            'title': "Fuck, That's Delicious",
+            'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
+        },
+        'playlist_count': 17,
+    }
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+
+        entries = [
+            self.url_result(video_url, ViceIE.ie_key())
+            for video_url, _ in re.findall(
+                r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
+                % ViceIE._VALID_URL, webpage)]
+
+        title = self._search_regex(
+            r'<title>(.+?)</title>', webpage, 'title', default=None)
+        if title:
+            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
+        description = self._html_search_meta('description', webpage, 'description')
+
+        return self.playlist_result(entries, show_id, title, description)
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

index 433fc9914a1d59fbc6743e8e82815b429a695cc5..e04b814c8cf27755bfe0a86af3d5bf43262bd0da 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -176,13 +176,13 @@ class VikiIE(VikiBaseIE):
      }, {
          # youtube external
          'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
-        'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+        'md5': '63f8600c1da6f01b7640eee7eca4f1da',
          'info_dict': {
              'id': '50562v',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'Poor Nastya [COMPLETE] - Episode 1',
              'description': '',
-            'duration': 607,
+            'duration': 606,
              'timestamp': 1274949505,
              'upload_date': '20101213',
              'uploader': 'ad14065n',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index 9f282a1da68ac9889f9cfe667a1a8bc7b8b3a71f..71c30d2cde54f11802f1e187160ae48c0ea88423 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -73,15 +73,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
  
      # _VALID_URL matches Vimeo URLs
      _VALID_URL = r'''(?x)
-        https?://
-        (?:(?:www|(?P<player>player))\.)?
-        vimeo(?P<pro>pro)?\.com/
-        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
-        (?:.*?/)?
-        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
-        (?:videos?/)?
-        (?P<id>[0-9]+)
-        /?(?:[?&].*)?(?:[#].*)?$'''
+                    https?://
+                        (?:
+                            (?:
+                                www|
+                                (?P<player>player)
+                            )
+                            \.
+                        )?
+                        vimeo(?P<pro>pro)?\.com/
+                        (?!channels/[^/?#]+/?(?:$|[?#])|(?:album|ondemand)/)
+                        (?:.*?/)?
+                        (?:
+                            (?:
+                                play_redirect_hls|
+                                moogaloop\.swf)\?clip_id=
+                            )?
+                        (?:videos?/)?
+                        (?P<id>[0-9]+)
+                        /?(?:[?&].*)?(?:[#].*)?$
+                    '''
      IE_NAME = 'vimeo'
      _TESTS = [
          {
@@ -277,9 +288,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
  
      def _real_extract(self, url):
          url, data = unsmuggle_url(url, {})
-        headers = std_headers
+        headers = std_headers.copy()
          if 'http_headers' in data:
-            headers = headers.copy()
              headers.update(data['http_headers'])
          if 'Referer' not in headers:
              headers['Referer'] = url
@@ -294,7 +304,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
-        request = sanitized_Request(url, None, headers)
+        request = sanitized_Request(url, headers=headers)
          try:
              webpage = self._download_webpage(request, video_id)
          except ExtractorError as ee:
@@ -498,6 +508,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
          }
  
  
+class VimeoOndemandIE(VimeoBaseInfoExtractor):
+    IE_NAME = 'vimeo:ondemand'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        # ondemand video not available via https://vimeo.com/id
+        'url': 'https://vimeo.com/ondemand/20704',
+        'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+        'info_dict': {
+            'id': '105442900',
+            'ext': 'mp4',
+            'title': 'המעבדה - במאי יותם פלדמן',
+            'uploader': 'גם סרטים',
+            'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
+            'uploader_id': 'gumfilms',
+        },
+    }, {
+        'url': 'https://vimeo.com/ondemand/nazmaalik',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/141692381',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key())
+
+
  class VimeoChannelIE(VimeoBaseInfoExtractor):
      IE_NAME = 'vimeo:channel'
      _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index 670a438afff624b5c9f09d0b79fb34fc4e237a7a..d560a4b5e219c2d62cff17da8e47c3cfbb5f87ba 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -142,10 +142,10 @@ class VKIE(InfoExtractor):
              'url': 'https://vk.com/video276849682_170681728',
              'info_dict': {
                  'id': 'V3K4mi0SYkc',
-                'ext': 'mp4',
+                'ext': 'webm',
                  'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                  'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
-                'duration': 179,
+                'duration': 178,
                  'upload_date': '20130116',
                  'uploader': "Children's Joy Foundation",
                  'uploader_id': 'thecjf',
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py

index 041ff6c555123d44c97bc63810d2aa7903ec069e..fb0accac744532625c04bb964c1fa031723ed8ff 100644 (file)
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -20,7 +20,7 @@ class WimpIE(InfoExtractor):
          'md5': '4e2986c793694b55b37cf92521d12bb4',
          'info_dict': {
              'id': 'clowncar',
-            'ext': 'mp4',
+            'ext': 'webm',
              'title': 'It\'s like a clown car.',
              'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
          },
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py

index d3cc1a29fa473fee2f58e91323774633be00fc4b..e699e663f60818b090bb6bf0ccdf24802c3c14c4 100644 (file)
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -10,13 +10,27 @@ from ..compat import (
      compat_urllib_parse,
  )
  from ..utils import (
+    ExtractorError,
      int_or_none,
      float_or_none,
      sanitized_Request,
  )
  
  
-class YandexMusicTrackIE(InfoExtractor):
+class YandexMusicBaseIE(InfoExtractor):
+    @staticmethod
+    def _handle_error(response):
+        error = response.get('error')
+        if error:
+            raise ExtractorError(error, expected=True)
+
+    def _download_json(self, *args, **kwargs):
+        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+        self._handle_error(response)
+        return response
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
      IE_NAME = 'yandexmusic:track'
      IE_DESC = 'Яндекс.Музыка - Трек'
      _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
@@ -73,7 +87,7 @@ class YandexMusicTrackIE(InfoExtractor):
          return self._get_track_info(track)
  
  
-class YandexMusicPlaylistBaseIE(InfoExtractor):
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
      def _build_playlist(self, tracks):
          return [
              self.url_result(
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py

index b29baafc441c220b4128c9363f341f7159b8df93..1124fe6c280cb0e23bee3a41ea323165ec714dce 100644 (file)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):
          links = []
  
          sources = self._search_regex(
-            r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
+            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
          if sources:
              for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
                  links.append(link)
@@ -101,8 +101,9 @@ class YouPornIE(InfoExtractor):
              }
              # Video URL's path looks like this:
              #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
              # We will benefit from it by extracting some metadata
-            mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
              if mobj:
                  height = int(mobj.group('height'))
                  bitrate = int(mobj.group('bitrate'))
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py

index 0d8ef6ca26c6ef7f1b7b402b387d20eebd3f8a8f..3ea5183999d5ed2adacbb05bccc6af93e8ac6750 100644 (file)
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dl/postprocessor/__init__.py
@@ -6,6 +6,7 @@ from .ffmpeg import (
      FFmpegEmbedSubtitlePP,
      FFmpegExtractAudioPP,
      FFmpegFixupStretchedPP,
+    FFmpegFixupM3u8PP,
      FFmpegFixupM4aPP,
      FFmpegMergerPP,
      FFmpegMetadataPP,
@@ -26,6 +27,7 @@ __all__ = [
      'ExecAfterDownloadPP',
      'FFmpegEmbedSubtitlePP',
      'FFmpegExtractAudioPP',
+    'FFmpegFixupM3u8PP',
      'FFmpegFixupM4aPP',
      'FFmpegFixupStretchedPP',
      'FFmpegMergerPP',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py

index 380bc6f292f2390fdf5dabd131a20184ba8334df..a8819f258013de5a1cbbf1a5b42ca87b2b4ef14e 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -25,6 +25,19 @@ from ..utils import (
  )
  
  
+EXT_TO_OUT_FORMATS = {
+    "aac": "adts",
+    "m4a": "ipod",
+    "mka": "matroska",
+    "mkv": "matroska",
+    "mpg": "mpeg",
+    "ogv": "ogg",
+    "ts": "mpegts",
+    "wma": "asf",
+    "wmv": "asf",
+}
+
+
  class FFmpegPostProcessorError(PostProcessingError):
      pass
  
@@ -391,10 +404,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
          for (name, value) in metadata.items():
              options.extend(['-metadata', '%s=%s' % (name, value)])
  
-        # https://github.com/rg3/youtube-dl/issues/8350
-        if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
-            options.extend(['-bsf:a', 'aac_adtstoasc'])
-
          self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
          self.run_ffmpeg(filename, temp_filename, options)
          os.remove(encodeFilename(filename))
@@ -467,6 +476,21 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
          return [], info
  
  
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return [], info
+
+
  class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
      def __init__(self, downloader=None, format=None):
          super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py

index 480d48d057400fafb0acdfc8492fca31b1d2f674..e39ca60aa08326b6f05814ff800bb09c75755e48 100644 (file)
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -6,6 +6,7 @@ import sys
  import errno
  
  from .common import PostProcessor
+from ..compat import compat_os_name
  from ..utils import (
      check_executable,
      hyphenate_date,
@@ -73,7 +74,7 @@ class XAttrMetadataPP(PostProcessor):
                      raise XAttrMetadataError(e.errno, e.strerror)
  
          except ImportError:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                  # Write xattrs to NTFS Alternate Data Streams:
                  # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
                  def write_xattr(path, key, value):
@@ -168,7 +169,7 @@ class XAttrMetadataPP(PostProcessor):
                      'Unable to write extended attributes due to too long values.')
              else:
                  msg = 'This filesystem doesn\'t support extended attributes. '
-                if os.name == 'nt':
+                if compat_os_name == 'nt':
                      msg += 'You need to use NTFS.'
                  else:
                      msg += '(You may have to enable them in your /etc/fstab)'
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index a0234a3a8eadeb5855cbd622758a58ba1a7e8aee..ec186918cd8672ada2da2d5521e0ba8b22eb273d 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -495,6 +495,10 @@ def encodeFilename(s, for_subprocess=False):
      if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
          return s
  
+    # Jython expects filenames to be Unicode strings even though it reports itself as Python 2.x compatible
+    if sys.platform.startswith('java'):
+        return s
+
      return s.encode(get_subprocess_encoding(), 'ignore')
  
  
@@ -1245,13 +1249,23 @@ if sys.platform == 'win32':
              raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
  
  else:
-    import fcntl
+    # Some platforms, such as Jython, are missing fcntl
+    try:
+        import fcntl
  
-    def _lock_file(f, exclusive):
-        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        def _lock_file(f, exclusive):
+            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
  
-    def _unlock_file(f):
-        fcntl.flock(f, fcntl.LOCK_UN)
+        def _unlock_file(f):
+            fcntl.flock(f, fcntl.LOCK_UN)
+    except ImportError:
+        UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+        def _lock_file(f, exclusive):
+            raise IOError(UNSUPPORTED_MSG)
+
+        def _unlock_file(f):
+            raise IOError(UNSUPPORTED_MSG)
  
  
  class locked_file(object):
@@ -1332,6 +1346,17 @@ def format_bytes(bytes):
      return '%.2f%s' % (converted, suffix)
  
  
+def lookup_unit_table(unit_table, s):
+    units_re = '|'.join(re.escape(u) for u in unit_table)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    if not m:
+        return None
+    num_str = m.group('num').replace(',', '.')
+    mult = unit_table[m.group('unit')]
+    return int(float(num_str) * mult)
+
+
  def parse_filesize(s):
      if s is None:
          return None
@@ -1375,15 +1400,28 @@ def parse_filesize(s):
          'Yb': 1000 ** 8,
      }
  
-    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
-    m = re.match(
-        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
-    if not m:
+    return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_count(s):
+    if s is None:
          return None
  
-    num_str = m.group('num').replace(',', '.')
-    mult = _UNIT_TABLE[m.group('unit')]
-    return int(float(num_str) * mult)
+    s = s.strip()
+
+    if re.match(r'^[\d,.]+$', s):
+        return str_to_int(s)
+
+    _UNIT_TABLE = {
+        'k': 1000,
+        'K': 1000,
+        'm': 1000 ** 2,
+        'M': 1000 ** 2,
+        'kk': 1000 ** 2,
+        'KK': 1000 ** 2,
+    }
+
+    return lookup_unit_table(_UNIT_TABLE, s)
  
  
  def month_by_name(name):
@@ -1415,6 +1453,12 @@ def fix_xml_ampersands(xml_str):
  
  def setproctitle(title):
      assert isinstance(title, compat_str)
+
+    # ctypes in Jython is not complete
+    # http://bugs.jython.org/issue2148
+    if sys.platform.startswith('java'):
+        return
+
      try:
          libc = ctypes.cdll.LoadLibrary('libc.so.6')
      except OSError:
@@ -1749,6 +1793,15 @@ def urlencode_postdata(*args, **kargs):
      return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
  
  
+def update_url_query(url, query):
+    parsed_url = compat_urlparse.urlparse(url)
+    qs = compat_parse_qs(parsed_url.query)
+    qs.update(query)
+    qs = encode_dict(qs)
+    return compat_urlparse.urlunparse(parsed_url._replace(
+        query=compat_urllib_parse.urlencode(qs, True)))
+
+
  def encode_dict(d, encoding='utf-8'):
      def encode(v):
          return v.encode(encoding) if isinstance(v, compat_basestring) else v
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index adafd601b2178897e069627b6de7e744d6adf3ce..9216fa54791ebcead4388e0b9f5212ebcc9cd9bb 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2016.03.01'
+__version__ = '2016.03.14'
author	remitamine <remitamine@gmail.com>
	Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)
committer	remitamine <remitamine@gmail.com>
	Wed, 16 Mar 2016 12:16:27 +0000 (13:16 +0100)
.gitignore		patch \| blob \| history
AUTHORS		patch \| blob \| history
Makefile		patch \| blob \| history
README.md		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
test/helper.py		patch \| blob \| history
test/test_YoutubeDL.py		patch \| blob \| history
test/test_http.py		patch \| blob \| history
test/test_utils.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/compat.py		patch \| blob \| history
youtube_dl/downloader/__init__.py		patch \| blob \| history
youtube_dl/downloader/common.py		patch \| blob \| history
youtube_dl/downloader/external.py		patch \| blob \| history
youtube_dl/downloader/fragment.py		patch \| blob \| history
youtube_dl/downloader/hls.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/aljazeera.py		patch \| blob \| history
youtube_dl/extractor/aol.py		patch \| blob \| history
youtube_dl/extractor/arte.py		patch \| blob \| history
youtube_dl/extractor/audimedia.py		patch \| blob \| history
youtube_dl/extractor/audioboom.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/bbc.py		patch \| blob \| history
youtube_dl/extractor/bleacherreport.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/cinemassacre.py		patch \| blob \| history
youtube_dl/extractor/cnet.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/douyutv.py		patch \| blob \| history
youtube_dl/extractor/dw.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/elpais.py		patch \| blob \| history
youtube_dl/extractor/engadget.py		patch \| blob \| history
youtube_dl/extractor/facebook.py		patch \| blob \| history
youtube_dl/extractor/fivemin.py		patch \| blob \| history
youtube_dl/extractor/foxnews.py		patch \| blob \| history
youtube_dl/extractor/freespeech.py		patch \| blob \| history
youtube_dl/extractor/gameinformer.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/googledrive.py		patch \| blob \| history
youtube_dl/extractor/indavideo.py		patch \| blob \| history
youtube_dl/extractor/iqiyi.py		patch \| blob \| history
youtube_dl/extractor/jeuxvideo.py		patch \| blob \| history
youtube_dl/extractor/kaltura.py		patch \| blob \| history
youtube_dl/extractor/khanacademy.py		patch \| blob \| history
youtube_dl/extractor/kusi.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/kuwo.py		patch \| blob \| history
youtube_dl/extractor/leeco.py		patch \| blob \| history
youtube_dl/extractor/livestream.py		patch \| blob \| history
youtube_dl/extractor/makerschannel.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/minoto.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/mit.py		patch \| blob \| history
youtube_dl/extractor/mixcloud.py		patch \| blob \| history
youtube_dl/extractor/noz.py		patch \| blob \| history
youtube_dl/extractor/pyvideo.py		patch \| blob \| history
youtube_dl/extractor/revision3.py		patch \| blob \| history
youtube_dl/extractor/rice.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/safari.py		patch \| blob \| history
youtube_dl/extractor/sexu.py		patch \| blob \| history
youtube_dl/extractor/svt.py		patch \| blob \| history
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/thestar.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tlc.py		patch \| blob \| history
youtube_dl/extractor/tv3.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/twitch.py		patch \| blob \| history
youtube_dl/extractor/twitter.py		patch \| blob \| history
youtube_dl/extractor/usatoday.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vgtv.py		patch \| blob \| history
youtube_dl/extractor/vice.py		patch \| blob \| history
youtube_dl/extractor/viki.py		patch \| blob \| history
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/vk.py		patch \| blob \| history
youtube_dl/extractor/wimp.py		patch \| blob \| history
youtube_dl/extractor/yandexmusic.py		patch \| blob \| history
youtube_dl/extractor/youporn.py		patch \| blob \| history
youtube_dl/postprocessor/__init__.py		patch \| blob \| history
youtube_dl/postprocessor/ffmpeg.py		patch \| blob \| history
youtube_dl/postprocessor/xattrpp.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history