Merge remote-tracking branch 'rzhxeo/rtmpdump'

author Philipp Hagemeister <phihag@phihag.de>

Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)
diff --git a/README.md b/README.md

index 6632e5865585fa45eed03b25121b4e4528628fbc..580b1600446d727b20eaa850ca7ec60535626a31 100644 (file)
--- a/README.md
+++ b/README.md
@@ -123,6 +123,7 @@ which means you can modify it, redistribute it or use it however you like.
      --get-description          simulate, quiet but print video description
      --get-filename             simulate, quiet but print output filename
      --get-format               simulate, quiet but print output format
+    -j, --dump-json            simulate, quiet but print JSON information
      --newline                  output progress bar as new lines
      --no-progress              do not print progress bar
      --console-title            display progress in console titlebar
diff --git a/setup.py b/setup.py

index aa7cfca0862b1f4ba2cfd220fd570ca63bcfda7e..8e24fe67918eeefa2f3f8b445ccfb480b8c841a8 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@ else:
          'data_files': [  # Installing system-wide would require sudo...
              ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
              ('share/doc/youtube_dl', ['README.txt']),
-            ('share/man/man1/', ['youtube-dl.1'])
+            ('share/man/man1', ['youtube-dl.1'])
          ]
      }
      if setuptools_available:
diff --git a/test/helper.py b/test/helper.py

index d7bf7a82802e58f0a80d788de83146d3a9d3fadf..b1f421ac58331bad23328502f42a0e1316df853d 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
  from youtube_dl.utils import preferredencoding
  
  
-def global_setup():
-    youtube_dl._setup_opener(timeout=10)
-
-
  def get_params(override=None):
      PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                     "parameters.json")
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py

index d500c6edceb6018510b9226d925d9f407b72fcbd..c9cdb96cb30578d58724ddadb4328ad790316a39 100644 (file)
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -6,8 +6,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import global_setup, try_rm
-global_setup()
+from test.helper import try_rm
  
  
  from youtube_dl import YoutubeDL
@@ -24,7 +23,7 @@ def _download_restricted(url, filename, age):
      }
      ydl = YoutubeDL(params)
      ydl.add_default_info_extractors()
-    json_filename = filename + '.info.json'
+    json_filename = os.path.splitext(filename)[0] + '.info.json'
      try_rm(json_filename)
      ydl.download([url])
      res = os.path.exists(json_filename)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index 56e5f80e1f6ddb17fef3ee5c499c238996c12051..1f1adb6b46e0fa2e8a683e6593f699476397a0cd 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
      def test_keywords(self):
          self.assertMatch(':ytsubs', ['youtube:subscriptions'])
          self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
-        self.assertMatch(':thedailyshow', ['ComedyCentral'])
-        self.assertMatch(':tds', ['ComedyCentral'])
-        self.assertMatch(':colbertreport', ['ComedyCentral'])
-        self.assertMatch(':cr', ['ComedyCentral'])
+        self.assertMatch(':ythistory', ['youtube:history'])
+        self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
+        self.assertMatch(':tds', ['ComedyCentralShows'])
+        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
+        self.assertMatch(':cr', ['ComedyCentralShows'])
  
  
  if __name__ == '__main__':
diff --git a/test/test_download.py b/test/test_download.py

index 16f2008094fca751f69ee01a38f16a6fa0c3fb53..dd5818dba91c166936e45f1c7d8779c752fa3b86 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  from test.helper import (
      get_params,
      get_testcases,
-    global_setup,
      try_rm,
      md5,
      report_warning
  )
-global_setup()
  
  
  import hashlib
@@ -103,7 +101,7 @@ def generator(test_case):
                  tc_filename = get_tc_filename(tc)
                  try_rm(tc_filename)
                  try_rm(tc_filename + '.part')
-                try_rm(tc_filename + '.info.json')
+                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
          try_rm_tcs_files()
          try:
              try_num = 1
@@ -130,11 +128,12 @@ def generator(test_case):
                  if not test_case.get('params', {}).get('skip_download', False):
                      self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                      self.assertTrue(tc_filename in finished_hook_called)
-                self.assertTrue(os.path.exists(tc_filename + '.info.json'))
+                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
+                self.assertTrue(os.path.exists(info_json_fn))
                  if 'md5' in tc:
                      md5_for_file = _file_md5(tc_filename)
                      self.assertEqual(md5_for_file, tc['md5'])
-                with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
+                with io.open(info_json_fn, encoding='utf-8') as infof:
                      info_dict = json.load(infof)
                  for (info_field, expected) in tc.get('info_dict', {}).items():
                      if isinstance(expected, compat_str) and expected.startswith('md5:'):
diff --git a/test/test_playlists.py b/test/test_playlists.py

index 706b6bdca1399284263106b755fdf9278c5d17d5..167801ae246087aae4c7068cb11b84245e560649 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -8,8 +8,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import FakeYDL, global_setup
-global_setup()
+from test.helper import FakeYDL
  
  
  from youtube_dl.extractor import (
@@ -22,6 +21,7 @@ from youtube_dl.extractor import (
      LivestreamIE,
      NHLVideocenterIE,
      BambuserChannelIE,
+    BandcampAlbumIE
  )
  
  
@@ -101,7 +101,15 @@ class TestPlaylists(unittest.TestCase):
          result = ie.extract('http://bambuser.com/channel/pixelversity')
          self.assertIsPlaylist(result)
          self.assertEqual(result['title'], u'pixelversity')
-        self.assertTrue(len(result['entries']) >= 66)
+        self.assertTrue(len(result['entries']) >= 60)
+
+    def test_bandcamp_album(self):
+        dl = FakeYDL()
+        ie = BandcampAlbumIE(dl)
+        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Nightmare Night EP')
+        self.assertTrue(len(result['entries']) >= 4)
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py

index 06a304879d122b743f62703f08cdcf00301b0d39..94a1f771d19df85063365cdcb47d5799a55f57e1 100644 (file)
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -6,8 +6,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import FakeYDL, global_setup, md5
-global_setup()
+from test.helper import FakeYDL, md5
  
  
  from youtube_dl.extractor import (
diff --git a/test/test_utils.py b/test/test_utils.py

index f3fbff042ccc8193d8d08527fdc04421c9832305..e9e590e749f131a0950c79bcf4fee1e9fb9004c2 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -24,6 +24,8 @@ from youtube_dl.utils import (
      xpath_with_ns,
      smuggle_url,
      unsmuggle_url,
+    shell_quote,
+    encodeFilename,
  )
  
  if sys.version_info < (3, 0):
@@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
          self.assertEqual(res_url, url)
          self.assertEqual(res_data, None)
  
+    def test_shell_quote(self):
+        args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
+        self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py

index 35defb8953402a74ff71b7a9a14cec105a5f1703..eac53b285ab6740b368f278784aced9625abb9a6 100644 (file)
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -7,8 +7,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import get_params, global_setup, try_rm
-global_setup()
+from test.helper import get_params, try_rm
  
  
  import io
diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py

index a5b6f6972df48f6b7cdcfebc3ea32d11c6a27afa..d7177611b5e1a90aa3bdf612ae873336ff44d686 100644 (file)
--- a/test/test_write_info_json.py
+++ b/test/test_write_info_json.py
@@ -7,8 +7,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import get_params, global_setup
-global_setup()
+from test.helper import get_params
  
  
  import io
@@ -31,7 +30,7 @@ params = get_params({
  
  
  TEST_ID = 'BaW_jenozKc'
-INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
+INFO_JSON_FILE = TEST_ID + '.info.json'
  DESCRIPTION_FILE = TEST_ID + '.mp4.description'
  EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐
  
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py

index 4b7a7847bd3a33a9a2bff3e99f9f4cff0de7eebf..8fd073f3144b0c3f39cd1d3d9dbd518a540773c3 100644 (file)
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -6,8 +6,7 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import FakeYDL, global_setup
-global_setup()
+from test.helper import FakeYDL
  
  
  from youtube_dl.extractor import (
@@ -27,7 +26,7 @@ class TestYoutubeLists(unittest.TestCase):
      def test_youtube_playlist(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
          self.assertIsPlaylist(result)
          self.assertEqual(result['title'], 'ytdl test PL')
          ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +43,13 @@ class TestYoutubeLists(unittest.TestCase):
      def test_issue_673(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')[0]
+        result = ie.extract('PLBB231211A4F62143')
          self.assertTrue(len(result['entries']) > 25)
  
      def test_youtube_playlist_long(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
          self.assertIsPlaylist(result)
          self.assertTrue(len(result['entries']) >= 799)
  
@@ -58,7 +57,7 @@ class TestYoutubeLists(unittest.TestCase):
          #651
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
          ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
          self.assertFalse('pElCt5oNDuI' in ytie_results)
          self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +65,7 @@ class TestYoutubeLists(unittest.TestCase):
      def test_youtube_playlist_empty(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
          self.assertIsPlaylist(result)
          self.assertEqual(len(result['entries']), 0)
  
@@ -74,7 +73,7 @@ class TestYoutubeLists(unittest.TestCase):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
          # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
+        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
          entries = result['entries']
          self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
          self.assertEqual(len(entries), 25)
@@ -84,22 +83,22 @@ class TestYoutubeLists(unittest.TestCase):
          dl = FakeYDL()
          ie = YoutubeChannelIE(dl)
          #test paginated channel
-        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
+        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
          self.assertTrue(len(result['entries']) > 90)
          #test autogenerated channel
-        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
+        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
          self.assertTrue(len(result['entries']) >= 18)
  
      def test_youtube_user(self):
          dl = FakeYDL()
          ie = YoutubeUserIE(dl)
-        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
+        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
          self.assertTrue(len(result['entries']) >= 320)
  
      def test_youtube_safe_search(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
          self.assertEqual(len(result['entries']), 2)
  
      def test_youtube_show(self):
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py

index 5e1ff5eb0ede5bcb020cd027ca00d5b4159f9812..056700614b43fa0a3dbceeb82ef991e34fdb53f9 100644 (file)
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -6,9 +6,6 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import global_setup
-global_setup()
-
  
  import io
  import re
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index 84a539b825051e268cacd828156b70eeee4a0fab..c6276d1942105bc9a856dd98c0ca6417b827a7da 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -1,4 +1,3 @@
-import math
  import os
  import re
  import subprocess
@@ -11,6 +10,7 @@ from .utils import (
      ContentTooShortError,
      determine_ext,
      encodeFilename,
+    format_bytes,
      sanitize_open,
      timeconvert,
  )
@@ -53,20 +53,6 @@ class FileDownloader(object):
          self._progress_hooks = []
          self.params = params
  
-    @staticmethod
-    def format_bytes(bytes):
-        if bytes is None:
-            return 'N/A'
-        if type(bytes) is str:
-            bytes = float(bytes)
-        if bytes == 0.0:
-            exponent = 0
-        else:
-            exponent = int(math.log(bytes, 1024.0))
-        suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
-        converted = float(bytes) / float(1024 ** exponent)
-        return '%.2f%s' % (converted, suffix)
-
      @staticmethod
      def format_seconds(seconds):
          (mins, secs) = divmod(seconds, 60)
@@ -117,7 +103,7 @@ class FileDownloader(object):
      def format_speed(speed):
          if speed is None:
              return '%10s' % '---b/s'
-        return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
+        return '%10s' % ('%s/s' % format_bytes(speed))
  
      @staticmethod
      def best_block_size(elapsed_time, bytes):
@@ -581,7 +567,7 @@ class FileDownloader(object):
                  self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                  return False
  
-        data_len_str = self.format_bytes(data_len)
+        data_len_str = format_bytes(data_len)
          byte_counter = 0 + resume_len
          block_size = self.params.get('buffersize', 1024)
          start = time.time()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 20eed96caf6dbb824973a1ba7566d5131a5f4334..30ba94666a642c45bfc03af75eb09019c04ae9ad 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,9 +5,12 @@ from __future__ import absolute_import
  
  import errno
  import io
+import json
  import os
+import platform
  import re
  import shutil
+import subprocess
  import socket
  import sys
  import time
@@ -17,6 +20,7 @@ if os.name == 'nt':
      import ctypes
  
  from .utils import (
+    compat_cookiejar,
      compat_http_client,
      compat_print,
      compat_str,
@@ -29,9 +33,12 @@ from .utils import (
      DownloadError,
      encodeFilename,
      ExtractorError,
+    format_bytes,
      locked_file,
+    make_HTTPS_handler,
      MaxDownloadsReached,
      PostProcessingError,
+    platform_name,
      preferredencoding,
      SameFileError,
      sanitize_filename,
@@ -40,9 +47,11 @@ from .utils import (
      UnavailableVideoError,
      write_json_file,
      write_string,
+    YoutubeDLHandler,
  )
  from .extractor import get_info_extractor, gen_extractors
  from .FileDownloader import FileDownloader
+from .version import __version__
  
  
  class YoutubeDL(object):
@@ -84,6 +93,7 @@ class YoutubeDL(object):
      forcethumbnail:    Force printing thumbnail URL.
      forcedescription:  Force printing description.
      forcefilename:     Force printing final filename.
+    forcejson:         Force printing info_dict as JSON.
      simulate:          Do not download the video files.
      format:            Video format code.
      format_limit:      Highest quality format to try.
@@ -95,6 +105,7 @@ class YoutubeDL(object):
      playlistend:       Playlist item to end at.
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
+    logger:            Log messages to a logging.Logger instance.
      logtostderr:       Log messages to stderr instead of stdout.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
@@ -118,6 +129,9 @@ class YoutubeDL(object):
      downloadarchive:   File name of a file where all downloads are recorded.
                         Videos already present in the file are not downloaded
                         again.
+    cookiefile:        File name where cookies should be read from and dumped to.
+    nocheckcertificate:Do not verify SSL certificates
+    proxy:             URL of the proxy server to use
  
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
@@ -158,6 +172,8 @@ class YoutubeDL(object):
          if '%(stitle)s' in self.params['outtmpl']:
              self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  
+        self._setup_opener()
+
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
@@ -190,7 +206,9 @@ class YoutubeDL(object):
  
      def to_screen(self, message, skip_eol=False):
          """Print message to stdout if not in quiet mode."""
-        if not self.params.get('quiet', False):
+        if self.params.get('logger'):
+            self.params['logger'].debug(message)
+        elif not self.params.get('quiet', False):
              terminator = [u'\n', u''][skip_eol]
              output = message + terminator
              write_string(output, self._screen_file)
@@ -198,10 +216,13 @@ class YoutubeDL(object):
      def to_stderr(self, message):
          """Print message to stderr."""
          assert type(message) == type(u'')
-        output = message + u'\n'
-        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-            output = output.encode(preferredencoding())
-        sys.stderr.write(output)
+        if self.params.get('logger'):
+            self.params['logger'].error(message)
+        else:
+            output = message + u'\n'
+            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+                output = output.encode(preferredencoding())
+            sys.stderr.write(output)
  
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
@@ -217,13 +238,15 @@ class YoutubeDL(object):
          if not self.params.get('consoletitle', False):
              return
          if 'TERM' in os.environ:
-            write_string(u'\033[22t', self._screen_file)
+            # Save the title on stack
+            write_string(u'\033[22;0t', self._screen_file)
  
      def restore_console_title(self):
          if not self.params.get('consoletitle', False):
              return
          if 'TERM' in os.environ:
-            write_string(u'\033[23t', self._screen_file)
+            # Restore the title from stack
+            write_string(u'\033[23;0t', self._screen_file)
  
      def __enter__(self):
          self.save_console_title()
@@ -231,6 +254,9 @@ class YoutubeDL(object):
  
      def __exit__(self, *args):
          self.restore_console_title()
+    
+        if self.params.get('cookiefile') is not None:
+            self.cookiejar.save()
  
      def fixed_template(self):
          """Checks if the output template is fixed."""
@@ -351,15 +377,17 @@ class YoutubeDL(object):
      def _match_entry(self, info_dict):
          """ Returns None iff the file should be downloaded """
  
-        title = info_dict['title']
-        matchtitle = self.params.get('matchtitle', False)
-        if matchtitle:
-            if not re.search(matchtitle, title, re.IGNORECASE):
-                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
-        rejecttitle = self.params.get('rejecttitle', False)
-        if rejecttitle:
-            if re.search(rejecttitle, title, re.IGNORECASE):
-                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        if 'title' in info_dict:
+            # This can happen when we're just evaluating the playlist
+            title = info_dict['title']
+            matchtitle = self.params.get('matchtitle', False)
+            if matchtitle:
+                if not re.search(matchtitle, title, re.IGNORECASE):
+                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+            rejecttitle = self.params.get('rejecttitle', False)
+            if rejecttitle:
+                if re.search(rejecttitle, title, re.IGNORECASE):
+                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
          date = info_dict.get('upload_date', None)
          if date is not None:
              dateRange = self.params.get('daterange', DateRange())
@@ -370,8 +398,8 @@ class YoutubeDL(object):
              if age_limit < info_dict.get('age_limit', 0):
                  return u'Skipping "' + title + '" because it is age restricted'
          if self.in_download_archive(info_dict):
-            return (u'%(title)s has already been recorded in archive'
-                    % info_dict)
+            return (u'%s has already been recorded in archive'
+                    % info_dict.get('title', info_dict.get('id', u'video')))
          return None
  
      @staticmethod
@@ -450,7 +478,7 @@ class YoutubeDL(object):
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'playlist':
-            self.add_extra_info(ie_result, extra_info)
+
              # We process each entry in the playlist
              playlist = ie_result.get('title', None) or ie_result.get('id', None)
              self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -480,6 +508,12 @@ class YoutubeDL(object):
                      'webpage_url': ie_result['webpage_url'],
                      'extractor_key': ie_result['extractor_key'],
                  }
+
+                reason = self._match_entry(entry)
+                if reason is not None:
+                    self.to_screen(u'[download] ' + reason)
+                    continue
+
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
                                                        extra_info=extra)
@@ -635,7 +669,7 @@ class YoutubeDL(object):
  
          # Forced printings
          if self.params.get('forcetitle', False):
-            compat_print(info_dict['title'])
+            compat_print(info_dict['fulltitle'])
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
@@ -649,6 +683,8 @@ class YoutubeDL(object):
              compat_print(filename)
          if self.params.get('forceformat', False):
              compat_print(info_dict['format'])
+        if self.params.get('forcejson', False):
+            compat_print(json.dumps(info_dict))
  
          # Do nothing else if in simulate mode
          if self.params.get('simulate', False):
@@ -711,7 +747,7 @@ class YoutubeDL(object):
                      return
  
          if self.params.get('writeinfojson', False):
-            infofn = filename + u'.info.json'
+            infofn = os.path.splitext(filename)[0] + u'.info.json'
              self.report_writeinfojson(infofn)
              try:
                  json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
@@ -768,7 +804,7 @@ class YoutubeDL(object):
          for url in url_list:
              try:
                  #It also downloads the videos
-                videos = self.extract_info(url)
+                self.extract_info(url)
              except UnavailableVideoError:
                  self.report_error(u'unable to download video')
              except MaxDownloadsReached:
@@ -804,7 +840,16 @@ class YoutubeDL(object):
          fn = self.params.get('download_archive')
          if fn is None:
              return False
-        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        extractor = info_dict.get('extractor_id')
+        if extractor is None:
+            if 'id' in info_dict:
+                extractor = info_dict.get('ie_key')  # key in a playlist
+        if extractor is None:
+            return False  # Incomplete video information
+        # Future-proof against any change in case
+        # and backwards compatibility with prior versions
+        extractor = extractor.lower()
+        vid_id = extractor + u' ' + info_dict['id']
          try:
              with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                  for line in archive_file:
@@ -838,9 +883,9 @@ class YoutubeDL(object):
  
      def list_formats(self, info_dict):
          def format_note(fdict):
-            if fdict.get('format_note') is not None:
-                return fdict['format_note']
              res = u''
+            if fdict.get('format_note') is not None:
+                res += fdict['format_note'] + u' '
              if fdict.get('vcodec') is not None:
                  res += u'%-5s' % fdict['vcodec']
              elif fdict.get('vbr') is not None:
@@ -857,25 +902,100 @@ class YoutubeDL(object):
                  res += 'audio'
              if fdict.get('abr') is not None:
                  res += u'@%3dk' % fdict['abr']
+            if fdict.get('filesize') is not None:
+                if res:
+                    res += u', '
+                res += format_bytes(fdict['filesize'])
              return res
  
-        def line(format):
-            return (u'%-20s%-10s%-12s%s' % (
+        def line(format, idlen=20):
+            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                  format['format_id'],
                  format['ext'],
                  self.format_resolution(format),
                  format_note(format),
-                )
-            )
+            ))
  
          formats = info_dict.get('formats', [info_dict])
-        formats_s = list(map(line, formats))
+        idlen = max(len(u'format code'),
+                    max(len(f['format_id']) for f in formats))
+        formats_s = [line(f, idlen) for f in formats]
          if len(formats) > 1:
              formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
              formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
  
          header_line = line({
              'format_id': u'format code', 'ext': u'extension',
-            '_resolution': u'resolution', 'format_note': u'note'})
+            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
          self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                         (info_dict['id'], header_line, u"\n".join(formats_s)))
+
+    def urlopen(self, req):
+        """ Start an HTTP download """
+        return self._opener.open(req)
+
+    def print_debug_header(self):
+        if not self.params.get('verbose'):
+            return
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        try:
+            sp = subprocess.Popen(
+                ['git', 'rev-parse', '--short', 'HEAD'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                cwd=os.path.dirname(os.path.abspath(__file__)))
+            out, err = sp.communicate()
+            out = out.decode().strip()
+            if re.match('[0-9a-f]+', out):
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
+        except:
+            try:
+                sys.exc_clear()
+            except:
+                pass
+        write_string(u'[debug] Python version %s - %s' %
+                     (platform.python_version(), platform_name()) + u'\n')
+
+        proxy_map = {}
+        for handler in self._opener.handlers:
+            if hasattr(handler, 'proxies'):
+                proxy_map.update(handler.proxies)
+        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
+
+    def _setup_opener(self, timeout=300):
+        opts_cookiefile = self.params.get('cookiefile')
+        opts_proxy = self.params.get('proxy')
+
+        if opts_cookiefile is None:
+            self.cookiejar = compat_cookiejar.CookieJar()
+        else:
+            self.cookiejar = compat_cookiejar.MozillaCookieJar(
+                opts_cookiefile)
+            if os.access(opts_cookiefile, os.R_OK):
+                self.cookiejar.load()
+
+        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
+            self.cookiejar)
+        if opts_proxy is not None:
+            if opts_proxy == '':
+                proxies = {}
+            else:
+                proxies = {'http': opts_proxy, 'https': opts_proxy}
+        else:
+            proxies = compat_urllib_request.getproxies()
+            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+            if 'http' in proxies and 'https' not in proxies:
+                proxies['https'] = proxies['http']
+        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+        https_handler = make_HTTPS_handler(
+            self.params.get('nocheckcertificate', False))
+        opener = compat_urllib_request.build_opener(
+            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+        # Delete the default user-agent header, which would otherwise apply in
+        # cases where our custom HTTP handler doesn't come into play
+        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+        opener.addheaders = []
+        self._opener = opener
+
+        # TODO remove this global modification
+        compat_urllib_request.install_opener(opener)
+        socket.setdefaulttimeout(timeout)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index af4c9c5c46d094866d3e6f85a27202d363a9baf2..1f15c7eaa03acc63a5d3cbf1e244b292a053344e 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -34,50 +34,42 @@ __authors__  = (
      'Andras Elso',
      'Jelle van der Waa',
      'Marcin Cieślak',
+    'Anton Larionov',
+    'Takuya Tsuchida',
  )
  
  __license__ = 'Public Domain'
  
  import codecs
-import collections
  import getpass
  import optparse
  import os
  import random
  import re
  import shlex
-import socket
  import subprocess
  import sys
-import traceback
-import platform
  
  
  from .utils import (
-    compat_cookiejar,
      compat_print,
-    compat_str,
-    compat_urllib_request,
      DateRange,
      decodeOption,
      determine_ext,
      DownloadError,
      get_cachedir,
-    make_HTTPS_handler,
      MaxDownloadsReached,
-    platform_name,
      preferredencoding,
      SameFileError,
      std_headers,
      write_string,
-    YoutubeDLHandler,
  )
  from .update import update_self
-from .version import __version__
  from .FileDownloader import (
      FileDownloader,
  )
  from .extractor import gen_extractors
+from .version import __version__
  from .YoutubeDL import YoutubeDL
  from .PostProcessor import (
      FFmpegMetadataPP,
@@ -306,6 +298,9 @@ def parseOpts(overrideArguments=None):
      verbosity.add_option('--get-format',
              action='store_true', dest='getformat',
              help='simulate, quiet but print output format', default=False)
+    verbosity.add_option('-j', '--dump-json',
+            action='store_true', dest='dumpjson',
+            help='simulate, quiet but print JSON information', default=False)
      verbosity.add_option('--newline',
              action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
      verbosity.add_option('--no-progress',
@@ -447,19 +442,6 @@ def _real_main(argv=None):
  
      parser, opts, args = parseOpts(argv)
  
-    # Open appropriate CookieJar
-    if opts.cookiefile is None:
-        jar = compat_cookiejar.CookieJar()
-    else:
-        try:
-            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
-            if os.access(opts.cookiefile, os.R_OK):
-                jar.load()
-        except (IOError, OSError) as err:
-            if opts.verbose:
-                traceback.print_exc()
-            write_string(u'ERROR: unable to open cookie file\n')
-            sys.exit(101)
      # Set user agent
      if opts.user_agent is not None:
          std_headers['User-Agent'] = opts.user_agent
@@ -491,8 +473,6 @@ def _real_main(argv=None):
      all_urls = batchurls + args
      all_urls = [url.strip() for url in all_urls]
  
-    opener = _setup_opener(jar=jar, opts=opts)
-
      extractors = gen_extractors()
  
      if opts.list_extractors:
@@ -547,7 +527,7 @@ def _real_main(argv=None):
      if opts.retries is not None:
          try:
              opts.retries = int(opts.retries)
-        except (TypeError, ValueError) as err:
+        except (TypeError, ValueError):
              parser.error(u'invalid retry count specified')
      if opts.buffersize is not None:
          numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@@ -558,13 +538,13 @@ def _real_main(argv=None):
          opts.playliststart = int(opts.playliststart)
          if opts.playliststart <= 0:
              raise ValueError(u'Playlist start must be positive')
-    except (TypeError, ValueError) as err:
+    except (TypeError, ValueError):
          parser.error(u'invalid playlist start number specified')
      try:
          opts.playlistend = int(opts.playlistend)
          if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
              raise ValueError(u'Playlist end must be greater than playlist start')
-    except (TypeError, ValueError) as err:
+    except (TypeError, ValueError):
          parser.error(u'invalid playlist end number specified')
      if opts.extractaudio:
          if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@@ -608,7 +588,7 @@ def _real_main(argv=None):
          'username': opts.username,
          'password': opts.password,
          'videopassword': opts.videopassword,
-        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
+        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
          'forceurl': opts.geturl,
          'forcetitle': opts.gettitle,
          'forceid': opts.getid,
@@ -616,8 +596,9 @@ def _real_main(argv=None):
          'forcedescription': opts.getdescription,
          'forcefilename': opts.getfilename,
          'forceformat': opts.getformat,
+        'forcejson': opts.dumpjson,
          'simulate': opts.simulate,
-        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
+        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
          'format': opts.format,
          'format_limit': opts.format_limit,
          'listformats': opts.listformats,
@@ -666,34 +647,12 @@ def _real_main(argv=None):
          'youtube_print_sig_code': opts.youtube_print_sig_code,
          'age_limit': opts.age_limit,
          'download_archive': opts.download_archive,
+        'cookiefile': opts.cookiefile,
+        'nocheckcertificate': opts.no_check_certificate,
      }
  
      with YoutubeDL(ydl_opts) as ydl:
-        if opts.verbose:
-            write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
-            try:
-                sp = subprocess.Popen(
-                    ['git', 'rev-parse', '--short', 'HEAD'],
-                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                    cwd=os.path.dirname(os.path.abspath(__file__)))
-                out, err = sp.communicate()
-                out = out.decode().strip()
-                if re.match('[0-9a-f]+', out):
-                    write_string(u'[debug] Git HEAD: ' + out + u'\n')
-            except:
-                try:
-                    sys.exc_clear()
-                except:
-                    pass
-            write_string(u'[debug] Python version %s - %s' %
-                         (platform.python_version(), platform_name()) + u'\n')
-
-            proxy_map = {}
-            for handler in opener.handlers:
-                if hasattr(handler, 'proxies'):
-                    proxy_map.update(handler.proxies)
-            write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
-
+        ydl.print_debug_header()
          ydl.add_default_info_extractors()
  
          # PostProcessors
@@ -724,46 +683,9 @@ def _real_main(argv=None):
              ydl.to_screen(u'--max-download limit reached, aborting.')
              retcode = 101
  
-    # Dump cookie jar if requested
-    if opts.cookiefile is not None:
-        try:
-            jar.save()
-        except (IOError, OSError):
-            sys.exit(u'ERROR: unable to save cookie jar')
-
      sys.exit(retcode)
  
  
-def _setup_opener(jar=None, opts=None, timeout=300):
-    if opts is None:
-        FakeOptions = collections.namedtuple(
-            'FakeOptions', ['proxy', 'no_check_certificate'])
-        opts = FakeOptions(proxy=None, no_check_certificate=False)
-
-    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-    if opts.proxy is not None:
-        if opts.proxy == '':
-            proxies = {}
-        else:
-            proxies = {'http': opts.proxy, 'https': opts.proxy}
-    else:
-        proxies = compat_urllib_request.getproxies()
-        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
-        if 'http' in proxies and 'https' not in proxies:
-            proxies['https'] = proxies['http']
-    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
-    https_handler = make_HTTPS_handler(opts)
-    opener = compat_urllib_request.build_opener(
-        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
-    # Delete the default user-agent header, which would otherwise apply in
-    # cases where our custom HTTP handler doesn't come into play
-    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
-    opener.addheaders = []
-    compat_urllib_request.install_opener(opener)
-    socket.setdefaulttimeout(timeout)
-    return opener
-
-
  def main(argv=None):
      try:
          _real_main(argv)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 2d1e3cdfd9536fcf30175c9456cd107db6a61d9d..0b4d086b77314d502db0b08c3d0af244803e8e2e 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,5 +1,6 @@
  from .appletrailers import AppleTrailersIE
  from .addanime import AddAnimeIE
+from .anitube import AnitubeIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
  from .arte import (
@@ -10,7 +11,7 @@ from .arte import (
  )
  from .auengine import AUEngineIE
  from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE
+from .bandcamp import BandcampIE, BandcampAlbumIE
  from .bliptv import BlipTVIE, BlipTVUserIE
  from .bloomberg import BloombergIE
  from .breakcom import BreakIE
@@ -19,12 +20,14 @@ from .c56 import C56IE
  from .canalplus import CanalplusIE
  from .canalc2 import Canalc2IE
  from .cinemassacre import CinemassacreIE
+from .clipfish import ClipfishIE
  from .cnn import CNNIE
  from .collegehumor import CollegeHumorIE
-from .comedycentral import ComedyCentralIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
  from .condenast import CondeNastIE
  from .criterion import CriterionIE
  from .cspan import CSpanIE
+from .d8 import D8IE
  from .dailymotion import (
      DailymotionIE,
      DailymotionPlaylistIE,
@@ -96,6 +99,7 @@ from .nba import NBAIE
  from .nbc import NBCNewsIE
  from .newgrounds import NewgroundsIE
  from .nhl import NHLIE, NHLVideocenterIE
+from .niconico import NiconicoIE
  from .nowvideo import NowVideoIE
  from .ooyala import OoyalaIE
  from .orf import ORFIE
@@ -126,12 +130,14 @@ from .spiegel import SpiegelIE
  from .stanfordoc import StanfordOpenClassroomIE
  from .statigram import StatigramIE
  from .steam import SteamIE
+from .streamcloud import StreamcloudIE
  from .sztvhu import SztvHuIE
  from .teamcoco import TeamcocoIE
  from .techtalks import TechTalksIE
  from .ted import TEDIE
  from .tf1 import TF1IE
  from .thisav import ThisAVIE
+from .toutv import TouTvIE
  from .traileraddict import TrailerAddictIE
  from .trilulilu import TriluliluIE
  from .tube8 import Tube8IE
@@ -152,6 +158,7 @@ from .videofyme import VideofyMeIE
  from .videopremium import VideoPremiumIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
+from .viki import VikiIE
  from .vk import VKIE
  from .wat import WatIE
  from .websurg import WeBSurgIE
@@ -179,6 +186,7 @@ from .youtube import (
      YoutubeTruncatedURLIE,
      YoutubeWatchLaterIE,
      YoutubeFavouritesIE,
+    YoutubeHistoryIE,
  )
  from .zdf import ZDFIE
  
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py

new file mode 100644 (file)

index 0000000..691d5a8
--- /dev/null
+++ b/youtube_dl/extractor/anitube.py
@@ -0,0 +1,55 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class AnitubeIE(InfoExtractor):
+    """Information extractor for anitube.se video pages."""
+    IE_NAME = u'anitube.se'
+    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.anitube.se/video/36621',
+        u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
+        u'file': u'36621.mp4',
+        u'info_dict': {
+            u'id': u'36621',
+            u'ext': u'mp4',
+            u'title': u'Recorder to Randoseru 01',
+        },
+        u'skip': u'Blocked in the US',
+    }
+
+    def _real_extract(self, url):
+        """Return a dict with the video's id, title and available formats.
+
+        The embed key is read from the video page and used to fetch an XML
+        config document, which carries the title and the 'file' / 'filehd'
+        media URLs.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        key = self._html_search_regex(
+            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, u'key')
+
+        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
+        config_page = self._download_webpage(config_url, key)
+        config_xml = xml.etree.ElementTree.fromstring(
+            config_page.encode('utf-8'))
+
+        video_title = config_xml.find('title').text
+
+        # Each XML tag maps to one format id; a missing tag is skipped
+        formats = []
+        for tag, format_id in (('file', 'sd'), ('filehd', 'hd')):
+            node = config_xml.find(tag)
+            if node is not None:
+                formats.append({
+                    'format_id': format_id,
+                    'url': node.text,
+                })
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py

index 0febbff4f6c42afd10f8dbc13ea9df883edae4c6..95c038003b431dc48ac3bb89dcc03f8aa39ea07f 100644 (file)
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -1,10 +1,10 @@
-import os.path
  import re
  
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse,
-    compat_urllib_parse_urlparse,
+    determine_ext,
+    ExtractorError,
  )
  
  class AUEngineIE(InfoExtractor):
@@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
          title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
                  webpage, u'title')
          title = title.strip()
-        links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
-        links = [compat_urllib_parse.unquote(l) for l in links]
+        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
+        links = map(compat_urllib_parse.unquote, links)
+
+        thumbnail = None
+        video_url = None
          for link in links:
-            root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
-            if pathext == '.png':
+            if link.endswith('.png'):
                  thumbnail = link
-            elif pathext == '.mp4':
-                url = link
-                ext = pathext
+            elif '/videos/' in link:
+                video_url = link
+        if not video_url:
+            raise ExtractorError(u'Could not find video URL')
+        ext = u'.' + determine_ext(video_url)
          if ext == title[-len(ext):]:
              title = title[:-len(ext)]
-        ext = ext[1:]
-        return [{
+
+        return {
              'id':        video_id,
-            'url':       url,
-            'ext':       ext,
+            'url':       video_url,
              'title':     title,
              'thumbnail': thumbnail,
-        }]
+        }
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py

index 129a20f4497b4cc6fc9f031e8e48dd8eb8980f66..3a32c14c598dd2da14841fe68c1cb59582f30799 100644 (file)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -3,13 +3,16 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
+    compat_str,
+    compat_urlparse,
      ExtractorError,
  )
  
  
  class BandcampIE(InfoExtractor):
+    IE_NAME = u'Bandcamp'
      _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
-    _TEST = {
+    _TESTS = [{
          u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
          u'file': u'1812978515.mp3',
          u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
@@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
              u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
          },
          u'skip': u'There is a limit of 200 free downloads / month for the test song'
-    }
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
          # We get the link to the free download page
          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
          if m_download is None:
+            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
+        if m_trackinfo:
+            json_code = m_trackinfo.group(1)
+            data = json.loads(json_code)
+
+            for d in data:
+                formats = [{
+                    'format_id': 'format_id',
+                    'url': format_url,
+                    'ext': format_id.partition('-')[0]
+                } for format_id, format_url in sorted(d['file'].items())]
+                return {
+                    'id': compat_str(d['id']),
+                    'title': d['title'],
+                    'formats': formats,
+                }
+        else:
              raise ExtractorError(u'No free songs found')
  
          download_link = m_download.group(1)
@@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
                        }
  
          return [track_info]
+
+
+class BandcampAlbumIE(InfoExtractor):
+    """Information extractor for Bandcamp album pages (yields a playlist)."""
+    IE_NAME = u'Bandcamp:album'
+    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+
+    _TEST = {
+        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+        u'playlist': [
+            {
+                u'file': u'1353101989.mp3',
+                u'md5': u'39bc1eded3476e927c724321ddf116cf',
+                u'info_dict': {
+                    u'title': u'Intro',
+                }
+            },
+            {
+                u'file': u'38097443.mp3',
+                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
+                u'info_dict': {
+                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
+                }
+            },
+        ],
+        u'params': {
+            u'playlistend': 2
+        },
+        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist dict with one url entry per track link on the page.
+
+        Raises ExtractorError when the page contains no track links.
+        """
+        # The URL's title part is only used as the id shown while downloading
+        album_slug = re.match(self._VALID_URL, url).group('title')
+        webpage = self._download_webpage(url, album_slug)
+        track_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
+        if not track_paths:
+            raise ExtractorError(u'The page doesn\'t contain any track')
+        entries = []
+        for track_path in track_paths:
+            # Track links are resolved against the album URL
+            track_url = compat_urlparse.urljoin(url, track_path)
+            entries.append(
+                self.url_result(track_url, ie=BandcampIE.ie_key()))
+        album_title = self._search_regex(
+            r'album_title : "(.*?)"', webpage, u'title')
+        return {
+            '_type': 'playlist',
+            'title': album_title,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index d8c35465a34fa4c4d4ca822d499892504a51ce62..66fe0ac9ade6fad80d77f0429c136c2d022af16d 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
          params = {'flashID': object_doc.attrib['id'],
                    'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                    }
-        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+        def find_param(name):
+            node = find_xpath_attr(object_doc, './param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
+        playerKey = find_param('playerKey')
          # Not all pages define this value
          if playerKey is not None:
-            params['playerKey'] = playerKey.attrib['value']
-        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+            params['playerKey'] = playerKey
+        # The three fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
          if videoPlayer is not None:
-            params['@videoPlayer'] = videoPlayer.attrib['value']
-        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+            params['@videoPlayer'] = videoPlayer
+        linkBase = find_param('linkBaseURL')
          if linkBase is not None:
-            params['linkBaseURL'] = linkBase.attrib['value']
+            params['linkBaseURL'] = linkBase
          data = compat_urllib_parse.urlencode(params)
          return cls._FEDERATED_URL_TEMPLATE % data
  
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py

index 1db9b24cf204cc26d68b1a1bdaff93577c3ae903..bfa2a8b4063163729b3d8c11d63d7567a81ab59e 100644 (file)
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -5,6 +5,7 @@ import xml.etree.ElementTree
  from .common import InfoExtractor
  from ..utils import unified_strdate
  
+
  class CanalplusIE(InfoExtractor):
      _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
      _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
@@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.groupdict().get('id')
          if video_id is None:
              webpage = self._download_webpage(url, mobj.group('path'))
              video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py

new file mode 100644 (file)

index 0000000..95449da
--- /dev/null
+++ b/youtube_dl/extractor/clipfish.py
@@ -0,0 +1,53 @@
+import re
+import time
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class ClipfishIE(InfoExtractor):
+    """Information extractor for clipfish.de video pages."""
+    IE_NAME = u'clipfish'
+
+    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
+    _TEST = {
+        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
+        u'file': u'4028320.f4v',
+        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
+        u'info_dict': {
+            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
+            u'duration': 399,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a dict with id, title, url, thumbnail and duration.
+
+        The metadata comes from the site's videoinfo XML document.  The
+        duration is given in whole seconds, or None when the duration string
+        does not have the expected hours:minutes:seconds:ms form.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # The current timestamp is sent as the "ts" query parameter
+        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
+                    (video_id, int(time.time())))
+        info_xml = self._download_webpage(
+            info_url, video_id, note=u'Downloading info page')
+        # Hand the parser UTF-8 bytes instead of the decoded text, as the
+        # other XML-parsing code in this project does; parsing a unicode
+        # string containing non-ASCII characters fails on Python 2.
+        doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        title = doc.find('title').text
+        video_url = doc.find('filename').text
+        thumbnail = doc.find('imageurl').text
+        duration_str = doc.find('duration').text
+        m = re.match(
+            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
+            duration_str)
+        if m:
+            # The trailing "ms" field is matched but not included
+            duration = (
+                (int(m.group('hours')) * 60 * 60) +
+                (int(m.group('minutes')) * 60) +
+                (int(m.group('seconds')))
+            )
+        else:
+            duration = None
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py

index 8d4c93d6da91f4470c9809bf32dd0fbbe886c92b..b27c1dfc52401f3c148d48d2b2897d2b06db3834 100644 (file)
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,5 +1,4 @@
  import re
-import xml.etree.ElementTree
  
  from .common import InfoExtractor
  from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
  
          self.report_extraction(video_id)
          xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        metaXml = self._download_webpage(xmlUrl, video_id,
+        mdoc = self._download_xml(xmlUrl, video_id,
                                           u'Downloading info XML',
                                           u'Unable to download video info XML')
  
-        mdoc = xml.etree.ElementTree.fromstring(metaXml)
          try:
              videoNode = mdoc.findall('./video')[0]
              youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor):
  
          if next_url.endswith(u'manifest.f4m'):
              manifest_url = next_url + '?hdcore=2.10.3'
-            manifestXml = self._download_webpage(manifest_url, video_id,
+            adoc = self._download_xml(manifest_url, video_id,
                                           u'Downloading XML manifest',
                                           u'Unable to download video info XML')
  
-            adoc = xml.etree.ElementTree.fromstring(manifestXml)
              try:
-                media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
-                node_id = media_node.attrib['url']
                  video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
-            except IndexError as err:
+            except IndexError:
                  raise ExtractorError(u'Invalid manifest file')
              url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
              info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py

index 69b2beecebac319ef92e8043ab75ad71fad46a25..725849d2e98c89c2548778e6c37f73ce395d1c4a 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -2,6 +2,7 @@ import re
  import xml.etree.ElementTree
  
  from .common import InfoExtractor
+from .mtv import MTVIE, _media_xml_tag
  from ..utils import (
      compat_str,
      compat_urllib_parse,
@@ -11,7 +12,37 @@ from ..utils import (
  )
  
  
-class ComedyCentralIE(InfoExtractor):
+class ComedyCentralIE(MTVIE):
+    """Extractor for comedycentral.com video-clips, episodes and cc-studios pages.
+
+    Subclasses MTVIE: the page's mgid is looked up and handed to
+    _get_videos_info, with _FEED_URL pointing at the Comedy Central feed.
+    """
+    # Dots in the host name are escaped so only the literal domain matches
+    _VALID_URL = r'http://www\.comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
+
+    _TEST = {
+        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+        u'md5': u'4167875aae411f903b751a21f357f1ee',
+        u'info_dict': {
+            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            u'ext': u'mp4',
+            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
+            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
+        },
+    }
+    # Overwrite MTVIE properties we don't want
+    _TESTS = []
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        """Return the 'url' attribute of the thumbnail element inside the media group.
+
+        `uri` is accepted but not used here.
+        """
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        return itemdoc.find(search_path).attrib['url']
+
+    def _real_extract(self, url):
+        """Find the data-mgid value in the page and return _get_videos_info for it."""
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
+                                  webpage, u'mgid')
+        return self._get_videos_info(mgid)
+
+
+class ComedyCentralShowsIE(InfoExtractor):
      IE_DESC = u'The Daily Show / Colbert Report'
      # urls can be abbreviations like :thedailyshow or :colbert
      # urls for episodes like:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index f787d0a3c0b3afd47f38ddbb09eab8476126516d..5656445a3360b8e908280e967bb97fe5b22beffc 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -4,11 +4,11 @@ import re
  import socket
  import sys
  import netrc
+import xml.etree.ElementTree
  
  from ..utils import (
      compat_http_client,
      compat_urllib_error,
-    compat_urllib_request,
      compat_str,
  
      clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
      unescapeHTML,
  )
  
+
  class InfoExtractor(object):
      """Information Extractor class.
  
@@ -75,6 +76,7 @@ class InfoExtractor(object):
                      * acodec    Name of the audio codec in use
                      * vbr       Average video bitrate in KBit/s
                      * vcodec    Name of the video codec in use
+                    * filesize  The number of bytes, if known in advance
      webpage_url:    The url to the video webpage, if given to youtube-dl it
                      should allow to get the same result again. (It will be set
                      by YoutubeDL if it's missing)
@@ -156,7 +158,7 @@ class InfoExtractor(object):
          elif note is not False:
              self.to_screen(u'%s: %s' % (video_id, note))
          try:
-            return compat_urllib_request.urlopen(url_or_request)
+            return self._downloader.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
@@ -208,6 +210,11 @@ class InfoExtractor(object):
          """ Returns the data of the page as a string """
          return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
  
+    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'):
+        """Return the xml as an xml.etree.ElementTree.Element
+
+        The page is fetched with _download_webpage (note and errnote are
+        passed through) and parsed from its UTF-8 encoded form.
+        """
+        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+
      def to_screen(self, msg):
          """Print msg to screen, prefixing it with '[ie_name]'"""
          self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -229,12 +236,14 @@ class InfoExtractor(object):
          self.to_screen(u'Logging in')
  
      #Methods for following #608
-    def url_result(self, url, ie=None):
+    def url_result(self, url, ie=None, video_id=None):
          """Returns a url that points to a page that should be processed"""
          #TODO: ie should be the class used for getting the info
          video_info = {'_type': 'url',
                        'url': url,
                        'ie_key': ie}
+        if video_id is not None:
+            video_info['id'] = video_id
          return video_info
      def playlist_result(self, entries, playlist_id=None, playlist_title=None):
          """Returns a playlist"""
@@ -350,6 +359,17 @@ class InfoExtractor(object):
          if secure: regexes = self._og_regexes('video:secure_url') + regexes
          return self._html_search_regex(regexes, html, name, **kargs)
  
+    def _html_search_meta(self, name, html, display_name=None):
+        """Search html for a <meta> tag named `name` and return its content.
+
+        The tag is matched on its name= or property= attribute.
+        `display_name` is the label passed on to _html_search_regex and
+        defaults to `name`.  The search is non-fatal (fatal=False).
+        """
+        label = name if display_name is None else display_name
+        meta_re = (
+            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
+                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name))
+        return self._html_search_regex(meta_re, html, label, fatal=False)
+
+    def _dc_search_uploader(self, html):
+        """Return the content of the page's 'dc.creator' meta tag as the uploader."""
+        uploader = self._html_search_meta('dc.creator', html, 'uploader')
+        return uploader
+
      def _rta_search(self, html):
          # See http://www.rtalabel.org/index.php?content=howtofaq#single
          if re.search(r'(?ix)<meta\s+name="rating"\s+'
@@ -358,6 +378,23 @@ class InfoExtractor(object):
              return 18
          return 0
  
+    def _media_rating_search(self, html):
+        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+        rating = self._html_search_meta('rating', html)
+
+        if not rating:
+            return None
+
+        RATING_TABLE = {
+            'safe for kids': 0,
+            'general': 8,
+            '14 years': 14,
+            'mature': 17,
+            'restricted': 19,
+        }
+        return RATING_TABLE.get(rating.lower(), None)
+
+
  
  class SearchInfoExtractor(InfoExtractor):
      """
diff --git a/youtube_dl/extractor/d8.py b/youtube_dl/extractor/d8.py

new file mode 100644 (file)

index 0000000..a56842b
--- /dev/null
+++ b/youtube_dl/extractor/d8.py
@@ -0,0 +1,22 @@
+# encoding: utf-8
+from .canalplus import CanalplusIE
+
+
+class D8IE(CanalplusIE):
+    _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
+    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
+    IE_NAME = u'd8.tv'
+
+    _TEST = {
+        u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
+        u'file': u'966289.flv',
+        u'info_dict': {
+            u'title': u'Campagne intime - Documentaire exceptionnel',
+            u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
+            u'upload_date': u'20131108',
+        },
+        u'params': {
+            # rtmp
+            u'skip_download': True,
+        },
+    }
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py

index 2cfbcd363c0db4f2505d8da7120d7c3161a7b0a9..f21ef88530d2f8913b4b35d9c03fc4fc14de7ddc 100644 (file)
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -1,4 +1,3 @@
-import itertools
  import json
  import random
  import re
diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py

index 3aa2da52c0117bc9926df9c250eeb70da6cc2299..b1242f6bc457a41a9c8413eb851671acd05cc8c0 100644 (file)
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -11,11 +11,11 @@ from ..utils import (
  
  
  class EscapistIE(InfoExtractor):
-    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
+    _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
      _TEST = {
          u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
          u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
-        u'md5': u'c6793dbda81388f4264c1ba18684a74d',
+        u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
          u'info_dict': {
              u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", 
              u"uploader": u"the-escapist-presents", 
@@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
          showName = mobj.group('showname')
          videoId = mobj.group('episode')
  
          self.report_extraction(videoId)
          webpage = self._download_webpage(url, videoId)
  
-        videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
+        videoDesc = self._html_search_regex(
+            r'<meta name="description" content="([^"]*)"',
              webpage, u'description', fatal=False)
  
-        playerUrl = self._og_search_video_url(webpage, name='player url')
+        playerUrl = self._og_search_video_url(webpage, name=u'player URL')
  
-        title = self._html_search_regex('<meta name="title" content="([^"]*)"',
-            webpage, u'player url').split(' : ')[-1]
+        title = self._html_search_regex(
+            r'<meta name="title" content="([^"]*)"',
+            webpage, u'title').split(' : ')[-1]
  
-        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
+        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
          configUrl = compat_urllib_parse.unquote(configUrl)
  
-        configJSON = self._download_webpage(configUrl, videoId,
-                                            u'Downloading configuration',
-                                            u'unable to download configuration')
-
-        # Technically, it's JavaScript, not JSON
-        configJSON = configJSON.replace("'", '"')
-
+        formats = []
+
+        def _add_format(name, cfgurl):
+            configJSON = self._download_webpage(
+                cfgurl, videoId,
+                u'Downloading ' + name + ' configuration',
+                u'Unable to download ' + name + ' configuration')
+
+            # Technically, it's JavaScript, not JSON
+            configJSON = configJSON.replace("'", '"')
+
+            try:
+                config = json.loads(configJSON)
+            except (ValueError,) as err:
+                raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
+            playlist = config['playlist']
+            formats.append({
+                'url': playlist[1]['url'],
+                'format_id': name,
+            })
+
+        _add_format(u'normal', configUrl)
+        hq_url = (configUrl +  # append hq=1 using the right query separator
+                  ('&hq=1' if '?' in configUrl else '?hq=1'))
          try:
-            config = json.loads(configJSON)
-        except (ValueError,) as err:
-            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
+            _add_format(u'hq', hq_url)
+        except ExtractorError:
+            pass  # That's fine, we'll just use normal quality
  
-        playlist = config['playlist']
-        videoUrl = playlist[1]['url']
-
-        info = {
+        return {
              'id': videoId,
-            'url': videoUrl,
+            'formats': formats,
              'uploader': showName,
-            'upload_date': None,
              'title': title,
-            'ext': 'mp4',
              'thumbnail': self._og_search_thumbnail(webpage),
              'description': videoDesc,
              'player_url': playerUrl,
          }
-
-        return [info]
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py

index f8bdfc2d33c9f00b9f902a4303eb7024f4646312..3b210710e3695ec3aa940b335d9868a281d7740a 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -1,5 +1,4 @@
  import json
-import netrc
  import re
  import socket
  
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py

index 9c89362efafefbb22c4dd5e4ef73950446fe9246..dba1a8dc262979b5afce987211bab2f14e502dba 100644 (file)
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
          for i, _ in enumerate(files, 1):
              video_id = '%04d%d' % (episode, i)
              video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
-            video_title = 'Fernsehkritik %d.%d' % (episode, i)
              videos.append({
                  'id': video_id,
                  'url': video_url,
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py

index 098768361ede01d8acc01dc773a31b5b8fc67241..9645b00c3307a42ba48b66af599345ba80349a3d 100644 (file)
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        page_id = video_id = mobj.group('page_id')
+        page_id = mobj.group('page_id')
          webpage = self._download_webpage(url, page_id)
          data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
          data_video = json.loads(unescapeHTML(data_video_json))
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index c7552fddb587a60454bec6faa174c36bd4aa9a4a..37671430a99b66dea8339dfd986503f3cb57f59e 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -162,6 +162,16 @@ class GenericIE(InfoExtractor):
              raise ExtractorError(u'Failed to download URL: %s' % url)
  
          self.report_extraction(video_id)
+
+        # it's tempting to parse this further, but you would
+        # have to take into account all the variations like
+        #   Video Title - Site Name
+        #   Site Name | Video Title
+        #   Video Title - Tagline | Site Name
+        # and so on and so forth; it's just not practical
+        video_title = self._html_search_regex(r'<title>(.*)</title>',
+            webpage, u'video title', default=u'video', flags=re.DOTALL)
+
          # Look for BrightCove:
          bc_url = BrightcoveIE._extract_brightcove_url(webpage)
          if bc_url is not None:
@@ -177,17 +187,20 @@ class GenericIE(InfoExtractor):
              return self.url_result(surl, 'Vimeo')
  
          # Look for embedded YouTube player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
-        if mobj:
-            surl = unescapeHTML(mobj.group(u'url'))
-            return self.url_result(surl, 'Youtube')
+        matches = re.findall(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
+        if matches:
+            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
+                     for tuppl in matches]
+            return self.playlist_result(
+                urlrs, playlist_id=video_id, playlist_title=video_title)
  
          # Look for Bandcamp pages with custom domain
          mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
          if mobj is not None:
              burl = unescapeHTML(mobj.group(1))
-            return self.url_result(burl, 'Bandcamp')
+            # Don't set the extractor because it can be a track url or an album
+            return self.url_result(burl)
  
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
@@ -196,7 +209,7 @@ class GenericIE(InfoExtractor):
              mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
          if mobj is None:
              # Broaden the search a little bit: JWPlayer JS loader
-            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
+            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -223,27 +236,16 @@ class GenericIE(InfoExtractor):
          video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
  
          # here's a fun little line of code for you:
-        video_extension = os.path.splitext(video_id)[1][1:]
          video_id = os.path.splitext(video_id)[0]
  
-        # it's tempting to parse this further, but you would
-        # have to take into account all the variations like
-        #   Video Title - Site Name
-        #   Site Name | Video Title
-        #   Video Title - Tagline | Site Name
-        # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'video title', default=u'video', flags=re.DOTALL)
-
          # video uploader is domain name
          video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
              url, u'video uploader')
  
-        return [{
+        return {
              'id':       video_id,
              'url':      video_url,
              'uploader': video_uploader,
              'upload_date':  None,
              'title':    video_title,
-            'ext':      video_extension,
-        }]
+        }
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py

index 46954337f25e1cbd7bae89e7da76d4e93ecc8c9e..bafc5826f680353af40b820609a543192ac73d17 100644 (file)
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
      _TEST = {
          u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
          u'file': u'390161.mp4',
-        u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
+        u'md5': u'8b743df908c42f60cf6496586c7f12c3',
          u'info_dict': {
              u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", 
              u"title": u"How to Tie a Square Knot Properly"
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py

index 6bb54b932298395b8f07554b12ad6091cca140d3..0020c47cfd53a05eb4c719a2d8c203783e97d4e2 100644 (file)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        title = re.match(self._VALID_URL, url).group(1)
+        title = mobj.group(1)
          webpage = self._download_webpage(url, title)
          xml_link = self._html_search_regex(
              r'<param name="flashvars" value="config=(.*?)" />',
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py

index 1a3e0ae6b94ea2b59f2065f8641827d29b39fc37..5f548437cd74d879e06ac4be64adf3a6de93ddef 100644 (file)
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -6,9 +6,7 @@ from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urlparse,
-    get_meta_content,
      xpath_with_ns,
-    ExtractorError,
  )
  
  
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py

index a200dcd74a5a7af220cedea02a60c01cfd643e79..e2baf44d7e15032022e6b304ace2bf8ef11a09b2 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
              'title': info['name'],
              'url': final_song_url,
              'ext': 'mp3',
-            'description': info['description'],
+            'description': info.get('description'),
              'thumbnail': info['pictures'].get('extra_large'),
              'uploader': info['user']['name'],
              'uploader_id': info['user']['username'],
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py

index 3df7f9b85a31c6836ac9c0e7e9e19c504c1a045d..04afd6c4c86233512d9e42f0493f5d7a0a7b00b0 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -59,7 +59,6 @@ class MTVIE(InfoExtractor):
          if '/error_country_block.swf' in metadataXml:
              raise ExtractorError(u'This video is not available from your country.', expected=True)
          mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
-        renditions = mdoc.findall('.//rendition')
  
          formats = []
          for rendition in mdoc.findall('.//rendition'):
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py

index 224f56ac84b77647c2ff5468b41d229786da632e..458fe40639171943fec8b516a5facba5bcfb3df6 100644 (file)
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
  
  class NHLVideocenterIE(NHLBaseInfoExtractor):
      IE_NAME = u'nhl.com:videocenter'
-    IE_DESC = u'Download the first 12 videos from a videocenter category'
+    IE_DESC = u'NHL videocenter category'
      _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
  
      @classmethod
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py

new file mode 100644 (file)

index 0000000..729607e
--- /dev/null
+++ b/youtube_dl/extractor/niconico.py
@@ -0,0 +1,131 @@
+# encoding: utf-8
+
+import re
+import socket
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_http_client,
+    compat_urllib_error,
+    compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
+    compat_str,
+
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class NiconicoIE(InfoExtractor):
+    IE_NAME = u'niconico'
+    IE_DESC = u'ニコニコ動画'
+
+    _TEST = {
+        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
+        u'file': u'sm22312215.mp4',
+        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
+        u'info_dict': {
+            u'title': u'Big Buck Bunny',
+            u'uploader': u'takuya0301',
+            u'uploader_id': u'2698420',
+            u'upload_date': u'20131123',
+            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+        },
+        u'params': {
+            u'username': u'ydl.niconico@gmail.com',
+            u'password': u'youtube-dl',
+        },
+    }
+
+    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
+    _NETRC_MACHINE = 'niconico'
+    # If True it will raise an error if no login info is provided
+    _LOGIN_REQUIRED = True
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        # No authentication to be performed
+        if username is None:
+            if self._LOGIN_REQUIRED:
+                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            return False
+
+        # Log in
+        login_form_strs = {
+            u'mail': username,
+            u'password': password,
+        }
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            u'https://secure.nicovideo.jp/secure/login', login_data)
+        login_results = self._download_webpage(
+            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
+            self._downloader.report_warning(u'unable to log in: bad username or password')
+            return False
+        return True
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+
+        # Get video webpage. We are not actually interested in it, but need
+        # the cookies in order to be able to download the info webpage
+        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
+
+        video_info_webpage = self._download_webpage(
+            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
+            note=u'Downloading video info page')
+
+        # Get flv info
+        flv_info_webpage = self._download_webpage(
+            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            video_id, u'Downloading flv info')
+        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
+
+        # Parse the info XML from UTF-8 bytes (text input fails on Python 2 for non-ASCII)
+        video_info = xml.etree.ElementTree.fromstring(video_info_webpage.encode('utf-8'))
+        video_title = video_info.find('.//title').text
+        video_extension = video_info.find('.//movie_type').text
+        video_format = video_extension.upper()
+        video_thumbnail = video_info.find('.//thumbnail_url').text
+        video_description = video_info.find('.//description').text
+        video_uploader_id = video_info.find('.//user_id').text
+        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+        video_view_count = video_info.find('.//view_counter').text
+        video_webpage_url = video_info.find('.//watch_url').text
+
+        # uploader: default to the numeric id, replaced by the nickname if the lookup succeeds
+        video_uploader = video_uploader_id
+        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
+        try:
+            user_info_webpage = self._download_webpage(
+                url, video_id, note=u'Downloading user information')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+        else:
+            user_info = xml.etree.ElementTree.fromstring(user_info_webpage.encode('utf-8'))
+            video_uploader = user_info.find('.//nickname').text
+
+        return {
+            'id':          video_id,
+            'url':         video_real_url,
+            'title':       video_title,
+            'ext':         video_extension,
+            'format':      video_format,
+            'thumbnail':   video_thumbnail,
+            'description': video_description,
+            'uploader':    video_uploader,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'view_count':  video_view_count,
+            'webpage_url': video_webpage_url,
+        }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 75cf4bb9f6789157531377ed38ed100033e2393d..8b3471919565d4c7044d51eb24e8ef01cc8e77fc 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -6,7 +6,6 @@ from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urllib_parse,
-    unescapeHTML,
  )
  from ..aes import (
      aes_decrypt_text
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 83e1f055f80efa05d8db33d296fd4554466a24ad..67b2dff9c9ec431bb7ddd501bb804f6cc572cb61 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -59,6 +59,7 @@ class SoundcloudIE(InfoExtractor):
      ]
  
      _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
+    _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
  
      @classmethod
      def suitable(cls, url):
@@ -83,7 +84,6 @@ class SoundcloudIE(InfoExtractor):
              thumbnail = thumbnail.replace('-large', '-t500x500')
          result = {
              'id':       track_id,
-            'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
              'uploader': info['user']['username'],
              'upload_date': unified_strdate(info['created_at']),
              'title':    info['title'],
@@ -92,19 +92,29 @@ class SoundcloudIE(InfoExtractor):
              'thumbnail': thumbnail,
          }
          if info.get('downloadable', False):
+            # We can build a direct link to the song
              result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
-        if not info.get('streamable', False):
-            # We have to get the rtmp url
+        else:
+            # We have to retrieve the url
              stream_json = self._download_webpage(
-                'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
+                'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
                  track_id, u'Downloading track url')
-            rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
-            # The url doesn't have an rtmp app, we have to extract the playpath
-            url, path = rtmp_url.split('mp3:', 1)
-            result.update({
-                'url': url,
-                'play_path': 'mp3:' + path,
-            })
+            # There should be only one entry in the dictionary
+            key, stream_url = list(json.loads(stream_json).items())[0]
+            if key.startswith(u'http'):
+                result['url'] = stream_url
+            elif key.startswith(u'rtmp'):
+                # The url doesn't have an rtmp app, we have to extract the playpath
+                url, path = stream_url.split('mp3:', 1)
+                result.update({
+                    'url': url,
+                    'play_path': 'mp3:' + path,
+                })
+            else:
+                # We fall back to the stream_url in the original info; this
+                # cannot always be used, sometimes it gives an HTTP 404 error
+                result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID
+
          return result
  
      def _real_extract(self, url):
@@ -158,7 +168,6 @@ class SoundcloudSetIE(SoundcloudIE):
          resolv_url = self._resolv_url(url)
          info_json = self._download_webpage(resolv_url, full_title)
  
-        videos = []
          info = json.loads(info_json)
          if 'errors' in info:
              for err in info['errors']:
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py

index 97f9c268a4531114912f9209c8a959d567add062..9e2ad0d9962c375ca27851b3f842de302be28e56 100644 (file)
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -6,7 +6,6 @@ from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urllib_parse,
-    unescapeHTML,
  )
  from ..aes import (
      aes_decrypt_text
@@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor):
          webpage = self._download_webpage(req, video_id)
  
          video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
-        video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
-        thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
-        description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
-        if len(description) == 0:
-            description = None
+        video_uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+        description = self._html_search_regex(
+            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
  
          video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
          if webpage.find('flashvars\.encrypted = "true"') != -1:
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py

index 6dc2eda6d9740ed4464d30018bb8d23395034eae..19ce585cf1f6dc89569fa56cd6dc30c23dd17e61 100644 (file)
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -2,7 +2,6 @@ import re
  import xml.etree.ElementTree
  
  from .common import InfoExtractor
-from ..utils import determine_ext
  
  
  class SpiegelIE(InfoExtractor):
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py

new file mode 100644 (file)

index 0000000..9faf3a5
--- /dev/null
+++ b/youtube_dl/extractor/streamcloud.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class StreamcloudIE(InfoExtractor):
+    IE_NAME = u'streamcloud.eu'
+    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+
+    _TEST = {
+        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+        u'file': u'skp9j99s4bpz.mp4',
+        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
+        u'info_dict': {
+            u'title': u'youtube-dl test video  \'/\\ ä ↭',
+            u'duration': 9,
+        },
+        u'skip': u'Only available from the EU'
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        orig_webpage = self._download_webpage(url, video_id)
+
+        fields = re.findall(r'''(?x)<input\s+
+            type="(?:hidden|submit)"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', orig_webpage)
+        post = compat_urllib_parse.urlencode(fields)
+
+        self.to_screen('%s: Waiting for timeout' % video_id)
+        time.sleep(12)
+        headers = {
+            b'Content-Type': b'application/x-www-form-urlencoded',
+        }
+        req = compat_urllib_request.Request(url, post, headers)
+
+        webpage = self._download_webpage(
+            req, video_id, note=u'Downloading video page ...')
+        title = self._html_search_regex(
+            r'<h1[^>]*>([^<]+)<', webpage, u'title')
+        video_url = self._search_regex(
+            r'file:\s*"([^"]+)"', webpage, u'video URL')
+        duration_str = self._search_regex(
+            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
+        duration = None if duration_str is None else int(duration_str)
+        thumbnail = self._search_regex(
+            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py

index 81fa35c4bd297f6b6a4b5fa44ed1b98998393ab6..c9359fafb5c5989923c6320e3e684673b80057d6 100644 (file)
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dl/extractor/sztvhu.py
@@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
          u'info_dict': {
              u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
              u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
-        }
+        },
+        u'skip': u'Service temporarily disabled as of 2013-11-20'
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index bc48620f0b992366e866181b1dad22aeb2e5d0a6..165d9f88bc984ef80f1fd7aa4ff0d0b10e5fca45 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -60,7 +60,7 @@ class TeamcocoIE(InfoExtractor):
                  return -1
          formats.sort(key=sort_key)
          if not formats:
-            raise RegexNotFoundError(u'Unable to extract video URL')
+            raise ExtractorError(u'Unable to extract video URL')
  
          return {
              'id':          video_id,
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index 2e497c86ee41cf3c7afa76da967bf2a542b38659..4bca62ba003e325ebedd0fcc74c953bd64120cd5 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -4,7 +4,6 @@ import re
  from .subtitles import SubtitlesInfoExtractor
  
  from ..utils import (
-    compat_str,
      RegexNotFoundError,
  )
  
@@ -113,6 +112,6 @@ class TEDIE(SubtitlesInfoExtractor):
                      url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
                      sub_lang_list[l] = url
                  return sub_lang_list
-        except RegexNotFoundError as err:
+        except RegexNotFoundError:
              self._downloader.report_warning(u'video doesn\'t have subtitles')
          return {}
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py

new file mode 100644 (file)

index 0000000..2f728d3
--- /dev/null
+++ b/youtube_dl/extractor/toutv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class TouTvIE(InfoExtractor):
+    """Information extractor for episode pages on www.tou.tv."""
+
+    IE_NAME = u'tou.tv'
+    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
+
+    _TEST = {
+        u'url': u'http://www.tou.tv/30-vies/S04E41',
+        u'file': u'30-vies_S04E41.mp4',
+        u'info_dict': {
+            u'title': u'30 vies Saison 4 / Épisode 41',
+            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
+            u'age_limit': 8,
+            u'uploader': u'Groupe des Nouveaux Médias',
+            u'duration': 1296,
+            u'upload_date': u'20131118',
+            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
+        },
+        u'params': {
+            u'skip_download': True,  # Requires rtmpdump
+        },
+        u'skip': 'Only available in Canada'
+    }
+
+    def _real_extract(self, url):
+        """Extract the episode described by the page at url.
+
+        Returns the info dictionary of the episode.  Raises ExtractorError
+        if the stream list names no usable video URL, or if the stream
+        that is offered is the placeholder served outside of Canada.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        mediaId = self._search_regex(
+            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
+
+        streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
+        streams_webpage = self._download_webpage(
+            streams_url, video_id, note=u'Downloading stream list')
+
+        streams_doc = xml.etree.ElementTree.fromstring(
+            streams_webpage.encode('utf-8'))
+        # Skip advertisement entries and empty nodes.  The default of
+        # None keeps a bare StopIteration from escaping the extractor
+        # when no usable stream is listed.
+        video_url = next(
+            (n.text for n in streams_doc.findall('.//choice/url')
+             if n.text and u'//ad.doubleclick' not in n.text),
+            None)
+        if video_url is None:
+            raise ExtractorError(u'Unable to extract video URL')
+        if video_url.endswith('/Unavailable.flv'):
+            raise ExtractorError(
+                u'Access to this video is blocked from outside of Canada',
+                expected=True)
+
+        duration_str = self._html_search_meta(
+            'video:duration', webpage, u'duration')
+        duration = int(duration_str) if duration_str else None
+        upload_date_str = self._html_search_meta(
+            'video:release_date', webpage, u'upload date')
+        upload_date = unified_strdate(upload_date_str) if upload_date_str else None
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': video_url,
+            'description': self._og_search_description(webpage),
+            'uploader': self._dc_search_uploader(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'age_limit': self._media_rating_search(webpage),
+            'duration': duration,
+            'upload_date': upload_date,
+            'ext': 'mp4',
+        }
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py

index d4b7603c7c96a5da148ff50869559b0ff0c11a0f..4d9d41db3af2382bead67efa4afe6edbf9a6846e 100644 (file)
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -5,8 +5,6 @@ from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
-    compat_urllib_parse,
-    unescapeHTML,
  )
  from ..aes import (
      aes_decrypt_text
diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py

index 65f39b98259bc0050b512073b2f26e5bd0e49605..4800415bde2d103b8781ab3954d617fc50166074 100644 (file)
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -24,12 +24,16 @@ class VideoPremiumIE(InfoExtractor):
          webpage_url = 'http://videopremium.tv/' + video_id
          webpage = self._download_webpage(webpage_url, video_id)
  
-        self.report_extraction(video_id)
+        if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
+            # Download again, we need a cookie
+            webpage = self._download_webpage(
+                webpage_url, video_id,
+                note=u'Downloading webpage again (with cookie)')
  
-        video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
-            webpage, u'video title')
+        video_title = self._html_search_regex(
+            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
  
-        return [{
+        return {
              'id':          video_id,
              'url':         "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
              'play_path':   "mp4:%s.f4v" % video_id,
@@ -37,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
              'player_url':  "http://videopremium.tv/uplayer/uppod.swf",
              'ext':         'f4v',
              'title':       video_title,
-        }]
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

new file mode 100644 (file)

index 0000000..2206a06
--- /dev/null
+++ b/youtube_dl/extractor/viki.py
@@ -0,0 +1,115 @@
+import re
+
+from ..utils import (
+    ExtractorError,
+    unescapeHTML,
+    unified_strdate,
+)
+from .subtitles import SubtitlesInfoExtractor
+
+
+class VikiIE(SubtitlesInfoExtractor):
+    """Information extractor for viki.com video pages, with subtitles."""
+
+    IE_NAME = u'viki'
+
+    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
+    _TEST = {
+        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
+        u'file': u'1023585v.mp4',
+        u'md5': u'a21454021c2646f5433514177e2caa5f',
+        u'info_dict': {
+            u'title': u'Heirs Episode 14',
+            u'uploader': u'SBS',
+            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            u'upload_date': u'20131121',
+            u'age_limit': 13,
+        },
+        u'skip': u'Blocked in the US',
+    }
+
+    def _real_extract(self, url):
+        """Extract the video described by the page at url.
+
+        Returns the info dictionary, or None after listing the available
+        subtitles when the 'listsubtitles' parameter is set.  Raises
+        ExtractorError if the info page starts with a video-error element.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        uploader_m = re.search(
+            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
+        if uploader_m is None:
+            uploader = None
+        else:
+            uploader = uploader_m.group(1).strip()
+
+        rating_str = self._html_search_regex(
+            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
+            u'rating information', default='').strip()
+        RATINGS = {
+            'G': 0,
+            'PG': 10,
+            'PG-13': 13,
+            'R': 16,
+            'NC': 18,
+        }
+        # Ratings missing from the table yield an age limit of None
+        age_limit = RATINGS.get(rating_str)
+
+        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
+        info_webpage = self._download_webpage(
+            info_url, video_id, note=u'Downloading info page')
+        if re.match(r'\s*<div\s+class="video-error', info_webpage):
+            raise ExtractorError(
+                u'Video %s is blocked from your location.' % video_id,
+                expected=True)
+        video_url = self._html_search_regex(
+            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
+
+        # The upload date is optional: with fatal=False a missing
+        # "created_at" field yields None instead of aborting extraction,
+        # which is what the None check below is for.
+        upload_date_str = self._html_search_regex(
+            r'"created_at":"([^"]+)"', info_webpage, u'upload date',
+            fatal=False)
+        upload_date = (
+            unified_strdate(upload_date_str)
+            if upload_date_str is not None
+            else None
+        )
+
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, info_webpage)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, info_webpage)
+            return
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+            'thumbnail': thumbnail,
+            'age_limit': age_limit,
+            'uploader': uploader,
+            'subtitles': video_subtitles,
+            'upload_date': upload_date,
+        }
+
+    def _get_available_subtitles(self, video_id, info_webpage):
+        """Map language codes to the .vtt subtitle URLs in info_webpage."""
+        res = {}
+        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+            sturl = unescapeHTML(sturl_html)
+            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
+            if not m:
+                continue
+            res[m.group('lang')] = sturl
+        return res
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index d465bf20b6d65b7b5cd3a0545af676c2c710a07d..7d82c2cfa84bd9b57b7ebc9eb35537b4033ba45d 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor):
                  config = json.loads(config_json)
              except RegexNotFoundError:
                  # For pro videos or player.vimeo.com urls
-                config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
                      webpage, u'info section', flags=re.DOTALL)
                  config = json.loads(config)
          except Exception as e:
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py

index 03ad88bededd60a7d8462bb85e5a8cb23db381b2..e3458d2bd4abaa196190f886afce2e9ac05df191 100644 (file)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -5,7 +5,6 @@ from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
-    compat_urllib_parse,
  )
  
  class XTubeIE(InfoExtractor):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 1aa5497408b8bf0833d329a725dd9de97785558e..64d4c2445892baad5decb53620f605368ff335d5 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
  
  class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      IE_DESC = u'YouTube.com'
-    _VALID_URL = r"""^
+    _VALID_URL = r"""(?x)^
                       (
-                         (?:https?://)?                                       # http(s):// (optional)
-                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
+                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                              tube\.majestyc\.net/|
                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
@@ -363,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"uploader_id": u"justintimberlakeVEVO"
              }
          },
+        {
+            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
+            u"file":  u"yZIXLfi8CZQ.mp4",
+            u"note": u"Embed-only video (#1746)",
+            u"info_dict": {
+                u"upload_date": u"20120608",
+                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
+                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
+                u"uploader": u"SET India",
+                u"uploader_id": u"setindia"
+            }
+        },
      ]
  
  
@@ -370,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
          if YoutubePlaylistIE.suitable(url): return False
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url) is not None
  
      def __init__(self, *args, **kwargs):
          super(YoutubeIE, self).__init__(*args, **kwargs)
@@ -1272,7 +1284,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              # We simulate the access to the video from www.youtube.com/v/{video_id}
              # this can be viewed without login into Youtube
              data = compat_urllib_parse.urlencode({'video_id': video_id,
-                                                  'el': 'embedded',
+                                                  'el': 'player_embedded',
                                                    'gl': 'US',
                                                    'hl': 'en',
                                                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
@@ -1498,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              })
          return results
  
-class YoutubePlaylistIE(InfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
      IE_DESC = u'YouTube.com playlists'
      _VALID_URL = r"""(?:
                          (?:https?://)?
@@ -1514,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
                       |
                          ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                       )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
      IE_NAME = u'youtube:playlist'
  
      @classmethod
@@ -1523,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
          """Receives a URL and returns True if suitable for this IE."""
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
+    def _real_initialize(self):
+        self._login()
+
      def _real_extract(self, url):
          # Extract playlist id
          mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1536,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor):
              video_id = query_dict['v'][0]
              if self._downloader.params.get('noplaylist'):
                  self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
              else:
                  self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  
-        # Download playlist videos from API
-        videos = []
+        # Extract the video ids from the playlist pages
+        ids = []
  
          for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
              page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            # The ids are duplicated
+            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            ids.extend(new_ids)
  
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                  break
  
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
-
-        videos = [v[1] for v in sorted(videos)]
+        playlist_title = self._og_search_title(page)
  
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+        return self.playlist_result(url_results, playlist_id, playlist_title)
  
  
  class YoutubeChannelIE(InfoExtractor):
@@ -1628,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
-        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
-        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
-        return [self.playlist_result(url_entries, channel_id)]
+        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                       for video_id in video_ids]
+        return self.playlist_result(url_entries, channel_id)
  
  
  class YoutubeUserIE(InfoExtractor):
@@ -1694,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
              if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                  break
  
-        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
-        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
-        return [self.playlist_result(url_results, playlist_title = username)]
+        url_results = [
+            self.url_result(video_id, 'Youtube', video_id=video_id)
+            for video_id in video_ids]
+        return self.playlist_result(url_results, playlist_title=username)
+
  
  class YoutubeSearchIE(SearchInfoExtractor):
      IE_DESC = u'YouTube.com searches'
@@ -1737,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
  
          if len(video_ids) > n:
              video_ids = video_ids[:n]
-        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
+        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                  for video_id in video_ids]
          return self.playlist_result(videos, query)
  
  class YoutubeSearchDateIE(YoutubeSearchIE):
@@ -1797,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
              feed_html = info['feed_html']
              m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
              ids = orderedSet(m.group(1) for m in m_ids)
-            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+            feed_entries.extend(
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in ids)
              if info['paging'] is None:
                  break
          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
@@ -1822,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
      _PAGING_STEP = 100
      _PERSONAL_FEED = True
  
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    """Extractor for the YouTube watch history feed."""
+    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
+    _FEED_NAME = 'history'
+    _PERSONAL_FEED = True
+    _PLAYLIST_TITLE = u'Youtube Watch History'
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
+        # The paging step comes from the page; it is a ridiculously big number (like 1374343569725646)
+        self._PAGING_STEP = int(self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging'))
+        return super(YoutubeHistoryIE, self)._real_extract(url)
+
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = u'youtube:favorites'
      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py

index faed7ff7f0511c666795a20f48eefd4dc96c7009..07f830e80793e5ac08432831e1179708461e22fc 100644 (file)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,75 +1,119 @@
+import operator
  import re
  
  from .common import InfoExtractor
  from ..utils import (
-    determine_ext,
-    ExtractorError,
+    parse_xml_doc,
+    unified_strdate,
  )
  
  
  class ZDFIE(InfoExtractor):
+    """Information extractor for videos of the ZDF Mediathek."""
+
      _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
-    _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
  
      def _real_extract(self, url):
+        """Return the info dict for url, built from the ZDF XML service."""
          mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
          video_id = mobj.group('video_id')
  
-        if mobj.group('hash'):
-            url = url.replace(u'#', u'', 1)
+        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        info_xml = self._download_webpage(
+            xml_url, video_id, note=u'Downloading video info')
+        doc = parse_xml_doc(info_xml)
  
-        html = self._download_webpage(url, video_id)
-        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
-        if streams is None:
-            raise ExtractorError(u'No media url found.')
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        uploader_node = doc.find('.//details/originChannelTitle')
+        uploader = None if uploader_node is None else uploader_node.text
+        duration_str = doc.find('.//details/length').text
+        # The fractional part of the length is optional
+        duration_m = re.match(r'''(?x)^
+            (?P<hours>[0-9]{2})
+            :(?P<minutes>[0-9]{2})
+            :(?P<seconds>[0-9]{2})
+            (?:\.(?P<ms>[0-9]+))?
+            ''', duration_str)
+        duration = (
+            (
+                (int(duration_m.group('hours')) * 60 * 60) +
+                (int(duration_m.group('minutes')) * 60) +
+                int(duration_m.group('seconds'))
+            )
+            if duration_m
+            else None
+        )
+        upload_date = unified_strdate(doc.find('.//details/airtime').text)
  
-        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
-        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
-        # choose first/default media type and highest quality for now
-        def stream_pref(s):
-            TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
+        def xml_to_format(fnode):
+            """Convert a <formitaet> node into a format dictionary."""
+            video_url = fnode.find('url').text
+            is_available = u'http://www.metafilegenerator' not in video_url
+
+            format_id = fnode.attrib['basetype']
+            format_m = re.match(r'''(?x)
+                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
+                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
+            ''', format_id)
+
+            ext = format_m.group('container')
+            is_supported = ext != 'f4f'
+
+            # sorted() orders ascending and the first entry of each *_ORDER
+            # list gets the largest key, so unknown values use -999 to sort
+            # before every known one.
+            PROTO_ORDER = ['http', 'rtmp', 'rtsp']
              try:
-                type_pref = TYPE_ORDER.index(s['media_type'])
+                proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
              except ValueError:
-                type_pref = 999
+                proto_pref = -999
  
-            QUALITY_ORDER = ['veryhigh', '300']
+            quality = fnode.find('./quality').text
+            QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
              try:
-                quality_pref = QUALITY_ORDER.index(s['quality'])
+                quality_pref = -QUALITY_ORDER.index(quality)
              except ValueError:
-                quality_pref = 999
+                quality_pref = -999
  
-            return (type_pref, quality_pref)
-
-        sorted_streams = sorted(streams, key=stream_pref)
-        if not sorted_streams:
-            raise ExtractorError(u'No stream found.')
-        stream = sorted_streams[0]
-
-        media_link = self._download_webpage(
-            stream['video_url'],
-            video_id,
-            u'Get stream URL')
+            abr = int(fnode.find('./audioBitrate').text) // 1000
+            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            pref = (is_available, is_supported,
+                    proto_pref, quality_pref, vbr, abr)
  
-        MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
-        RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
+            format_note = u''
+            if not is_supported:
+                format_note += u'(unsupported)'
+            if not format_note:
+                format_note = None
  
-        mobj = re.search(self._MEDIA_STREAM, media_link)
-        if mobj is None:
-            mobj = re.search(RTSP_STREAM, media_link)
-            if mobj is None:
-                raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
-        video_url = mobj.group('video_url')
+            return {
+                'format_id': format_id + u'-' + quality,
+                'url': video_url,
+                'ext': ext,
+                'acodec': format_m.group('acodec'),
+                'vcodec': format_m.group('vcodec'),
+                'abr': abr,
+                'vbr': vbr,
+                'width': int(fnode.find('./width').text),
+                'height': int(fnode.find('./height').text),
+                'filesize': int(fnode.find('./filesize').text),
+                'format_note': format_note,
+                '_pref': pref,
+                '_available': is_available,
+            }
  
-        title = self._html_search_regex(
-            r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
-            html, u'title')
+        format_nodes = doc.findall('.//formitaeten/formitaet')
+        formats = sorted(filter(lambda f: f['_available'],
+                                map(xml_to_format, format_nodes)),
+                         key=operator.itemgetter('_pref'))
  
          return {
              'id': video_id,
-            'url': video_url,
              'title': title,
-            'ext': determine_ext(video_url)
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'duration': duration,
+            'upload_date': upload_date,
          }
diff --git a/youtube_dl/update.py b/youtube_dl/update.py

index f41b4785ae80a463fe7d4cd25cd5daaba46cb2e7..cd9670166e582ae9f3074c2371026f0f06c252a1 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
      if signature != sha256(message).digest(): return False
      return True
  
+
  def update_self(to_screen, verbose):
      """Update the program file with the latest version from the repository"""
  
@@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
          return
  
      version_id = versions_info['latest']
+
+    def version_tuple(version_str):
+        return tuple(map(int, version_str.split('.')))
+    if version_tuple(__version__) >= version_tuple(version_id):
+        to_screen(u'youtube-dl is up to date (%s)' % __version__)
+        return
+
      to_screen(u'Updating to version ' + version_id + '...')
      version = versions_info['versions'][version_id]
  
@@ -109,7 +117,7 @@ def update_self(to_screen, verbose):
              urlh = compat_urllib_request.urlopen(version['exe'][0])
              newcontent = urlh.read()
              urlh.close()
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              if verbose: to_screen(compat_str(traceback.format_exc()))
              to_screen(u'ERROR: unable to download latest version')
              return
@@ -122,7 +130,7 @@ def update_self(to_screen, verbose):
          try:
              with open(exe + '.new', 'wb') as outf:
                  outf.write(newcontent)
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              if verbose: to_screen(compat_str(traceback.format_exc()))
              to_screen(u'ERROR: unable to write the new version')
              return
@@ -141,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
  
              subprocess.Popen([bat])  # Continues to run in the background
              return  # Do not show premature success messages
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              if verbose: to_screen(compat_str(traceback.format_exc()))
              to_screen(u'ERROR: unable to overwrite current version')
              return
@@ -152,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
              urlh = compat_urllib_request.urlopen(version['bin'][0])
              newcontent = urlh.read()
              urlh.close()
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              if verbose: to_screen(compat_str(traceback.format_exc()))
              to_screen(u'ERROR: unable to download latest version')
              return
@@ -165,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
          try:
              with open(filename, 'wb') as outf:
                  outf.write(newcontent)
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              if verbose: to_screen(compat_str(traceback.format_exc()))
              to_screen(u'ERROR: unable to overwrite current version')
              return
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 1d9785341ec685071ea8fcc4846029a3e889bc72..946e90e93061bb7ff26b46595dfd7930974323b9 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -8,13 +8,16 @@ import gzip
  import io
  import json
  import locale
+import math
  import os
  import pipes
  import platform
  import re
+import ssl
  import socket
  import sys
  import traceback
+import xml.etree.ElementTree
  import zlib
  
  try:
@@ -535,17 +538,34 @@ def formatSeconds(secs):
      else:
          return '%d' % secs
  
-def make_HTTPS_handler(opts):
-    if sys.version_info < (3,2):
-        # Python's 2.x handler is very simplistic
-        return compat_urllib_request.HTTPSHandler()
+def make_HTTPS_handler(opts_no_check_certificate):
+    if sys.version_info < (3, 2):
+        import httplib
+
+        class HTTPSConnectionV3(httplib.HTTPSConnection):
+            def __init__(self, *args, **kwargs):
+                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
+
+            def connect(self):
+                sock = socket.create_connection((self.host, self.port), self.timeout)
+                if self._tunnel_host:
+                    self.sock = sock
+                    self._tunnel()
+                try:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                except ssl.SSLError:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
+
+        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
+            def https_open(self, req):
+                return self.do_open(HTTPSConnectionV3, req)
+        return HTTPSHandlerV3()
      else:
-        import ssl
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
          context.set_default_verify_paths()
          
          context.verify_mode = (ssl.CERT_NONE
-                               if opts.no_check_certificate
+                               if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          return compat_urllib_request.HTTPSHandler(context=context)
  
@@ -734,6 +754,8 @@ def unified_strdate(date_str):
          '%Y/%m/%d %H:%M:%S',
          '%d.%m.%Y %H:%M',
          '%Y-%m-%dT%H:%M:%SZ',
+        '%Y-%m-%dT%H:%M:%S.%fZ',
+        '%Y-%m-%dT%H:%M:%S.%f0Z',
          '%Y-%m-%dT%H:%M:%S',
      ]
      for expression in format_expressions:
@@ -949,7 +971,16 @@ class locked_file(object):
  
  
  def shell_quote(args):
-    return ' '.join(map(pipes.quote, args))
+    quoted_args = []
+    encoding = sys.getfilesystemencoding()
+    if encoding is None:
+        encoding = 'utf-8'
+    for a in args:
+        if isinstance(a, bytes):
+            # We may get a filename encoded with 'encodeFilename'
+            a = a.decode(encoding)
+        quoted_args.append(pipes.quote(a))
+    return u' '.join(quoted_args)
  
  
  def takewhile_inclusive(pred, seq):
@@ -976,3 +1007,22 @@ def unsmuggle_url(smug_url):
      jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
      data = json.loads(jsond)
      return url, data
+
+
+def parse_xml_doc(s):
+    assert isinstance(s, type(u''))
+    return xml.etree.ElementTree.fromstring(s.encode('utf-8'))
+
+
+def format_bytes(bytes):
+    if bytes is None:
+        return u'N/A'
+    if type(bytes) is str:
+        bytes = float(bytes)
+    if bytes == 0.0:
+        exponent = 0
+    else:
+        exponent = int(math.log(bytes, 1024.0))
+    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
+    converted = float(bytes) / float(1024 ** exponent)
+    return u'%.2f%s' % (converted, suffix)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 110058c790c10e74154e035b2878af92d4341710..2af23040fabf1594e0e597b6c28ec0cff1a446d6 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2013.11.17'
+__version__ = '2013.11.25.1'
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 25 Nov 2013 05:16:18 +0000 (06:16 +0100)
README.md		patch \| blob \| history
setup.py		patch \| blob \| history
test/helper.py		patch \| blob \| history
test/test_age_restriction.py		patch \| blob \| history
test/test_all_urls.py		patch \| blob \| history
test/test_download.py		patch \| blob \| history
test/test_playlists.py		patch \| blob \| history
test/test_subtitles.py		patch \| blob \| history
test/test_utils.py		patch \| blob \| history
test/test_write_annotations.py		patch \| blob \| history
test/test_write_info_json.py		patch \| blob \| history
test/test_youtube_lists.py		patch \| blob \| history
test/test_youtube_signature.py		patch \| blob \| history
youtube_dl/FileDownloader.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/anitube.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/auengine.py		patch \| blob \| history
youtube_dl/extractor/bandcamp.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/canalplus.py		patch \| blob \| history
youtube_dl/extractor/clipfish.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/collegehumor.py		patch \| blob \| history
youtube_dl/extractor/comedycentral.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/d8.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/eighttracks.py		patch \| blob \| history
youtube_dl/extractor/escapist.py		patch \| blob \| history
youtube_dl/extractor/facebook.py		patch \| blob \| history
youtube_dl/extractor/fktv.py		patch \| blob \| history
youtube_dl/extractor/gamespot.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/howcast.py		patch \| blob \| history
youtube_dl/extractor/jeuxvideo.py		patch \| blob \| history
youtube_dl/extractor/livestream.py		patch \| blob \| history
youtube_dl/extractor/mixcloud.py		patch \| blob \| history
youtube_dl/extractor/mtv.py		patch \| blob \| history
youtube_dl/extractor/nhl.py		patch \| blob \| history
youtube_dl/extractor/niconico.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pornhub.py		patch \| blob \| history
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/spankwire.py		patch \| blob \| history
youtube_dl/extractor/spiegel.py		patch \| blob \| history
youtube_dl/extractor/streamcloud.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/sztvhu.py		patch \| blob \| history
youtube_dl/extractor/teamcoco.py		patch \| blob \| history
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/toutv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tube8.py		patch \| blob \| history
youtube_dl/extractor/videopremium.py		patch \| blob \| history
youtube_dl/extractor/viki.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/xtube.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/extractor/zdf.py		patch \| blob \| history
youtube_dl/update.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history