Merge branch 'cinemassacre' of github.com:rzhxeo/youtube-dl into rzhxeo-cinemassacre
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 11 Oct 2013 17:53:27 +0000 (19:53 +0200)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 11 Oct 2013 17:53:27 +0000 (19:53 +0200)
42 files changed:
.gitignore
README.md
devscripts/gh-pages/update-sites.py
test/helper.py
test/test_age_restriction.py [new file with mode: 0644]
test/test_dailymotion_subtitles.py
test/test_download.py
test/test_playlists.py
test/test_youtube_subtitles.py
tox.ini [new file with mode: 0644]
youtube_dl/FileDownloader.py
youtube_dl/PostProcessor.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/faz.py [new file with mode: 0644]
youtube_dl/extractor/flickr.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nhl.py [new file with mode: 0644]
youtube_dl/extractor/pornotube.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rtlnow.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/viddler.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index 24fdb3626cd0d306cd5fc1367543f4b3691394c3..7dd0ad09b06c172596ca751a22055b9a3d6207d4 100644 (file)
@@ -25,3 +25,4 @@ updates_key.pem
 *.mp4
 *.part
 test/testdata
+.tox
index 14d62b189c3dec1ceaeac35aa497f9f66628506f..8824daee2cba4437c44db2576384527c55a3c3f1 100644 (file)
--- a/README.md
+++ b/README.md
@@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
     --datebefore DATE          download only videos uploaded before this date
     --dateafter DATE           download only videos uploaded after this date
     --no-playlist              download only the currently playing video
+    --age-limit YEARS          download only videos suitable for the given age
+    --download-archive FILE    Download only videos not present in the archive
+                               file. Record all downloaded videos in it.
 
 ## Download Options:
     -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
index 33f242480e7c967ec0ca89fe57b5392dda0a524f..153e15c8ab674f44e3681e5440c9372634b272d8 100755 (executable)
@@ -16,10 +16,11 @@ def main():
     ie_htmls = []
     for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
-        try:
+        ie_desc = getattr(ie, 'IE_DESC', None)
+        if ie_desc is False:
+            continue
+        elif ie_desc is not None:
             ie_html += ': {}'.format(ie.IE_DESC)
-        except AttributeError:
-            pass
         if ie.working() == False:
             ie_html += ' (Currently broken)'
         ie_htmls.append('<li>{}</li>'.format(ie_html))
index a2b468b509b3effc6ff61c2b00cb9ca1d59f3711..ad1b74dd30c140b01d8c9a4c51513cb2e3b8997f 100644 (file)
@@ -1,6 +1,9 @@
+import errno
 import io
 import json
 import os.path
+import re
+import types
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL, YoutubeDLHandler
@@ -9,30 +12,47 @@ from youtube_dl.utils import (
     compat_urllib_request,
 )
 
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
+youtube_dl._setup_opener(timeout=10)
 
 PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
 with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
     parameters = json.load(pf)
 
+
+def try_rm(filename):
+    """ Remove a file if it exists """
+    try:
+        os.remove(filename)
+    except OSError as ose:
+        if ose.errno != errno.ENOENT:
+            raise
+
+
 class FakeYDL(YoutubeDL):
     def __init__(self):
-        self.result = []
         # Different instances of the downloader can't share the same dictionary
         # some test set the "sublang" parameter, which would break the md5 checks.
-        self.params = dict(parameters)
-    def to_screen(self, s):
+        params = dict(parameters)
+        super(FakeYDL, self).__init__(params)
+        self.result = []
+        
+    def to_screen(self, s, skip_eol=None):
         print(s)
+
     def trouble(self, s, tb=None):
         raise Exception(s)
+
     def download(self, x):
         self.result.append(x)
 
+    def expect_warning(self, regex):
+        # Silence an expected warning matching a regex
+        old_report_warning = self.report_warning
+        def report_warning(self, message):
+            if re.match(regex, message): return
+            old_report_warning(message)
+        self.report_warning = types.MethodType(report_warning, self)
+
 def get_testcases():
     for ie in youtube_dl.extractor.gen_extractors():
         t = getattr(ie, '_TEST', None)
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644 (file)
index 0000000..943f9a3
--- /dev/null
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl import YoutubeDL
+from helper import try_rm
+
+
+def _download_restricted(url, filename, age):
+    """ Returns true iff the file has been downloaded """
+
+    params = {
+        'age_limit': age,
+        'skip_download': True,
+        'writeinfojson': True,
+        "outtmpl": "%(id)s.%(ext)s",
+    }
+    ydl = YoutubeDL(params)
+    ydl.add_default_info_extractors()
+    json_filename = filename + '.info.json'
+    try_rm(json_filename)
+    ydl.download([url])
+    res = os.path.exists(json_filename)
+    try_rm(json_filename)
+    return res
+
+
+class TestAgeRestriction(unittest.TestCase):
+    def _assert_restricted(self, url, filename, age, old_age=None):
+        self.assertTrue(_download_restricted(url, filename, old_age))
+        self.assertFalse(_download_restricted(url, filename, age))
+
+    def test_youtube(self):
+        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+    def test_youporn(self):
+        self._assert_restricted(
+            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+            '505835.mp4', 2, old_age=25)
+
+    def test_pornotube(self):
+        self._assert_restricted(
+            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+            '1689755.flv', 13)
+
+
+if __name__ == '__main__':
+    unittest.main()
index 83c65d57e60870be10b25c116ebad6fd20cc8122..ed2ad311df0035010be978e3515e88e18f7ba11a 100644 (file)
@@ -2,8 +2,6 @@
 
 import sys
 import unittest
-import json
-import io
 import hashlib
 
 # Allow direct execution
@@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 5)
     def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
         self.DL.params['listsubtitles'] = True
         info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
     def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
         self.DL.params['writeautomaticsub'] = True
         self.DL.params['subtitleslang'] = ['en']
         subtitles = self.getSubtitles()
         self.assertTrue(len(subtitles.keys()) == 0)
     def test_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
index 23a66254d86ed2a68ee3ea54339838fda7d5dc71..fdf59bb5c6af88bc0ee8dcbcbb4bc72b383b7968 100644 (file)
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 
-import errno
 import hashlib
 import io
 import os
@@ -20,22 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para
 
 RETRIES = 3
 
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-socket.setdefaulttimeout(10)
-
-def _try_rm(filename):
-    """ Remove a file if it exists """
-    try:
-        os.remove(filename)
-    except OSError as ose:
-        if ose.errno != errno.ENOENT:
-            raise
-
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 class YoutubeDL(youtube_dl.YoutubeDL):
@@ -54,7 +37,8 @@ def _file_md5(fn):
     with open(fn, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
 
-from helper import get_testcases
+import helper  # Set up remaining global configuration
+from helper import get_testcases, try_rm
 defs = get_testcases()
 
 with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
@@ -97,9 +81,9 @@ def generator(test_case):
 
         test_cases = test_case.get('playlist', [test_case])
         for tc in test_cases:
-            _try_rm(tc['file'])
-            _try_rm(tc['file'] + '.part')
-            _try_rm(tc['file'] + '.info.json')
+            try_rm(tc['file'])
+            try_rm(tc['file'] + '.part')
+            try_rm(tc['file'] + '.info.json')
         try:
             for retry in range(1, RETRIES + 1):
                 try:
@@ -145,9 +129,9 @@ def generator(test_case):
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
         finally:
             for tc in test_cases:
-                _try_rm(tc['file'])
-                _try_rm(tc['file'] + '.part')
-                _try_rm(tc['file'] + '.info.json')
+                try_rm(tc['file'])
+                try_rm(tc['file'] + '.part')
+                try_rm(tc['file'] + '.info.json')
 
     return test_template
 
index c33511333bd82c2e180bf295587fad3078a755f5..de8bd298a3e6c5c54518869b2bac2c166c5c3226 100644 (file)
@@ -16,6 +16,7 @@ from youtube_dl.extractor import (
     UstreamChannelIE,
     SoundcloudUserIE,
     LivestreamIE,
+    NHLVideocenterIE,
 )
 from youtube_dl.utils import *
 
@@ -74,5 +75,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
 
+    def test_nhl_videocenter(self):
+        dl = FakeYDL()
+        ie = NHLVideocenterIE(dl)
+        result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'999')
+        self.assertEqual(result['title'], u'Highlights')
+        self.assertEqual(len(result['entries']), 12)
+
 if __name__ == '__main__':
     unittest.main()
index 168e6c66cbf089d72622ab6cc20f2c66d19a8c2b..f9b0c1ad0b532191a6d4f1e97be5d86ad20d37fd 100644 (file)
@@ -2,8 +2,6 @@
 
 import sys
 import unittest
-import json
-import io
 import hashlib
 
 # Allow direct execution
@@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
     def test_youtube_list_subtitles(self):
+        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
         self.DL.params['listsubtitles'] = True
         info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
@@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         self.assertTrue(subtitles['it'] is not None)
     def test_youtube_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
         self.url = 'sAjKT8FhjI8'
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
diff --git a/tox.ini b/tox.ini
new file mode 100644 (file)
index 0000000..53b461f
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,5 @@
+[tox]
+envlist = py26,py27,py33
+[testenv]
+deps = nose
+commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test
index d6673fd3ab88d543086417f903cf2ff7d4019944..8ecabab1a517467c118dad0857c47291bcb2f929 100644 (file)
@@ -270,6 +270,7 @@ class FileDownloader(object):
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        test = self.params.get('test', False)
 
         # Check for rtmpdump first
         try:
@@ -291,6 +292,8 @@ class FileDownloader(object):
             basic_args += ['--playpath', play_path]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
+        if test:
+            basic_args += ['--stop', '1']
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -300,7 +303,7 @@ class FileDownloader(object):
                 shell_quote = repr
             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
         retval = subprocess.call(args)
-        while retval == 2 or retval == 1:
+        while (retval == 2 or retval == 1) and not test:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
             time.sleep(5.0) # This seems to be needed
@@ -313,7 +316,7 @@ class FileDownloader(object):
                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                 retval = 0
                 break
-        if retval == 0:
+        if retval == 0 or (test and retval == 2):
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
index 3ee1d3c5865e203f564d87c8beb55a3de3a46934..fbf8a7f98ffc67792c48de2cb21a1536ffba08ac 100644 (file)
@@ -2,6 +2,7 @@ import os
 import subprocess
 import sys
 import time
+import datetime
 
 from .utils import *
 
@@ -467,3 +468,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+    def run(self, info):
+        metadata = {}
+        if info.get('title') is not None:
+            metadata['title'] = info['title']
+        if info.get('upload_date') is not None:
+            metadata['date'] = info['upload_date']
+        if info.get('uploader') is not None:
+            metadata['artist'] = info['uploader']
+        elif info.get('uploader_id') is not None:
+            metadata['artist'] = info['uploader_id']
+
+        if not metadata:
+            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+            return True, info
+
+        filename = info['filepath']
+        ext = os.path.splitext(filename)[1][1:]
+        temp_filename = filename + u'.temp'
+
+        options = ['-c', 'copy']
+        for (name, value) in metadata.items():
+            options.extend(['-metadata', '%s="%s"' % (name, value)])
+        options.extend(['-f', ext])
+
+        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        return True, info
index 2503fd09b976b86da60c9a82fd31495266106c97..e85e03fa44fc7232f8717e08d9e0ba70a7f1e03b 100644 (file)
@@ -3,6 +3,7 @@
 
 from __future__ import absolute_import
 
+import errno
 import io
 import os
 import re
@@ -84,6 +85,11 @@ class YoutubeDL(object):
     cachedir:          Location of the cache files in the filesystem.
                        None to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
+    age_limit:         An integer representing the user's age in years.
+                       Unsuitable videos for the given age are skipped.
+    downloadarchive:   File name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded
+                       again.
     
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -113,7 +119,7 @@ class YoutubeDL(object):
                 and not params['restrictfilenames']):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
-                u'Assuming --restrict-filenames isnce file system encoding '
+                u'Assuming --restrict-filenames since file system encoding '
                 u'cannot encode all charactes. '
                 u'Set the LC_ALL environment variable to fix this.')
             params['restrictfilenames'] = True
@@ -309,6 +315,13 @@ class YoutubeDL(object):
             dateRange = self.params.get('daterange', DateRange())
             if date not in dateRange:
                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        age_limit = self.params.get('age_limit')
+        if age_limit is not None:
+            if age_limit < info_dict.get('age_limit', 0):
+                return u'Skipping "' + title + '" because it is age restricted'
+        if self.in_download_archive(info_dict):
+            return (u'%(title)s has already been recorded in archive'
+                    % info_dict)
         return None
         
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +591,8 @@ class YoutubeDL(object):
                     self.report_error(u'postprocessing: %s' % str(err))
                     return
 
+        self.record_download_archive(info_dict)
+
     def download(self, url_list):
         """Download a given list of URLs."""
         if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +632,26 @@ class YoutubeDL(object):
                 os.remove(encodeFilename(filename))
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
+
+    def in_download_archive(self, info_dict):
+        fn = self.params.get('download_archive')
+        if fn is None:
+            return False
+        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        try:
+            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+                for line in archive_file:
+                    if line.strip() == vid_id:
+                        return True
+        except IOError as ioe:
+            if ioe.errno != errno.ENOENT:
+                raise
+        return False
+
+    def record_download_archive(self, info_dict):
+        fn = self.params.get('download_archive')
+        if fn is None:
+            return
+        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+            archive_file.write(vid_id + u'\n')
index 03df835f23ebe58bd1048d336ba68019e4a2e33d..3513d719fd38b136c6b302b3717925fa35e7456e 100644 (file)
@@ -36,6 +36,7 @@ __authors__  = (
 __license__ = 'Public Domain'
 
 import codecs
+import collections
 import getpass
 import optparse
 import os
@@ -188,6 +189,12 @@ def parseOpts(overrideArguments=None):
     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+                         help='download only videos suitable for the given age',
+                         default=None, type=int)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')
 
 
     authentication.add_option('-u', '--username',
@@ -351,6 +358,8 @@ def parseOpts(overrideArguments=None):
             help='do not overwrite post-processed files; the post-processed files are overwritten by default')
     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
             help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
+            help='add metadata to the files')
 
 
     parser.add_option_group(general)
@@ -441,27 +450,7 @@ def _real_main(argv=None):
     all_urls = batchurls + args
     all_urls = [url.strip() for url in all_urls]
 
-    # General configuration
-    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-    if opts.proxy is not None:
-        if opts.proxy == '':
-            proxies = {}
-        else:
-            proxies = {'http': opts.proxy, 'https': opts.proxy}
-    else:
-        proxies = compat_urllib_request.getproxies()
-        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
-        if 'http' in proxies and 'https' not in proxies:
-            proxies['https'] = proxies['http']
-    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
-    https_handler = make_HTTPS_handler(opts)
-    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
-    # Delete the default user-agent header, which would otherwise apply in
-    # cases where our custom HTTP handler doesn't come into play
-    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
-    opener.addheaders =[]
-    compat_urllib_request.install_opener(opener)
-    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+    opener = _setup_opener(jar=jar, opts=opts)
 
     extractors = gen_extractors()
 
@@ -478,6 +467,8 @@ def _real_main(argv=None):
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+            if desc is False:
+                continue
             if hasattr(ie, 'SEARCH_KEY'):
                 _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                 _COUNTS = (u'', u'5', u'10', u'all')
@@ -631,6 +622,8 @@ def _real_main(argv=None):
         'daterange': date,
         'cachedir': opts.cachedir,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
+        'age_limit': opts.age_limit,
+        'download_archive': opts.download_archive,
         })
 
     if opts.verbose:
@@ -650,11 +643,19 @@ def _real_main(argv=None):
             except:
                 pass
         write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+
+        proxy_map = {}
+        for handler in opener.handlers:
+            if hasattr(handler, 'proxies'):
+                proxy_map.update(handler.proxies)
+        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 
     ydl.add_default_info_extractors()
 
     # PostProcessors
+    # Add the metadata pp first, the other pps will copy it
+    if opts.addmetadata:
+        ydl.add_post_processor(FFmpegMetadataPP())
     if opts.extractaudio:
         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
     if opts.recodevideo:
@@ -688,6 +689,37 @@ def _real_main(argv=None):
 
     sys.exit(retcode)
 
+
+def _setup_opener(jar=None, opts=None, timeout=300):
+    if opts is None:
+        FakeOptions = collections.namedtuple(
+            'FakeOptions', ['proxy', 'no_check_certificate'])
+        opts = FakeOptions(proxy=None, no_check_certificate=False)
+
+    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+    if opts.proxy is not None:
+        if opts.proxy == '':
+            proxies = {}
+        else:
+            proxies = {'http': opts.proxy, 'https': opts.proxy}
+    else:
+        proxies = compat_urllib_request.getproxies()
+        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+        if 'http' in proxies and 'https' not in proxies:
+            proxies['https'] = proxies['http']
+    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+    https_handler = make_HTTPS_handler(opts)
+    opener = compat_urllib_request.build_opener(
+        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders = []
+    compat_urllib_request.install_opener(opener)
+    socket.setdefaulttimeout(timeout)
+    return opener
+
+
 def main(argv=None):
     try:
         _real_main(argv)
index db30edc276250e3434db1e8dfd0fded90d12bbba..226c3a762e802b4f98bb5ba33d4ed2a74d5f7b32 100644 (file)
@@ -35,6 +35,7 @@ from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
+from .faz import FazIE
 from .fktv import (
     FKTVIE,
     FKTVPosteckeIE,
@@ -81,6 +82,7 @@ from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
+from .nhl import NHLIE, NHLVideocenterIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
@@ -118,6 +120,7 @@ from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vice import ViceIE
+from .viddler import ViddlerIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
@@ -141,6 +144,7 @@ from .youtube import (
     YoutubeShowIE,
     YoutubeSubscriptionsIE,
     YoutubeRecommendedIE,
+    YoutubeTruncatedURLIE,
     YoutubeWatchLaterIE,
     YoutubeFavouritesIE,
 )
index 69b3b0ad7820600ef5107ad3d79230c0e4edcaac..4707d7ccab51502dadf787ab2a2fb1558a1c9d45 100644 (file)
@@ -109,17 +109,27 @@ class ArteTvIE(InfoExtractor):
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
         formats = filter(_match_lang, formats)
+        # Some formats use the m3u8 protocol
+        formats = filter(lambda f: f['videoFormat'] != 'M3U8', formats)
         # We order the formats by quality
         formats = sorted(formats, key=lambda f: int(f['height']))
         # Prefer videos without subtitles in the same language
         formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
         # Pick the best quality
-        format_info = formats[-1]
-        if format_info['mediaType'] == u'rtmp':
-            info_dict['url'] = format_info['streamer']
-            info_dict['play_path'] = 'mp4:' + format_info['url']
-        else:
-            info_dict['url'] = format_info['url']
+        def _format(format_info):
+            info = {'ext': 'flv',
+                    'width': format_info.get('width'),
+                    'height': format_info.get('height'),
+                    }
+            if format_info['mediaType'] == u'rtmp':
+                info['url'] = format_info['streamer']
+                info['play_path'] = 'mp4:' + format_info['url']
+            else:
+                info_dict['url'] = format_info['url']
+            return info
+        info_dict['formats'] = [_format(f) for f in formats]
+        # TODO: Remove when #980 has been merged 
+        info_dict.update(info_dict['formats'][-1])
 
         return info_dict
 
index 08b28c994272e3461bba5d99856928ff6adb6cf3..493504f75082f7b7605121acbfd88dbb621e84fb 100644 (file)
@@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor):
                 ext = umobj.group(1)
 
                 info = {
-                    'id': data['item_id'],
+                    'id': compat_str(data['item_id']),
                     'url': video_url,
                     'uploader': data['display_name'],
                     'upload_date': upload_date,
index 558b3d0093975eff273d38a7a2bad9f1d4e36355..745212f2fe731bf305e56e8087089c65efabbd68 100644 (file)
@@ -49,6 +49,11 @@ class BrightcoveIE(InfoExtractor):
         Build a Brightcove url from a xml string containing
         <object class="BrightcoveExperience">{params}</object>
         """
+
+        # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
+        object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
+                            lambda m: m.group(1) + '/>', object_str)
+
         object_doc = xml.etree.ElementTree.fromstring(object_str)
         assert u'BrightcoveExperience' in object_doc.attrib['class']
         params = {'flashID': object_doc.attrib['id'],
index bf8d711eea44c8d60855f458407391d66ef2664d..69b2beecebac319ef92e8043ab75ad71fad46a25 100644 (file)
@@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):
         '400': 'mp4',
     }
     _video_dimensions = {
-        '3500': '1280x720',
-        '2200': '960x540',
-        '1700': '768x432',
-        '1200': '640x360',
-        '750': '512x288',
-        '400': '384x216',
+        '3500': (1280, 720),
+        '2200': (960, 540),
+        '1700': (768, 432),
+        '1200': (640, 360),
+        '750': (512, 288),
+        '400': (384, 216),
     }
 
     @classmethod
@@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):
         """Receives a URL and returns True if suitable for this IE."""
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for x in formats:
-            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
-
+    @staticmethod
+    def _transform_rtmp_url(rtmp_video_url):
+        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+        if not m:
+            raise ExtractorError(u'Cannot transform RTMP url')
+        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+        return base + m.group('finalid')
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):
                 self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                 continue
 
-            if self._downloader.params.get('listformats', None):
-                self._print_formats([i[0] for i in turls])
-                return
-
-            # For now, just pick the highest bitrate
-            format,rtmp_video_url = turls[-1]
-
-            # Get the format arg from the arg stream
-            req_format = self._downloader.params.get('format', None)
-
-            # Select format if we can find one
-            for f,v in turls:
-                if f == req_format:
-                    format, rtmp_video_url = f, v
-                    break
-
-            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
-            if not m:
-                raise ExtractorError(u'Cannot transform RTMP url')
-            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
-            video_url = base + m.group('finalid')
+            formats = []
+            for format, rtmp_video_url in turls:
+                w, h = self._video_dimensions.get(format, (None, None))
+                formats.append({
+                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'ext': self._video_extensions.get(format, 'mp4'),
+                    'format_id': format,
+                    'height': h,
+                    'width': w,
+                })
 
             effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
             info = {
                 'id': shortMediaId,
-                'url': video_url,
+                'formats': formats,
                 'uploader': showId,
                 'upload_date': officialDate,
                 'title': effTitle,
-                'ext': 'mp4',
-                'format': format,
                 'thumbnail': None,
                 'description': compat_str(officialTitle),
             }
+
+            # TODO: Remove when #980 has been merged
+            info.update(info['formats'][-1])
+
             results.append(info)
 
         return results
index 77726ee2432fc2bcd6df6ce89dcc560419524051..2a5a85dc67b4f7a57d04d4f21c1608aa2c47f7f3 100644 (file)
@@ -35,6 +35,8 @@ class InfoExtractor(object):
     title:          Video title, unescaped.
     ext:            Video filename extension.
 
+    Instead of url and ext, formats can also be specified.
+
     The following fields are optional:
 
     format:         The video format, defaults to ext (used for --get-format)
@@ -52,8 +54,20 @@ class InfoExtractor(object):
     view_count:     How many users have watched the video on the platform.
     urlhandle:      [internal] The urlHandle to be used to download the file,
                     like returned by urllib.request.urlopen
-
-    The fields should all be Unicode strings.
+    age_limit:      Age restriction for the video, as an integer (years)
+    formats:        A list of dictionaries for each format available, it must
+                    be ordered from worst to best quality. Potential fields:
+                    * url       Mandatory. The URL of the video file
+                    * ext       Will be calculated from url if missing
+                    * format    A human-readable description of the format
+                                ("mp4 container with h264/opus").
+                                Calculated from width and height if missing.
+                    * format_id A short description of the format
+                                ("mp4_h264_opus" or "19")
+                    * width     Width of the video, if known
+                    * height    Height of the video, if known
+
+    Unless mentioned otherwise, the fields should be Unicode strings.
 
     Subclasses of this one should re-define the _real_initialize() and
     _real_extract() methods and define a _VALID_URL regexp.
@@ -305,6 +319,15 @@ class InfoExtractor(object):
                                         self._og_regex('video')],
                                        html, name, **kargs)
 
+    def _rta_search(self, html):
+        # See http://www.rtalabel.org/index.php?content=howtofaq#single
+        if re.search(r'(?ix)<meta\s+name="rating"\s+'
+                     r'     content="RTA-5042-1996-1400-1577-RTA"',
+                     html):
+            return 18
+        return 0
+
+
 class SearchInfoExtractor(InfoExtractor):
     """
     Base class for paged search queries extractors.
index 259806f385d621b909f465f0dc78271fb352c4fe..7d83539469d3d7ff120f916cc837a60bacfe8390 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     compat_str,
     get_element_by_attribute,
     get_element_by_id,
+    orderedSet,
 
     ExtractorError,
 )
@@ -158,12 +159,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
                                              id, u'Downloading page %s' % pagenum)
 
             playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
-            video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
+            video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
         return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
-                   for video_id in video_ids]
+                   for video_id in orderedSet(video_ids)]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
new file mode 100644 (file)
index 0000000..deaa4ed
--- /dev/null
@@ -0,0 +1,60 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    clean_html,
+    get_element_by_attribute,
+)
+
+
+class FazIE(InfoExtractor):
+    IE_NAME = u'faz.net'
+    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
+
+    _TEST = {
+        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
+        u'file': u'12610585.mp4',
+        u'info_dict': {
+            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
+            u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        self.to_screen(video_id)
+        webpage = self._download_webpage(url, video_id)
+        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
+            u'config xml url')
+        config_xml = self._download_webpage(config_xml_url, video_id,
+            u'Downloading config xml')
+        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+
+        encodings = config.find('ENCODINGS')
+        formats = []
+        for code in ['LOW', 'HIGH', 'HQ']:
+            encoding = encodings.find(code)
+            if encoding is None:
+                continue
+            encoding_url = encoding.find('FILENAME').text
+            formats.append({
+                'url': encoding_url,
+                'ext': determine_ext(encoding_url),
+                'format_id': code.lower(),
+            })
+
+        descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
+        info = {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': clean_html(descr_html),
+            'thumbnail': config.find('STILL/STILL_BIG').text,
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info
index 80d96baf739522b97f933878faa8a4083a0e8959..e1d2f0526a42169fbbba96789a413d3d20fec6a6 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 class FlickrIE(InfoExtractor):
     """Information Extractor for Flickr videos"""
-    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
     _TEST = {
         u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
         u'file': u'5645318632.mp4',
index 461dac8efba90ea09f4f1e585d93754c81bd0136..086cafca027e3b99967f50f3220d1632f1cc8033 100644 (file)
@@ -103,7 +103,7 @@ class France2IE(FranceTVBaseInfoExtractor):
 
 
 class GenerationQuoiIE(InfoExtractor):
-    IE_NAME = u'http://generation-quoi.france2.fr'
+    IE_NAME = u'france2.fr:generation-quoi'
     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
 
     _TEST = {
index cd3bbe65f5dd9891f5cf4a68fb1adcd8a45c4196..5edbf678ad805f4f5408bf7d478fd5f7402ae5ce 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
 )
 
 class GameSpotIE(InfoExtractor):
+    _WORKING = False
     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
     _TEST = {
         u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
index 764070635160d96487baa53b4370b28e8a957316..7060c6f9258c28c9dcb18681c62882f52715edf9 100644 (file)
@@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):
         except ValueError:
             # since this is the last-resort InfoExtractor, if
             # this error is thrown, it'll be thrown here
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Failed to download URL: %s' % url)
 
         self.report_extraction(video_id)
         # Look for BrightCove:
@@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):
             # HTML5 video
             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Unsupported URL: %s' % url)
 
         # It's possible that one of the regexes
         # matched, but returned an empty group:
         if mobj.group(1) is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError(u'Did not find a valid video URL at %s' % url)
 
         video_url = mobj.group(1)
         video_url = compat_urlparse.urljoin(url, video_url)
index 8895ad2897f8abd30471ba3f7ef07963c44e7b57..ab12d7e9381317b4dfddb679eced39db2f752ed4 100644 (file)
@@ -41,7 +41,8 @@ class GooglePlusIE(InfoExtractor):
 
         # Extract update date
         upload_date = self._html_search_regex(
-            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
+            r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
+                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
             webpage, u'upload date', fatal=False)
         if upload_date:
             # Convert timestring to a format suitable for filename
index 4327bc13dab9c913eca8c4c263b6405e0dcc9076..6bb54b932298395b8f07554b12ad6091cca140d3 100644 (file)
@@ -6,13 +6,14 @@ import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
+
 class JeuxVideoIE(InfoExtractor):
     _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
 
     _TEST = {
         u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
         u'file': u'5182.mp4',
-        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
+        u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
         u'info_dict': {
             u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
             u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
@@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = re.match(self._VALID_URL, url).group(1)
         webpage = self._download_webpage(url, title)
-        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
-
-        xml_link = m_download.group(1)
+        xml_link = self._html_search_regex(
+            r'<param name="flashvars" value="config=(.*?)" />',
+            webpage, u'config URL')
         
-        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
+        video_id = self._search_regex(
+            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
+            xml_link, u'video ID')
 
-        xml_config = self._download_webpage(xml_link, title,
-                                                  'Downloading XML config')
+        xml_config = self._download_webpage(
+            xml_link, title, u'Downloading XML config')
         config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info = re.search(r'<format\.json>(.*?)</format\.json>',
-                         xml_config, re.MULTILINE|re.DOTALL).group(1)
-        info = json.loads(info)['versions'][0]
+        info_json = self._search_regex(
+            r'(?sm)<format\.json>(.*?)</format\.json>',
+            xml_config, u'JSON information')
+        info = json.loads(info_json)['versions'][0]
         
         video_url = 'http://video720.jeuxvideo.com/' + info['file']
 
-        return {'id': id,
-                'title' : config.find('titre_video').text,
-                'ext' : 'mp4',
-                'url' : video_url,
-                'description': self._og_search_description(webpage),
-                'thumbnail': config.find('image').text,
-                }
+        return {
+            'id': video_id,
+            'title': config.find('titre_video').text,
+            'ext': 'mp4',
+            'url': video_url,
+            'description': self._og_search_description(webpage),
+            'thumbnail': config.find('image').text,
+        }
index 8f956571d54dc4a42a4f3726642929e4b2497f13..e520e2bb491f2c55f3867ab214b2b949eca6e684 100644 (file)
@@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):
     def _get_thumbnail_url(self, uri, itemdoc):
         return 'http://mtv.mtvnimages.com/uri/' + uri
 
-    def _extract_video_url(self, metadataXml):
+    def _extract_video_formats(self, metadataXml):
         if '/error_country_block.swf' in metadataXml:
             raise ExtractorError(u'This video is not available from your country.', expected=True)
         mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
         renditions = mdoc.findall('.//rendition')
 
-        # For now, always pick the highest quality.
-        rendition = renditions[-1]
-
-        try:
-            _,_,ext = rendition.attrib['type'].partition('/')
-            format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
-            rtmp_video_url = rendition.find('./src').text
-        except KeyError:
-            raise ExtractorError('Invalid rendition field.')
-        video_url = self._transform_rtmp_url(rtmp_video_url)
-        return {'ext': ext, 'url': video_url, 'format': format}
+        formats = []
+        for rendition in mdoc.findall('.//rendition'):
+            try:
+                _, _, ext = rendition.attrib['type'].partition('/')
+                rtmp_video_url = rendition.find('./src').text
+                formats.append({'ext': ext,
+                                'url': self._transform_rtmp_url(rtmp_video_url),
+                                'format_id': rendition.get('bitrate'),
+                                'width': int(rendition.get('width')),
+                                'height': int(rendition.get('height')),
+                                })
+            except (KeyError, TypeError):
+                raise ExtractorError('Invalid rendition field.')
+        return formats
 
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
@@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):
             mediagen_url += '&acceptMethods=fms'
         mediagen_page = self._download_webpage(mediagen_url, video_id,
                                                u'Downloading video urls')
-        video_info = self._extract_video_url(mediagen_page)
 
         description_node = itemdoc.find('description')
         if description_node is not None:
-            description = description_node.text
+            description = description_node.text.strip()
         else:
             description = None
-        video_info.update({'title': itemdoc.find('title').text,
-                           'id': video_id,
-                           'thumbnail': self._get_thumbnail_url(uri, itemdoc),
-                           'description': description,
-                           })
-        return video_info
+
+        info = {
+            'title': itemdoc.find('title').text,
+            'formats': self._extract_video_formats(mediagen_page),
+            'id': video_id,
+            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+            'description': description,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(info['formats'][-1])
+
+        return info
 
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
new file mode 100644 (file)
index 0000000..e8d43dd
--- /dev/null
@@ -0,0 +1,120 @@
+import re
+import json
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_urllib_parse,
+    determine_ext,
+    unified_strdate,
+)
+
+
+class NHLBaseInfoExtractor(InfoExtractor):
+    @staticmethod
+    def _fix_json(json_string):
+        return json_string.replace('\\\'', '\'')
+
+    def _extract_video(self, info):
+        video_id = info['id']
+        self.report_extraction(video_id)
+
+        initial_video_url = info['publishPoint']
+        data = compat_urllib_parse.urlencode({
+            'type': 'fvod',
+            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+        })
+        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
+        path_response = self._download_webpage(path_url, video_id,
+            u'Downloading final video url')
+        path_doc = xml.etree.ElementTree.fromstring(path_response)
+        video_url = path_doc.find('path').text
+
+        join = compat_urlparse.urljoin
+        return {
+            'id': video_id,
+            'title': info['name'],
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'description': info['description'],
+            'duration': int(info['duration']),
+            'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
+            'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
+        }
+
+
+class NHLIE(NHLBaseInfoExtractor):
+    IE_NAME = u'nhl.com'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        u'file': u'453614.mp4',
+        u'info_dict': {
+            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
+            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+            u'duration': 18,
+            u'upload_date': u'20131006',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
+        info_json = self._download_webpage(json_url, video_id,
+            u'Downloading info json')
+        info_json = self._fix_json(info_json)
+        info = json.loads(info_json)[0]
+        return self._extract_video(info)
+
+
+class NHLVideocenterIE(NHLBaseInfoExtractor):
+    IE_NAME = u'nhl.com:videocenter'
+    IE_DESC = u'Download the first 12 videos from a videocenter category'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
+
+    @classmethod
+    def suitable(cls, url):
+        if NHLIE.suitable(url):
+            return False
+        return super(NHLVideocenterIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        team = mobj.group('team')
+        webpage = self._download_webpage(url, team)
+        cat_id = self._search_regex(
+            [r'var defaultCatId = "(.+?)";',
+             r'{statusIndex:0,index:0,.*?id:(.*?),'],
+            webpage, u'category id')
+        playlist_title = self._html_search_regex(
+            r'\?catid=%s">(.*?)</a>' % cat_id,
+            webpage, u'playlist title', flags=re.DOTALL)
+
+        data = compat_urllib_parse.urlencode({
+            'cid': cat_id,
+            # This is the default value
+            'count': 12,
+            'ptrs': 3,
+            'format': 'json',
+        })
+        path = '/videocenter/servlets/browse?' + data
+        request_url = compat_urlparse.urljoin(url, path)
+        response = self._download_webpage(request_url, playlist_title)
+        response = self._fix_json(response)
+        if not response.strip():
+            self._downloader.report_warning(u'Got an empty reponse, trying '
+                                            u'adding the "newvideos" parameter')
+            response = self._download_webpage(request_url + '&newvideos=true',
+                playlist_title)
+            response = self._fix_json(response)
+        videos = json.loads(response)
+
+        return {
+            '_type': 'playlist',
+            'title': playlist_title,
+            'id': cat_id,
+            'entries': [self._extract_video(i) for i in videos],
+        }
index add76a11e5f2c0c17af76b71db6e8bd07adc6cd6..5d770ec285c3d1e3dcad04cfe49ca7780a9dd2b4 100644 (file)
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
         if upload_date: upload_date = unified_strdate(upload_date)
+        age_limit = self._rta_search(webpage)
 
         info = {'id': video_id,
                 'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
                 'upload_date': upload_date,
                 'title': video_title,
                 'ext': 'flv',
-                'format': 'flv'}
+                'format': 'flv',
+                'age_limit': age_limit}
 
         return [info]
index 1d2cf1f56df7ae932e67d4cd8253f6441818d80e..365aade564bd4f423317d427b3070c91e3e22ad3 100644 (file)
@@ -14,24 +14,30 @@ class RedTubeIE(InfoExtractor):
         }
     }
 
-    def _real_extract(self,url):
+    def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
-        video_extension = 'mp4'        
+        video_extension = 'mp4'
         webpage = self._download_webpage(url, video_id)
 
         self.report_extraction(video_id)
 
-        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
-            webpage, u'video URL')
+        video_url = self._html_search_regex(
+            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
 
-        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+        video_title = self._html_search_regex(
+            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
             webpage, u'title')
 
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      video_extension,
-            'title':    video_title,
-        }]
+        # No self-labeling, but they describe themselves as
+        # "Home of Videos Porno"
+        age_limit = 18
+
+        return {
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       video_extension,
+            'title':     video_title,
+            'age_limit': age_limit,
+        }
index 580f9e6d5b7c32562f4509fb1514dfff599af2de..d1b08c9bc050b3639ca252f2e84a373a8e4fa5f9 100644 (file)
@@ -86,8 +86,10 @@ class RTLnowIE(InfoExtractor):
         u'params': {
             u'skip_download': True,
         },
+        u'skip': u'Only works from Germany',
     }]
 
+
     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)
 
index 4c11f7a03c37136c0c80677e55b66598c647edeb..dfa1176a3e4e4eef333dcb829773c189bf9916ba 100644 (file)
@@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):
         
         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
                                        webpage, 'thumbnail')
+        formats = [{
+            'ext': 'mp4',
+            'url': stream['file'],
+            'format': stream['id']
+            } for stream in info['htmlStreams']]
         info = {
-                'id': info['id'],
-                'url': info['htmlStreams'][-1]['file'],
-                'ext': 'mp4',
-                'title': title,
-                'thumbnail': thumbnail,
-                'description': desc,
-                }
+            'id': info['id'],
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': desc,
+            'formats': formats,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(info['formats'][-1])
+
         return info
index 70408c4f0edc2ba5b00a9e793cf1e1c2e0ba30ed..1c1cc418d29a8897e2a2825492ed7becab75af6b 100644 (file)
@@ -1,11 +1,15 @@
 import re
 import json
+import xml.etree.ElementTree
+import datetime
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
 )
 
+
 class VevoIE(InfoExtractor):
     """
     Accepts urls from vevo.com or in the format 'vevo:{id}'
@@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):
     _TEST = {
         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         u'file': u'GB1101300280.mp4',
-        u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
         u'info_dict': {
             u"upload_date": u"20130624",
             u"uploader": u"Hurts",
-            u"title": u"Somebody to Die For"
+            u"title": u"Somebody to Die For",
+            u'duration': 230,
         }
     }
 
@@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        json_url = 'http://www.vevo.com/data/video/%s' % video_id
-        base_url = 'http://smil.lvl3.vevo.com'
-        videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
+        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
         info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-        links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
 
         self.report_extraction(video_id)
-        video_info = json.loads(info_json)
-        m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
-        if m_urls is None or len(m_urls) == 0:
-            raise ExtractorError(u'Unable to extract video url')
-        # They are sorted from worst to best quality
-        m_url = m_urls[-1]
-        video_url = base_url + '/' + m_url.group('url')
-        ext = m_url.group('ext')
-
-        return {'url': video_url,
-                'ext': ext,
-                'id': video_id,
-                'title': video_info['title'],
-                'thumbnail': video_info['img'],
-                'upload_date': video_info['launchDate'].replace('/',''),
-                'uploader': video_info['Artists'][0]['title'],
-                }
+        video_info = json.loads(info_json)['video']
+        last_version = {'version': -1}
+        for version in video_info['videoVersions']:
+            # These are the HTTP downloads, other types are for different manifests
+            if version['sourceType'] == 2:
+                if version['version'] > last_version['version']:
+                    last_version = version
+        if last_version['version'] == -1:
+            raise ExtractorError(u'Unable to extract last version of the video')
+
+        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        formats = []
+        # Already sorted from worst to best quality
+        for rend in renditions.findall('rendition'):
+            attr = rend.attrib
+            f_url = attr['url']
+            formats.append({
+                'url': f_url,
+                'ext': determine_ext(f_url),
+                'height': int(attr['frameheight']),
+                'width': int(attr['frameWidth']),
+            })
+
+        date_epoch = int(self._search_regex(
+            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
+        upload_date = datetime.datetime.fromtimestamp(date_epoch)
+        info = {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'thumbnail': video_info['imageUrl'],
+            'upload_date': upload_date.strftime('%Y%m%d'),
+            'uploader': video_info['mainArtists'][0]['artistName'],
+            'duration': video_info['duration'],
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+
+        return info
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
new file mode 100644 (file)
index 0000000..12c84a9
--- /dev/null
@@ -0,0 +1,64 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+
+class ViddlerIE(InfoExtractor):
+    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
+    _TEST = {
+        u"url": u"http://www.viddler.com/v/43903784",
+        u'file': u'43903784.mp4',
+        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
+        u'info_dict': {
+            u"title": u"Video Made Easy",
+            u"uploader": u"viddler",
+            u"duration": 100.89,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        embed_url = mobj.group('domain') + u'/embed/' + video_id
+        webpage = self._download_webpage(embed_url, video_id)
+
+        video_sources_code = self._search_regex(
+            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
+        video_sources = json.loads(video_sources_code.replace("'", '"'))
+
+        formats = [{
+            'url': video_url,
+            'format': format_id,
+        } for video_url, format_id in video_sources.items()]
+
+        title = self._html_search_regex(
+            r"title\s*:\s*'([^']*)'", webpage, u'title')
+        uploader = self._html_search_regex(
+            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
+        duration_s = self._html_search_regex(
+            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
+        duration = float(duration_s) if duration_s else None
+        thumbnail = self._html_search_regex(
+            r"thumbnail\s*:\s*'([^']*)'",
+            webpage, u'thumbnail', fatal=False)
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'duration': duration,
+            'formats': formats,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
+        info.update(info['formats'][-1])
+
+        return info
index 4a7d82b7adfb90e337ff17d9f90d731612650ec6..cea29f03525af91d1be56c475da0f62ce45eea83 100644 (file)
@@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
     _NETRC_MACHINE = 'vimeo'
     IE_NAME = u'vimeo'
     _TESTS = [
index 39126e6316dda1fa5a13059e482fb8b0c059cf50..464b498f584c3e42b613a79589b52a4d32fec413 100644 (file)
@@ -17,17 +17,21 @@ class YahooIE(InfoExtractor):
     _TESTS = [
         {
             u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
-            u'file': u'214727115.mp4',
+            u'file': u'214727115.flv',
             u'info_dict': {
                 u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
                 u'description': u'Julian and Travis watch Julian Smith',
             },
+            u'params': {
+                # Requires rtmpdump
+                u'skip_download': True,
+            },
         },
         {
             u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
             u'file': u'103000935.flv',
             u'info_dict': {
-                u'title': u'The Cougar Lies with Spanish Moss',
+                u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
                 u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
             },
             u'params': {
@@ -46,6 +50,21 @@ class YahooIE(InfoExtractor):
             webpage, u'items', flags=re.MULTILINE)
         items = json.loads(items_json)
         info = items['mediaItems']['query']['results']['mediaObj'][0]
+        # The 'meta' field is not always in the video webpage, we request it
+        # from another page
+        long_id = info['id']
+        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
+                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
+        data = compat_urllib_parse.urlencode({
+            'q': query,
+            'env': 'prod',
+            'format': 'json',
+        })
+        query_result_json = self._download_webpage(
+            'http://video.query.yahoo.com/v1/public/yql?' + data,
+            video_id, u'Downloading video info')
+        query_result = json.loads(query_result_json)
+        info = query_result['query']['results']['mediaObj'][0]
         meta = info['meta']
 
         formats = []
index c85fd4b5af0ccdd3f259bd403ddd4311f2de5fdb..b1f93dd1bb90d964916394d88d83aaaf153ba15b 100644 (file)
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
+        age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
                 'ext': extension,
                 'format': format,
                 'thumbnail': thumbnail,
-                'description': video_description
+                'description': video_description,
+                'age_limit': age_limit,
             })
 
         if self._downloader.params.get('listformats', None):
index 39ff33290cc617f853026712cece2155c335f5fb..8222a880f55f7a27afe94e2aad5db570342650d9 100644 (file)
@@ -1037,12 +1037,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         if player_url is not None:
             try:
-                if player_url not in self._player_cache:
+                player_id = (player_url, len(s))
+                if player_id not in self._player_cache:
                     func = self._extract_signature_function(
                         video_id, player_url, len(s)
                     )
-                    self._player_cache[player_url] = func
-                func = self._player_cache[player_url]
+                    self._player_cache[player_id] = func
+                func = self._player_cache[player_id]
                 if self._downloader.params.get('youtube_print_sig_code'):
                     self._print_sig_code(func, len(s))
                 return func(s)
@@ -1115,6 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'lang': lang,
                 'v': video_id,
                 'fmt': self._downloader.params.get('subtitlesformat'),
+                'name': l[0],
             })
             url = u'http://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
@@ -1249,9 +1251,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         return url_map
 
     def _real_extract(self, url):
-        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
-            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
-
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
@@ -1494,7 +1493,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'description':  video_description,
                 'player_url':   player_url,
                 'subtitles':    video_subtitles,
-                'duration':     video_duration
+                'duration':     video_duration,
+                'age_limit':    18 if age_gate else 0,
             })
         return results
 
@@ -1635,7 +1635,7 @@ class YoutubeChannelIE(InfoExtractor):
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1828,3 +1828,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
         return self.url_result(playlist_id, 'YoutubePlaylist')
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+    IE_NAME = 'youtube:truncated_url'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
+
+    def _real_extract(self, url):
+        raise ExtractorError(
+            u'Did you forget to quote the URL? Remember that & is a meta '
+            u'character in most shells, so you want to put the URL in quotes, '
+            u'like  youtube-dl '
+            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
+            u' (or simply  youtube-dl BaW_jenozKc  ).',
+            expected=True)
index f5f9cde99b0c65363a098923e4c350182896f7f1..82a1daeb9075a056aa908e0a91bbd83b8897673a 100644 (file)
@@ -175,7 +175,7 @@ def compat_ord(c):
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -715,6 +715,7 @@ def unified_strdate(date_str):
         '%Y/%m/%d %H:%M:%S',
         '%d.%m.%Y %H:%M',
         '%Y-%m-%dT%H:%M:%SZ',
+        '%Y-%m-%dT%H:%M:%S',
     ]
     for expression in format_expressions:
         try:
@@ -830,3 +831,99 @@ def get_cachedir(params={}):
     cache_root = os.environ.get('XDG_CACHE_HOME',
                                 os.path.expanduser('~/.cache'))
     return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+    import ctypes.wintypes
+    import msvcrt
+
+    class OVERLAPPED(ctypes.Structure):
+        _fields_ = [
+            ('Internal', ctypes.wintypes.LPVOID),
+            ('InternalHigh', ctypes.wintypes.LPVOID),
+            ('Offset', ctypes.wintypes.DWORD),
+            ('OffsetHigh', ctypes.wintypes.DWORD),
+            ('hEvent', ctypes.wintypes.HANDLE),
+        ]
+
+    kernel32 = ctypes.windll.kernel32
+    LockFileEx = kernel32.LockFileEx
+    LockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwFlags
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    LockFileEx.restype = ctypes.wintypes.BOOL
+    UnlockFileEx = kernel32.UnlockFileEx
+    UnlockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    UnlockFileEx.restype = ctypes.wintypes.BOOL
+    whole_low = 0xffffffff
+    whole_high = 0x7fffffff
+
+    def _lock_file(f, exclusive):
+        overlapped = OVERLAPPED()
+        overlapped.Offset = 0
+        overlapped.OffsetHigh = 0
+        overlapped.hEvent = 0
+        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+                          whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+    def _unlock_file(f):
+        assert f._lock_file_overlapped_p
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not UnlockFileEx(handle, 0,
+                            whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+    import fcntl
+
+    def _lock_file(f, exclusive):
+        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+    def _unlock_file(f):
+        fcntl.lockf(f, fcntl.LOCK_UN)
+
+
+class locked_file(object):
+    def __init__(self, filename, mode, encoding=None):
+        assert mode in ['r', 'a', 'w']
+        self.f = io.open(filename, mode, encoding=encoding)
+        self.mode = mode
+
+    def __enter__(self):
+        exclusive = self.mode != 'r'
+        try:
+            _lock_file(self.f, exclusive)
+        except IOError:
+            self.f.close()
+            raise
+        return self
+
+    def __exit__(self, etype, value, traceback):
+        try:
+            _unlock_file(self.f)
+        finally:
+            self.f.close()
+
+    def __iter__(self):
+        return iter(self.f)
+
+    def write(self, *args):
+        return self.f.write(*args)
+
+    def read(self, *args):
+        return self.f.read(*args)
index e773e82dae44d0b8099aeead68b9ec3103b91294..1004af116bc88dba99ff62273d63cc02d6154ea4 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.10.04'
+__version__ = '2013.10.09'