[lynda] minor changes
author Philipp Hagemeister <phihag@phihag.de>
Fri, 3 Jan 2014 12:24:29 +0000 (13:24 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 3 Jan 2014 12:24:29 +0000 (13:24 +0100)
32 files changed:
README.md
test/test_playlists.py
test/test_utils.py
youtube_dl/PostProcessor.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/http.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/blinkx.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/cmt.py [new file with mode: 0644]
youtube_dl/extractor/cnn.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/jpopsukitv.py [new file with mode: 0644]
youtube_dl/extractor/lynda.py
youtube_dl/extractor/macgamestore.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index 91e18e372b9a803dc7358f2e6d4af3b31eb754a5..0070617d4dce5bdd0ff3628bd232313f287498ad 100644 (file)
--- a/README.md
+++ b/README.md
@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.
                                empty string (--proxy "") for direct connection
     --no-check-certificate     Suppress HTTPS certificate validation.
     --cache-dir DIR            Location in the filesystem where youtube-dl can
-                               store downloaded information permanently. By
+                               store some downloaded information permanently. By
                                default $XDG_CACHE_HOME/youtube-dl or ~/.cache
-                               /youtube-dl .
+                               /youtube-dl . At the moment, only YouTube player
+                               files (for videos with obfuscated signatures) are
+                               cached, but that may change.
     --no-cache-dir             Disable filesystem caching
     --bidi-workaround          Work around terminals that lack bidirectional
                                text support. Requires bidiv or fribidi
@@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services
 ###  Is anyone going to need the feature?
 
 Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+###  Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
index 1b7b4e3d808cb936fa5fac07136049bd174a4490..9d522b3574c221bf6594cd9965ea17bf47b671c6 100644 (file)
@@ -28,7 +28,8 @@ from youtube_dl.extractor import (
     BandcampAlbumIE,
     SmotriCommunityIE,
     SmotriUserIE,
-    IviCompilationIE
+    IviCompilationIE,
+    ImdbListIE,
 )
 
 
@@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
         self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
         self.assertTrue(len(result['entries']) >= 20)
+        
+    def test_imdb_list(self):
+        dl = FakeYDL()
+        ie = ImdbListIE(dl)
+        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'sMjedvGDd8U')
+        self.assertEqual(result['title'], u'Animated and Family Films')
+        self.assertTrue(len(result['entries']) >= 48)
 
 
 if __name__ == '__main__':
index e5778cd83ee9ea74e4786243f1e6279aed3697d3..bee355ee0e0605a5134dc37b8556e9e233728902 100644 (file)
@@ -18,6 +18,7 @@ from youtube_dl.utils import (
     find_xpath_attr,
     get_meta_content,
     orderedSet,
+    parse_duration,
     sanitize_filename,
     shell_quote,
     smuggle_url,
@@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase):
             url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
             u'trailer.mp4')
 
+    def test_parse_duration(self):
+        self.assertEqual(parse_duration(None), None)
+        self.assertEqual(parse_duration('1'), 1)
+        self.assertEqual(parse_duration('1337:12'), 80232)
+        self.assertEqual(parse_duration('9:12:43'), 33163)
+        self.assertEqual(parse_duration('x:y'), None)
+
 if __name__ == '__main__':
     unittest.main()
index 69aedf87a44c72060e2af135cd95f6f820e9ab0c..097e1a9e41e67219d4205e8966d311646c9d378f 100644 (file)
@@ -10,6 +10,7 @@ from .utils import (
     PostProcessingError,
     shell_quote,
     subtitles_filename,
+    prepend_extension,
 )
 
 
@@ -496,13 +497,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             return True, info
 
         filename = info['filepath']
-        ext = os.path.splitext(filename)[1][1:]
-        temp_filename = filename + u'.temp'
+        temp_filename = prepend_extension(filename, 'temp')
 
         options = ['-c', 'copy']
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
-        options.extend(['-f', ext])
 
         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
index a9a3639d7f7a32053990f0b41e487b837a704767..08037deda48c05c74e40e989fee4af34887ae6fa 100644 (file)
@@ -148,6 +148,7 @@ class YoutubeDL(object):
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fridibi
+    debug_printtraffic:Print out sent and received HTTP traffic
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -164,6 +165,8 @@ class YoutubeDL(object):
 
     def __init__(self, params=None):
         """Create a FileDownloader object with the given options."""
+        if params is None:
+            params = {}
         self._ies = []
         self._ies_instances = {}
         self._pps = []
@@ -172,7 +175,7 @@ class YoutubeDL(object):
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
-        self.params = {} if params is None else params
+        self.params = params
 
         if params.get('bidi_workaround', False):
             try:
@@ -1014,7 +1017,7 @@ class YoutubeDL(object):
     def list_formats(self, info_dict):
         def format_note(fdict):
             res = u''
-            if f.get('ext') in ['f4f', 'f4m']:
+            if fdict.get('ext') in ['f4f', 'f4m']:
                 res += u'(unsupported) '
             if fdict.get('format_note') is not None:
                 res += fdict['format_note'] + u' '
@@ -1124,10 +1127,13 @@ class YoutubeDL(object):
             if 'http' in proxies and 'https' not in proxies:
                 proxies['https'] = proxies['http']
         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+
+        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(
-            self.params.get('nocheckcertificate', False))
+            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
+        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
         opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+            https_handler, proxy_handler, cookie_processor, ydlh)
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
index c37d28c5997ed8d348afdcefe61acef1b0bd511e..b29cf6758d0354e6ff34b8823ea1606544a7f5a1 100644 (file)
@@ -44,6 +44,7 @@ __license__ = 'Public Domain'
 
 import codecs
 import getpass
+import locale
 import optparse
 import os
 import random
@@ -185,7 +186,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
     general.add_option(
         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
-        help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
+        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
     general.add_option(
         '--no-cache-dir', action='store_const', const=None, dest='cachedir',
         help='Disable filesystem caching')
@@ -333,7 +334,9 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option('--youtube-print-sig-code',
             action='store_true', dest='youtube_print_sig_code', default=False,
             help=optparse.SUPPRESS_HELP)
-
+    verbosity.add_option('--print-traffic',
+            dest='debug_printtraffic', action='store_true', default=False,
+            help=optparse.SUPPRESS_HELP)
 
     filesystem.add_option('-t', '--title',
             action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -473,6 +476,8 @@ def parseOpts(overrideArguments=None):
             write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
             write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
             write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
+                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
 
     return parser, opts, args
 
@@ -693,6 +698,7 @@ def _real_main(argv=None):
         'proxy': opts.proxy,
         'socket_timeout': opts.socket_timeout,
         'bidi_workaround': opts.bidi_workaround,
+        'debug_printtraffic': opts.debug_printtraffic,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index 14b88efd3d951345effe95959f72a82575e094d5..8407727ba43df75a31a53d073cbd430877157b29 100644 (file)
@@ -133,7 +133,7 @@ class HttpFD(FileDownloader):
                     return False
             try:
                 stream.write(data_block)
-            except (IOError, OSError):
+            except (IOError, OSError) as err:
                 self.to_stderr(u"\n")
                 self.report_error(u'unable to write data: %s' % str(err))
                 return False
index 2caa078b58ae81782fb6ccc3555187925ba3dbf3..21d564dba915a56d81f9c0f16ae41a65034c60ff 100644 (file)
@@ -28,6 +28,7 @@ from .channel9 import Channel9IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .clipsyndicate import ClipsyndicateIE
+from .cmt import CMTIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ign import IGNIE, OneUPIE
-from .imdb import ImdbIE
+from .imdb import (
+    ImdbIE,
+    ImdbListIE
+)
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
@@ -91,6 +95,7 @@ from .ivi import (
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
+from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
 from .kickstarter import KickStarterIE
@@ -101,6 +106,7 @@ from .lynda import (
     LyndaIE,
     LyndaCourseIE
 )
+from .macgamestore import MacGameStoreIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
index 3a32c14c598dd2da14841fe68c1cb59582f30799..15aee278626dae48ea589a6cbad0cbc8644d09bf 100644 (file)
@@ -10,14 +10,14 @@ from ..utils import (
 
 
 class BandcampIE(InfoExtractor):
-    IE_NAME = u'Bandcamp'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _TESTS = [{
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         u'file': u'1812978515.mp3',
-        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
+        u'md5': u'c557841d5e50261777a6585648adf439',
         u'info_dict': {
-            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
+            u"title": u"youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            u"duration": 10,
         },
         u'skip': u'There is a limit of 200 free downloads / month for the test song'
     }]
@@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor):
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if m_download is None:
             m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
-        if m_trackinfo:
-            json_code = m_trackinfo.group(1)
-            data = json.loads(json_code)
+            if m_trackinfo:
+                json_code = m_trackinfo.group(1)
+                data = json.loads(json_code)
+                d = data[0]
+
+                duration = int(round(d['duration']))
+                formats = []
+                for format_id, format_url in d['file'].items():
+                    ext, _, abr_str = format_id.partition('-')
+
+                    formats.append({
+                        'format_id': format_id,
+                        'url': format_url,
+                        'ext': format_id.partition('-')[0],
+                        'vcodec': 'none',
+                        'acodec': format_id.partition('-')[0],
+                        'abr': int(format_id.partition('-')[2]),
+                    })
+
+                self._sort_formats(formats)
 
-            for d in data:
-                formats = [{
-                    'format_id': 'format_id',
-                    'url': format_url,
-                    'ext': format_id.partition('-')[0]
-                } for format_id, format_url in sorted(d['file'].items())]
                 return {
                     'id': compat_str(d['id']),
                     'title': d['title'],
                     'formats': formats,
+                    'duration': duration,
                 }
-        else:
-            raise ExtractorError(u'No free songs found')
+            else:
+                raise ExtractorError(u'No free songs found')
 
         download_link = m_download.group(1)
-        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
-                       webpage, re.MULTILINE|re.DOTALL).group('id')
+        video_id = re.search(
+            r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
+            webpage, re.MULTILINE | re.DOTALL).group('id')
 
-        download_webpage = self._download_webpage(download_link, id,
+        download_webpage = self._download_webpage(download_link, video_id,
                                                   'Downloading free downloads page')
         # We get the dictionary of the track from some javascript code
         info = re.search(r'items: (.*?),$',
@@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor):
         m_url = re.match(re_url, initial_url)
         #We build the url we will use to get the final track url
         # This url is built in Bandcamp in the script download_bunde_*.js
-        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
+        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
         final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
         # If we could correctly generate the .rand field the url would be
         #in the "download_url" key
         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
 
-        track_info = {'id':id,
-                      'title' : info[u'title'],
-                      'ext' :   'mp3',
-                      'url' :   final_url,
-                      'thumbnail' : info[u'thumb_url'],
-                      'uploader' :  info[u'artist']
-                      }
-
-        return [track_info]
+        return {
+            'id': video_id,
+            'title': info[u'title'],
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'url': final_url,
+            'thumbnail': info[u'thumb_url'],
+            'uploader': info[u'artist'],
+        }
 
 
 class BandcampAlbumIE(InfoExtractor):
@@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
         if not tracks_paths:
-            raise ExtractorError(u'The page doesn\'t contain any track')
+            raise ExtractorError(u'The page doesn\'t contain any tracks')
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
index 144ce64ccacd35abbbf51392e192dff8a1ca1649..0229840a3017e2e19fea6a895351fb8299beefd8 100644 (file)
@@ -61,9 +61,10 @@ class BlinkxIE(InfoExtractor):
             elif m['type'] in ('flv', 'mp4'):
                 vcodec = remove_start(m['vcodec'], 'ff')
                 acodec = remove_start(m['acodec'], 'ff')
+                tbr = (int(m['vbr']) + int(m['abr'])) // 1000
                 format_id = (u'%s-%sk-%s' %
                              (vcodec,
-                              (int(m['vbr']) + int(m['abr'])) // 1000,
+                              tbr,
                               m['w']))
                 formats.append({
                     'format_id': format_id,
@@ -72,10 +73,12 @@ class BlinkxIE(InfoExtractor):
                     'acodec': acodec,
                     'abr': int(m['abr']) // 1000,
                     'vbr': int(m['vbr']) // 1000,
+                    'tbr': tbr,
                     'width': int(m['w']),
                     'height': int(m['h']),
                 })
-        formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))
+
+        self._sort_formats(formats)
 
         return {
             'id': display_id,
index ae70ea22967a8d880ba15fa1bb64f32904139094..574881b70de67b9521b5e813f0cafa6da59d1068 100644 (file)
@@ -76,14 +76,18 @@ class Channel9IE(InfoExtractor):
             </div>)?                                                # File size part may be missing
         '''
         # Extract known formats
-        formats = [{'url': x.group('url'),
-                 'format_id': x.group('quality'),
-                 'format_note': x.group('note'),
-                 'format': '%s (%s)' % (x.group('quality'), x.group('note')), 
-                 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
-                 } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
-        # Sort according to known formats list
-        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
+        formats = [{
+            'url': x.group('url'),
+            'format_id': x.group('quality'),
+            'format_note': x.group('note'),
+            'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
+            'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
+            'preference': self._known_formats.index(x.group('quality')),
+            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
+        } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
+
+        self._sort_formats(formats)
+
         return formats
 
     def _extract_title(self, html):
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
new file mode 100644 (file)
index 0000000..88e0e9a
--- /dev/null
@@ -0,0 +1,19 @@
+from .mtv import MTVIE
+
+class CMTIE(MTVIE):
+    IE_NAME = u'cmt.com'
+    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
+    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
+            u'info_dict': {
+                u'id': u'989124',
+                u'ext': u'mp4',
+                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+                u'description': u'Blame It All On My Roots',
+            },
+        },
+    ]
index a034bb2fb6288fc62d964021405aa94eff532ff6..ecac5e0e9f952539545ba1ea4a4cad7a67a3a43a 100644 (file)
@@ -1,7 +1,10 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import determine_ext
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
 
 
 class CNNIE(InfoExtractor):
@@ -15,6 +18,8 @@ class CNNIE(InfoExtractor):
         u'info_dict': {
             u'title': u'Nadal wins 8th French Open title',
             u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+            u'duration': 135,
+            u'upload_date': u'20130609',
         },
     },
     {
@@ -35,22 +40,58 @@ class CNNIE(InfoExtractor):
         info = self._download_xml(info_url, page_title)
 
         formats = []
+        rex = re.compile(r'''(?x)
+            (?P<width>[0-9]+)x(?P<height>[0-9]+)
+            (?:_(?P<bitrate>[0-9]+)k)?
+        ''')
         for f in info.findall('files/file'):
-            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
-            if mf is not None:
-                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
-        formats = sorted(formats)
-        (_,_,_, video_path) = formats[-1]
-        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
+            video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
+            fdct = {
+                'format_id': f.attrib['bitrate'],
+                'url': video_url,
+            }
+
+            mf = rex.match(f.attrib['bitrate'])
+            if mf:
+                fdct['width'] = int(mf.group('width'))
+                fdct['height'] = int(mf.group('height'))
+                fdct['tbr'] = int_or_none(mf.group('bitrate'))
+            else:
+                mf = rex.search(f.text)
+                if mf:
+                    fdct['width'] = int(mf.group('width'))
+                    fdct['height'] = int(mf.group('height'))
+                    fdct['tbr'] = int_or_none(mf.group('bitrate'))
+                else:
+                    mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
+                    if mi:
+                        if mi.group(1) == 'audio':
+                            fdct['vcodec'] = 'none'
+                            fdct['ext'] = 'm4a'
+                        else:
+                            fdct['tbr'] = int(mi.group(1))
+
+            formats.append(fdct)
+
+        self._sort_formats(formats)
 
         thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
         thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
 
-        return {'id': info.attrib['id'],
-                'title': info.find('headline').text,
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'thumbnail': thumbnails[-1][1],
-                'thumbnails': thumbs_dict,
-                'description': info.find('description').text,
-                }
+        metas_el = info.find('metas')
+        upload_date = (
+            metas_el.attrib.get('version') if metas_el is not None else None)
+
+        duration_el = info.find('length')
+        duration = parse_duration(duration_el.text)
+
+        return {
+            'id': info.attrib['id'],
+            'title': info.find('headline').text,
+            'formats': formats,
+            'thumbnail': thumbnails[-1][1],
+            'thumbnails': thumbs_dict,
+            'description': info.find('description').text,
+            'duration': duration,
+            'upload_date': upload_date,
+        }
index a54ce3ee7c44727a9e56b1ab8359bd099b48bb35..27bd8256e6bf6dfb8dc7ae7997a29c2162d863d2 100644 (file)
@@ -12,7 +12,9 @@ from ..utils import (
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+        (video-clips|episodes|cc-studios|video-collections)
+        /(?P<title>.*)'''
     _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
 
     _TEST = {
index 6fa60622e096e518a55c4cfddd71ef5a4d9a862a..f498bcf6f5af1feb05eb8df0184f77e6336ac878 100644 (file)
@@ -51,7 +51,8 @@ class InfoExtractor(object):
                                  Calculated from the format_id, width, height.
                                  and format_note fields if missing.
                     * format_id  A short description of the format
-                                 ("mp4_h264_opus" or "19")
+                                 ("mp4_h264_opus" or "19").
+                                Technically optional, but strongly recommended.
                     * format_note Additional info about the format
                                  ("3D" or "DASH video")
                     * width      Width of the video, if known
@@ -68,7 +69,8 @@ class InfoExtractor(object):
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp" or so.
                     * preference Order number of this format. If this field is
-                                 present, the formats get sorted by this field.
+                                 present and not None, the formats get sorted
+                                 by this field.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
     url:            Final video URL.
@@ -376,7 +378,7 @@ class InfoExtractor(object):
     @staticmethod
     def _og_regexes(prop):
         content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
-        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [
             template % (property_re, content_re),
index 416e25156e321333d1896983ced60a1e85c16ada..0b11d1f10e18e4358b35f76d0a0e0816b00eaa4c 100644 (file)
@@ -10,11 +10,11 @@ from ..utils import (
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TEST = {
         u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
-        u'file': u'36983.webm',
-        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
+        u'file': u'36983.mp4',
+        u'md5': u'9dcfe344732808dbfcc901537973c922',
         u'info_dict': {
             u"title": u"Kaffeeland Schweiz",
             u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", 
index 7a14c98f9b6ef9d550606c72c330d0730ec1233e..377ae91c4383c78c0749937650097d231a7f2ca8 100644 (file)
@@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):
             return self.url_result('http://' + url)
         video_id = os.path.splitext(url.split('/')[-1])[0]
 
+        self.to_screen(u'%s: Requesting header' % video_id)
+
         try:
             response = self._send_head(url)
 
index e5332cce820ca239c915da402107a77143f0484b..16926b4d391bdc11801510797c26481610b928e3 100644 (file)
@@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
             'description': descr,
             'thumbnail': format_info['slate'],
         }
+
+class ImdbListIE(InfoExtractor):
+    IE_NAME = u'imdb:list'
+    IE_DESC = u'Internet Movie Database lists'
+    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        list_id = mobj.group('id')
+        
+        # RSS XML is sometimes malformed
+        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
+        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
+        
+        # Export is independent of actual author_id, but returns 404 if no author_id is provided.
+        # However, passing dummy author_id seems to be enough.
+        csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
+                                     list_id, u'Downloading list CSV')
+        
+        entries = []
+        for item in csv.split('\n')[1:]:
+            cols = item.split(',')
+            if len(cols) < 2:
+                continue
+            item_id = cols[1][1:-1]
+            if item_id.startswith('vi'):
+                entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
+        
+        return self.playlist_result(entries, list_id, list_title)
\ No newline at end of file
index 16a6f73c87e27d47d401c444aa02b80d5a6313b3..4ddda2f1bb86dd534f623218b2acd74566d781e1 100644 (file)
@@ -5,7 +5,6 @@ from ..utils import (
     compat_urlparse,
     compat_urllib_parse,
     xpath_with_ns,
-    determine_ext,
 )
 
 
@@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor):
         for content in item.findall(_bp('media:group/media:content')):
             attr = content.attrib
             f_url = attr['url']
+            width = int(attr['width'])
+            bitrate = int(attr['bitrate'])
+            format_id = '%d-%dk' % (width, bitrate)
             formats.append({
+                'format_id': format_id,
                 'url': f_url,
-                'ext': determine_ext(f_url),
-                'width': int(attr['width']),
-                'bitrate': int(attr['bitrate']),
+                'width': width,
+                'tbr': bitrate,
             })
-        formats = sorted(formats, key=lambda f: f['bitrate'])
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
index 4bdf55f934aa63b005ff06e9b088b956f6806a02..98d1d272a6bb5085dadae9c8233a0e2592571636 100644 (file)
@@ -84,14 +84,16 @@ class IviIE(InfoExtractor):
 
         result = video_json[u'result']
 
-        formats = [{'url': x[u'url'],
-                    'format_id': x[u'content_format']
-                    } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
-        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
-
-        if len(formats) == 0:
-            self._downloader.report_warning(u'No media links available for %s' % video_id)
-            return
+        formats = [{
+            'url': x[u'url'],
+            'format_id': x[u'content_format'],
+            'preference': self._known_formats.index(x[u'content_format']),
+        } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
+
+        self._sort_formats(formats)
+
+        if not formats:
+            raise ExtractorError(u'No media links available for %s' % video_id)
 
         duration = result[u'duration']
         compilation = result[u'compilation']
diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py
new file mode 100644 (file)
index 0000000..aad7825
--- /dev/null
@@ -0,0 +1,73 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)
+
+
+class JpopsukiIE(InfoExtractor):
+    IE_NAME = 'jpopsuki.tv'
+    _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)'
+
+    _TEST = {
+        'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
+        'md5': '88018c0c1a9b1387940e90ec9e7e198e',
+        'file': '00be659d23b0b40508169cdee4545771.mp4',
+        'info_dict': {
+            'id': '00be659d23b0b40508169cdee4545771',
+            'title': 'ayumi hamasaki - evolution',
+            'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
+            'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
+            'uploader': 'plama_chan',
+            'uploader_id': '404',
+            'upload_date': '20121101'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
+            r'<source src="(.*?)" type', webpage, 'video url')
+
+        video_title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._html_search_regex(
+            r'<li>from: <a href="/user/view/user/(.*?)/uid/',
+            webpage, 'video uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
+            webpage, 'video uploader_id', fatal=False)
+        upload_date = self._html_search_regex(
+            r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
+            fatal=False)
+        if upload_date is not None:
+            upload_date = unified_strdate(upload_date)
+        view_count_str = self._html_search_regex(
+            r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
+            fatal=False)
+        comment_count_str = self._html_search_regex(
+            r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
+            fatal=False)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': int_or_none(view_count_str),
+            'comment_count': int_or_none(comment_count_str),
+        }
index dd59aa3e6cb3be989a39aa56a95791e4165a8834..592ed747a47492e13bb939659bc516502b3b4aa0 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -6,17 +8,17 @@ from ..utils import ExtractorError
 
 
 class LyndaIE(InfoExtractor):
-    IE_NAME = u'lynda'
-    IE_DESC = u'lynda.com videos'
+    IE_NAME = 'lynda'
+    IE_DESC = 'lynda.com videos'
     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
 
     _TEST = {
-        u'url': u'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
-        u'file': u'114408.mp4',
-        u'md5': u'ecfc6862da89489161fb9cd5f5a6fac1',
+        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+        'file': '114408.mp4',
+        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
         u"info_dict": {
-            u'title': u'Using the exercise files',
-            u'duration': 68
+            'title': 'Using the exercise files',
+            'duration': 68
         }
     }
 
@@ -25,26 +27,26 @@ class LyndaIE(InfoExtractor):
         video_id = mobj.group(1)
 
         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
-                                      video_id, u'Downloading video JSON')
+                                      video_id, 'Downloading video JSON')
         video_json = json.loads(page)
 
-        if u'Status' in video_json and video_json[u'Status'] == u'NotFound':
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+        if 'Status' in video_json and video_json['Status'] == 'NotFound':
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        if video_json[u'HasAccess'] is False:
-            raise ExtractorError(u'Video %s is only available for members' % video_id, expected=True)
+        if video_json['HasAccess'] is False:
+            raise ExtractorError('Video %s is only available for members' % video_id, expected=True)
 
-        video_id = video_json[u'ID']
-        duration = video_json[u'DurationInSeconds']
-        title = video_json[u'Title']
+        video_id = video_json['ID']
+        duration = video_json['DurationInSeconds']
+        title = video_json['Title']
 
-        formats = [{'url': fmt[u'Url'],
-                    'ext': fmt[u'Extension'],
-                    'width': fmt[u'Width'],
-                    'height': fmt[u'Height'],
-                    'filesize': fmt[u'FileSize'],
-                    'format_id': fmt[u'Resolution']
-                    } for fmt in video_json[u'Formats']]
+        formats = [{'url': fmt['Url'],
+                    'ext': fmt['Extension'],
+                    'width': fmt['Width'],
+                    'height': fmt['Height'],
+                    'filesize': fmt['FileSize'],
+                    'format_id': fmt['Resolution']
+                    } for fmt in video_json['Formats']]
 
         self._sort_formats(formats)
 
@@ -57,8 +59,8 @@ class LyndaIE(InfoExtractor):
 
 
 class LyndaCourseIE(InfoExtractor):
-    IE_NAME = u'lynda:course'
-    IE_DESC = u'lynda.com online courses'
+    IE_NAME = 'lynda:course'
+    IE_DESC = 'lynda.com online courses'
 
     # Course link equals to welcome/introduction video link of same course
     # We will recognize it as course link
@@ -70,27 +72,31 @@ class LyndaCourseIE(InfoExtractor):
         course_id = mobj.group('courseid')
 
         page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
-                                      course_id, u'Downloading course JSON')
+                                      course_id, 'Downloading course JSON')
         course_json = json.loads(page)
 
-        if u'Status' in course_json and course_json[u'Status'] == u'NotFound':
-            raise ExtractorError(u'Course %s does not exist' % course_id, expected=True)
+        if 'Status' in course_json and course_json['Status'] == 'NotFound':
+            raise ExtractorError('Course %s does not exist' % course_id, expected=True)
 
         unaccessible_videos = 0
         videos = []
 
-        for chapter in course_json[u'Chapters']:
-            for video in chapter[u'Videos']:
-                if video[u'HasAccess'] is not True:
+        for chapter in course_json['Chapters']:
+            for video in chapter['Videos']:
+                if video['HasAccess'] is not True:
                     unaccessible_videos += 1
                     continue
-                videos.append(video[u'ID'])
+                videos.append(video['ID'])
 
         if unaccessible_videos > 0:
-            self._downloader.report_warning(u'%s videos are only available for members and will not be downloaded' % unaccessible_videos)
+            self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos)
 
-        entries = [self.url_result('http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), 'Lynda') for video_id in videos]
+        entries = [
+            self.url_result('http://www.lynda.com/%s/%s-4.html' %
+                            (course_path, video_id),
+                            'Lynda')
+            for video_id in videos]
 
-        course_title = course_json[u'Title']
+        course_title = course_json['Title']
 
-        return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file
+        return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py
new file mode 100644 (file)
index 0000000..b818cf5
--- /dev/null
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class MacGameStoreIE(InfoExtractor):
+    IE_NAME = 'macgamestore'
+    IE_DESC = 'MacGameStore trailers'
+    _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
+        'file': '2450.m4v',
+        'md5': '8649b8ea684b6666b4c5be736ecddc61',
+        'info_dict': {
+            'title': 'Crow',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
+
+        if re.search(r'>Missing Media<', webpage) is not None:
+            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
+
+        video_title = self._html_search_regex(
+            r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
+
+        video_url = self._html_search_regex(
+            r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
+            webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title
+        }
index 125d81551c26ea67eff82f2d2189bd058d16b873..7c54ea0f4c7f351161adae175edca2743ab55266 100644 (file)
@@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):
         info = json.loads(json_data)
 
         preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
-        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+        song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
         final_song_url = self._get_url(template_url)
         if final_song_url is None:
index ed11f521aa02aa3fe421b8fc743b0a26b1e1cdd0..f1cf41e2dbf2012764fdb0f2e1745c07ecdef055 100644 (file)
@@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
-        uri = mobj.group('mgid')
+        uri = mobj.groupdict().get('mgid')
         if uri is None:
             webpage = self._download_webpage(url, video_id)
     
index e22ff9c387ab0e01c1e6fcb1da793af877f37a5c..951e977bd0ba014340fe3eeb626723bde258e0dd 100644 (file)
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
                             (?!sets/)(?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
-                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
+                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''
     IE_NAME = u'soundcloud'
@@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
-        elif mobj.group('widget'):
+        elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
             return self.url_result(query['url'][0], ie='Soundcloud')
         else:
index cec65261bfffd2a25702634047a99526fa3a7d10..23172143ec41ecc48e88c333dfdd476db330e1b8 100644 (file)
@@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor):
         formats = []
         for f in switch.findall(_x('smil:video')):
             attr = f.attrib
+            width = int(attr['width'])
+            height = int(attr['height'])
+            vbr = int(attr['system-bitrate']) // 1000
+            format_id = '%dx%d_%dk' % (width, height, vbr)
             formats.append({
+                'format_id': format_id,
                 'url': base_url,
                 'play_path': 'mp4:' + attr['src'],
                 'ext': 'flv',
-                'width': int(attr['width']),
-                'height': int(attr['height']),
-                'vbr': int(attr['system-bitrate']),
+                'width': width,
+                'height': height,
+                'vbr': vbr,
             })
-        formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
index 584550455ad8dbf611424ad6606411850dd72d3e..bc31c2e64f22999adf575e60d59bde3d903bb9cc 100644 (file)
@@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):
                 'height': a['height'],
                 'filesize': a['size'],
                 'ext': a['ext'],
+                'preference': 1 if atype == 'original' else None,
             })
 
         self._sort_formats(formats)
index bd0f2cae0298dec0d78f812153976ec6a8434bb0..77ad423c44b38af655fc14a8918dfbcf677ca936 100644 (file)
@@ -1,5 +1,4 @@
 import json
-import os
 import re
 import sys
 
@@ -16,6 +15,7 @@ from ..aes import (
     aes_decrypt_text
 )
 
+
 class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
     _TEST = {
@@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor):
         u'file': u'505835.mp4',
         u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
         u'info_dict': {
-            u"upload_date": u"20101221", 
-            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
-            u"uploader": u"Ask Dan And Jennifer", 
+            u"upload_date": u"20101221",
+            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
+            u"uploader": u"Ask Dan And Jennifer",
             u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
             u"age_limit": 18,
         }
@@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor):
             link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
             links.append(link)
         
-        if not links:
-            raise ExtractorError(u'ERROR: no known formats available for video')
-
         formats = []
         for link in links:
-
             # A link looks like this:
             # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
             # A path looks like this:
             # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
             video_url = unescapeHTML(link)
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
-            format = path.split('/')[4].split('_')[:2]
+            format_parts = path.split('/')[4].split('_')[:2]
 
-            # size = format[0]
-            # bitrate = format[1]
-            format = "-".join(format)
-            # title = u'%s-%s-%s' % (video_title, size, bitrate)
+            dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0]
+
+            resolution = format_parts[0]
+            height = int(resolution[:-len('p')])
+            bitrate = int(format_parts[1][:-len('k')])
+            format = u'-'.join(format_parts) + u'-' + dn
 
             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'format': format,
                 'format_id': format,
+                'height': height,
+                'tbr': bitrate,
+                'resolution': resolution,
             })
 
-        # Sort and remove doubles
-        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
-        for i in range(len(formats)-1,0,-1):
-            if formats[i]['format_id'] == formats[i-1]['format_id']:
-                del formats[i]
+        self._sort_formats(formats)
+
+        if not formats:
+            raise ExtractorError(u'ERROR: no known formats available for video')
         
         return {
             'id': video_id,
index b0e29c2a8a5d8c7f6c4c0109ca09afa204d3b30b..9424d5e2669a72e791a0ba0a0120de0bfec27fc8 100644 (file)
@@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
         '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
 
         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
index 4c8bdbb0cc0bf2af14c2ec006307b8b2a2c7d65d..fc10fba6327ae17223431da24aae80e341712df3 100644 (file)
@@ -539,7 +539,8 @@ def formatSeconds(secs):
     else:
         return '%d' % secs
 
-def make_HTTPS_handler(opts_no_check_certificate):
+
+def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
     if sys.version_info < (3, 2):
         import httplib
 
@@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
             def https_open(self, req):
                 return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3()
+        return HTTPSHandlerV3(**kwargs)
     else:
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
         context.verify_mode = (ssl.CERT_NONE
@@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
             context.load_default_certs()
         except AttributeError:
             pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context)
+        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 
 class ExtractorError(Exception):
     """Error during info extraction."""
@@ -1102,3 +1103,24 @@ class HEADRequest(compat_urllib_request.Request):
 
 def int_or_none(v):
     return v if v is None else int(v)
+
+
+def parse_duration(s):
+    if s is None:
+        return None
+
+    m = re.match(
+        r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
+    if not m:
+        return None
+    res = int(m.group('secs'))
+    if m.group('mins'):
+        res += int(m.group('mins')) * 60
+        if m.group('hours'):
+            res += int(m.group('hours')) * 60 * 60
+    return res
+
+
+def prepend_extension(filename, ext):
+    name, real_ext = os.path.splitext(filename) 
+    return u'{0}.{1}{2}'.format(name, ext, real_ext)
index b3d015634850187f6c05bc73c0f12422dbbb5169..bf5fc8212660cdbe1004836bfb7f6da301b1df21 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.12.23.4'
+__version__ = '2014.01.03'