Merge pull request #2939 from codesparkle/upload-date-fix

author Sergey M. <dstftw@gmail.com>

Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)

committer Sergey M. <dstftw@gmail.com>

Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)
author Sergey M. <dstftw@gmail.com>
Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)
committer Sergey M. <dstftw@gmail.com>
Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py

index 929aafdff3e848af3295eacf1520ec0ec0334966..dcbbdef4346c36c789e49531df1dc602bc35255b 100644 (file)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -19,7 +19,7 @@ class BandcampIE(InfoExtractor):
          'md5': 'c557841d5e50261777a6585648adf439',
          'info_dict': {
              "title": "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
-            "duration": 10,
+            "duration": 9.8485,
          },
          '_skip': 'There is a limit of 200 free downloads / month for the test song'
      }]
@@ -28,36 +28,32 @@ class BandcampIE(InfoExtractor):
          mobj = re.match(self._VALID_URL, url)
          title = mobj.group('title')
          webpage = self._download_webpage(url, title)
-        # We get the link to the free download page
          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
-        if m_download is None:
+        if not m_download:
              m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
              if m_trackinfo:
                  json_code = m_trackinfo.group(1)
-                data = json.loads(json_code)
-                d = data[0]
+                data = json.loads(json_code)[0]
  
-                duration = int(round(d['duration']))
                  formats = []
-                for format_id, format_url in d['file'].items():
-                    ext, _, abr_str = format_id.partition('-')
-
+                for format_id, format_url in data['file'].items():
+                    ext, abr_str = format_id.split('-', 1)
                      formats.append({
                          'format_id': format_id,
                          'url': format_url,
-                        'ext': format_id.partition('-')[0],
+                        'ext': ext,
                          'vcodec': 'none',
-                        'acodec': format_id.partition('-')[0],
-                        'abr': int(format_id.partition('-')[2]),
+                        'acodec': ext,
+                        'abr': int(abr_str),
                      })
  
                  self._sort_formats(formats)
  
                  return {
-                    'id': compat_str(d['id']),
-                    'title': d['title'],
+                    'id': compat_str(data['id']),
+                    'title': data['title'],
                      'formats': formats,
-                    'duration': duration,
+                    'duration': float(data['duration']),
                  }
              else:
                  raise ExtractorError('No free songs found')
@@ -67,11 +63,9 @@ class BandcampIE(InfoExtractor):
              r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
              webpage, re.MULTILINE | re.DOTALL).group('id')
  
-        download_webpage = self._download_webpage(download_link, video_id,
-                                                  'Downloading free downloads page')
-        # We get the dictionary of the track from some javascrip code
-        info = re.search(r'items: (.*?),$',
-                         download_webpage, re.MULTILINE).group(1)
+        download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
+        # We get the dictionary of the track from some javascript code
+        info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
          info = json.loads(info)[0]
          # We pick mp3-320 for now, until format selection can be easily implemented.
          mp3_info = info['downloads']['mp3-320']
@@ -100,7 +94,7 @@ class BandcampIE(InfoExtractor):
  
  class BandcampAlbumIE(InfoExtractor):
      IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
  
      _TEST = {
          'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -123,7 +117,7 @@ class BandcampAlbumIE(InfoExtractor):
          'params': {
              'playlistend': 2
          },
-        'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+        'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 2861332826b5f20cdf0493edfdba8a7a12f357a8..38a357d3b0406906144e25cbbc45fbe74d2f6c2c 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -363,8 +363,13 @@ class GenericIE(InfoExtractor):
                      return self.url_result('http://' + url)
                  else:
                      if default_search == 'auto_warning':
-                        self._downloader.report_warning(
-                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
+                        if re.match(r'^(?:url|URL)$', url):
+                            raise ExtractorError(
+                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
+                                expected=True)
+                        else:
+                            self._downloader.report_warning(
+                                'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                      return self.url_result('ytsearch:' + url)
              else:
                  assert ':' in default_search
@@ -560,7 +565,7 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded NovaMov-based player
          mobj = re.search(
-            r'''(?x)<iframe[^>]+?src=(["\'])
+            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                      (?P<url>http://(?:(?:embed|www)\.)?
                          (?:novamov\.com|
                             nowvideo\.(?:ch|sx|eu|at|ag|co)|
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py

index 0650f956481c9011032a278fc1a9375b98e26539..53b34f5e646b233dd72a0657be5d285e1a534ddc 100644 (file)
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
  
  
  class NDRIE(InfoExtractor):
@@ -45,13 +48,12 @@ class NDRIE(InfoExtractor):
  
          page = self._download_webpage(url, video_id, 'Downloading page')
  
-        title = self._og_search_title(page)
+        title = self._og_search_title(page).strip()
          description = self._og_search_description(page)
+        if description:
+            description = description.strip()
  
-        mobj = re.search(
-            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
-            page)
-        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
  
          formats = []
  
@@ -66,10 +68,12 @@ class NDRIE(InfoExtractor):
  
          video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
          if video_url:
-            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
-                page, 'thumbnail', fatal=False)
-            if thumbnail:
-                thumbnail = 'http://www.ndr.de' + thumbnail
+            thumbnails = re.findall(r'''\d+: {src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
+            if thumbnails:
+                QUALITIES = ['xs', 's', 'm', 'l', 'xl']
+                thumbnails.sort(key=lambda thumb: QUALITIES.index(thumb[1]) if thumb[1] in QUALITIES else -1)
+                thumbnail = 'http://www.ndr.de' + thumbnails[-1][0]
+
              for format_id in ['lo', 'hi', 'hq']:
                  formats.append({
                      'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py

index b1bcb7e54cf3f01989eb17c51160acce680eed2c..1c5e9401f36c72a73a701bdffc89529979a1eaaf 100644 (file)
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -4,9 +4,7 @@ import re
  
  from .brightcove import BrightcoveIE
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError
  
  
  class NownessIE(InfoExtractor):
@@ -14,9 +12,10 @@ class NownessIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
-        'file': '2520295746001.mp4',
-        'md5': '0ece2f70a7bd252c7b00f3070182d418',
+        'md5': '068bc0202558c2e391924cb8cc470676',
          'info_dict': {
+            'id': '2520295746001',
+            'ext': 'mp4',
              'description': 'Candor: The Art of Gesticulation',
              'uploader': 'Nowness',
              'title': 'Candor: The Art of Gesticulation',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 3193fd152cc7dda04ae2f4bfc20732f0cc2513e9..638ff8af5ae995a5ebe7a5d4171a7c02ab0feec4 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.05.17'
+__version__ = '2014.05.19'
author	Sergey M. <dstftw@gmail.com>
	Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)
committer	Sergey M. <dstftw@gmail.com>
	Tue, 20 May 2014 12:53:28 +0000 (19:53 +0700)
youtube_dl/extractor/bandcamp.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/ndr.py		patch | blob | history
youtube_dl/extractor/nowness.py		patch | blob | history
youtube_dl/version.py		patch | blob | history