Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
youtube_dl/extractor/wimp.py
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index 9a6bb0c768a046e96bac0aa3dd39875821119e83..79fd53e0c8e85daae8efd86b70dc1302c1c0a629 100644 (file)
@@ -6,14 +6,15 @@ from .common import InfoExtractor
 
 
 class WimpIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
+    _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
     _TEST = {
-        'url': 'http://www.wimp.com/deerfence/',
-        'file': 'deerfence.flv',
-        'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
+        'url': 'http://www.wimp.com/maruexhausted/',
+        'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
         'info_dict': {
-            "title": "Watch Till End: Herd of deer jump over a fence.",
-            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
+            'id': 'maruexhausted',
+            'ext': 'flv',
+            'title': 'Maru is exhausted.',
+            'description': 'md5:57e099e857c0a4ea312542b684a869b8',
         }
     }
 
@@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):
             'title': self._og_search_title(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
-        }
+        }
\ No newline at end of file
index 5b0d30ed1f426114bf5047997496ce1c08baa923..49cca4c6306cd379acc59ab5e2cf1633e34f7afc 100644 (file)
@@ -1457,9 +1457,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                      |
                         ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
-    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
     IE_NAME = u'youtube:playlist'
 
     def _real_initialize(self):
@@ -1507,29 +1507,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(u'For downloading YouTube.com top lists, use '
                 u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
+        url = self._TEMPLATE_URL % playlist_id
+        page = self._download_webpage(url, playlist_id)
+        more_widget_html = content_html = page
+
         # Extract the video ids from the playlist pages
         ids = []
 
         for page_num in itertools.count(1):
-            url = self._TEMPLATE_URL % (playlist_id, page_num)
-            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
-            matches = re.finditer(self._VIDEO_RE, page)
+            matches = re.finditer(self._VIDEO_RE, content_html)
             # We remove the duplicates and the link with index 0
             # (it's not the first video of the playlist)
             new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
             ids.extend(new_ids)
 
-            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
                 break
 
-        try:
-            playlist_title = self._og_search_title(page)
-        except RegexNotFoundError:
-            self.report_warning(
-                u'Playlist page is missing OpenGraph title, falling back ...',
-                playlist_id)
-            playlist_title = self._html_search_regex(
-                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        playlist_title = self._html_search_regex(
+                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
index b722f817541906da9da4f99d4070d18abe500037..c2660a3162ac75cdcee90b10b32645f15ea25414 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.02.20'
+__version__ = '2014.02.21'