[vimeo] add an extractor for channels
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 29 Jul 2013 11:12:09 +0000 (13:12 +0200)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 29 Jul 2013 11:12:09 +0000 (13:12 +0200)
test/test_playlists.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/vimeo.py

index f8cc75afbe4b1ff7740700cd22bb6be961813b4c..65de3a55c8694dd764dafa10977448d9274727d2 100644 (file)
@@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import DailymotionPlaylistIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -26,5 +26,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'SPORT')
         self.assertTrue(len(result['entries']) > 20)
 
         self.assertEqual(result['title'], u'SPORT')
         self.assertTrue(len(result['entries']) > 20)
 
+    def test_vimeo_channel(self):
+        dl = FakeYDL()
+        ie = VimeoChannelIE(dl)
+        result = ie.extract('http://vimeo.com/channels/tributes')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Vimeo Tributes')
+        self.assertTrue(len(result['entries']) > 24)
+
 if __name__ == '__main__':
     unittest.main()
 if __name__ == '__main__':
     unittest.main()
index 063a3d0658fa66536fe43ff3e872567c0e9a6031..9f4dd3c243aaf8fa962ab3b42e674177f14621af 100644 (file)
@@ -71,7 +71,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
-from .vimeo import VimeoIE
+from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
index ac32043c1e651abaadf1a223c18e4983b9301ba7..cc9c8d0188749761b79b90652d3c0c843a24eda3 100644 (file)
@@ -1,5 +1,6 @@
 import json
 import re
 import json
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
 
 from .common import InfoExtractor
 from ..utils import (
@@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
             'thumbnail':    video_thumbnail,
             'description':  video_description,
         }]
             'thumbnail':    video_thumbnail,
             'description':  video_description,
         }]
+
+
+class VimeoChannelIE(InfoExtractor):
+    IE_NAME = u'vimeo:channel'
+    _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
+    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel_id =  mobj.group('id')
+        video_ids = []
+
+        for pagenum in itertools.count(1):
+            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
+                                             channel_id, u'Downloading page %s' % pagenum)
+            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
+            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+                break
+
+        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+                   for video_id in video_ids]
+        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
+                                                webpage, u'channel title')
+        return {'_type': 'playlist',
+                'id': channel_id,
+                'title': channel_title,
+                'entries': entries,
+                }