[bambuser] Add an extractor for channels (closes #1702)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sat, 2 Nov 2013 18:50:57 +0000 (19:50 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sat, 2 Nov 2013 18:50:57 +0000 (19:50 +0100)
test/test_playlists.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bambuser.py

index d6a8d56df99609e50ea5885d2f5a3eb48b72cf37..de1e8d88edc647806e53a17574ad4415f09563cc 100644 (file)
@@ -20,6 +20,7 @@ from youtube_dl.extractor import (
     SoundcloudUserIE,
     LivestreamIE,
     NHLVideocenterIE,
     SoundcloudUserIE,
     LivestreamIE,
     NHLVideocenterIE,
+    BambuserChannelIE,
 )
 
 
 )
 
 
@@ -85,5 +86,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Highlights')
         self.assertEqual(len(result['entries']), 12)
 
         self.assertEqual(result['title'], u'Highlights')
         self.assertEqual(len(result['entries']), 12)
 
+    def test_bambuser_channel(self):
+        # Run the channel extractor against a fake downloader.
+        extractor = BambuserChannelIE(FakeYDL())
+        playlist = extractor.extract('http://bambuser.com/channel/pixelversity')
+        self.assertIsPlaylist(playlist)
+        self.assertEqual(playlist['title'], u'pixelversity')
+        # Only a lower bound on the number of entries is checked.
+        entry_count = len(playlist['entries'])
+        self.assertTrue(entry_count >= 66)
+
 if __name__ == '__main__':
     unittest.main()
 if __name__ == '__main__':
     unittest.main()
index a1e35eb468952e7ce3be8b521292d056ba7875a8..a69c08f51ccaf7f54f05f73a14d0cfb303f51b52 100644 (file)
@@ -9,7 +9,7 @@ from .arte import (
     ArteTVFutureIE,
 )
 from .auengine import AUEngineIE
     ArteTVFutureIE,
 )
 from .auengine import AUEngineIE
-from .bambuser import BambuserIE
+from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
index cf8da22e3b71afa0e48afe6affdd10b996932337..f3b36f4733021e05fb8c3db5bf3d218cb2e59536 100644 (file)
@@ -1,10 +1,15 @@
 import re
 import json
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 
 from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+)
 
 
 class BambuserIE(InfoExtractor):
 
 
 class BambuserIE(InfoExtractor):
+    IE_NAME = u'bambuser'
     _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
     _API_KEY = '005f64509e19a868399060af746a00aa'
 
     _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
     _API_KEY = '005f64509e19a868399060af746a00aa'
 
@@ -33,10 +38,43 @@ class BambuserIE(InfoExtractor):
             'id': video_id,
             'title': info['title'],
             'url': info['url'],
             'id': video_id,
             'title': info['title'],
             'url': info['url'],
-            'thumbnail': info['preview'],
+            'thumbnail': info.get('preview'),
             'duration': int(info['length']),
             'view_count': int(info['views_total']),
             'uploader': info['username'],
             'uploader_id': info['uid'],
         }
 
             'duration': int(info['length']),
             'view_count': int(info['views_total']),
             'uploader': info['username'],
             'uploader_id': info['uid'],
         }
 
+
+class BambuserChannelIE(InfoExtractor):
+    """Extract all the broadcasts of a Bambuser channel as a playlist.
+
+    The broadcasts are listed through the site's xhr-api endpoint, which is
+    queried page by page with at most _STEP results per request.
+    """
+    IE_NAME = u'bambuser:channel'
+    # Same scheme/host pattern as BambuserIE: accept http and https, and
+    # escape the dot so that it only matches a literal '.'
+    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+    # The maximum number we can get with each request
+    _STEP = 50
+
+    def _real_extract(self, url):
+        """Return a playlist dict for the channel that url points to.
+
+        The result has '_type' set to 'playlist', the channel's user name
+        as 'title' and one url_result entry per broadcast in 'entries'.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        user = mobj.group('user')
+        urls = []
+        # The vid of the last broadcast of the previous page; it is sent as
+        # vid_older_than, and it is empty for the first request
+        last_id = ''
+        for i in itertools.count(1):
+            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
+                '&method=broadcast&format=json&vid_older_than={last}'
+                ).format(user=user, count=self._STEP, last=last_id)
+            req = compat_urllib_request.Request(req_url)
+            # Without setting this header, we wouldn't get any result
+            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
+            info_json = self._download_webpage(req, user,
+                u'Downloading page %d' % i)
+            results = json.loads(info_json)['result']
+            # An empty page means there are no more broadcasts to fetch
+            if not results:
+                break
+            last_id = results[-1]['vid']
+            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
+
+        return {
+            '_type': 'playlist',
+            'title': user,
+            'entries': urls,
+        }