Merge pull request #1413 from tewe/master
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 13 Sep 2013 17:55:49 +0000 (19:55 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 13 Sep 2013 19:34:31 +0000 (21:34 +0200)
Add Ustream channel support

devscripts/youtube_genalgo.py
test/test_playlists.py
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/kickstarter.py [new file with mode: 0644]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/youtube.py

index 6e359536617d3e4d07cba41752480a3c3db47674..b390c7e2ead61db13197020327d082dbae30f0ba 100644 (file)
@@ -32,9 +32,9 @@ tests = [
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82 - vflZK4ZYR 2013/08/23
+    # 82 - vflGNjMhJ 2013/09/12
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
+     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
     # 81 - vflLC8JvQ 2013/07/25
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
index 65de3a55c8694dd764dafa10977448d9274727d2..4a2e00b01a5a368a8ef2b14ecae231e53491935c 100644 (file)
@@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -34,5 +34,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Vimeo Tributes')
         self.assertTrue(len(result['entries']) > 24)
 
+    def test_ustream_channel(self):
+        dl = FakeYDL()
+        ie = UstreamChannelIE(dl)
+        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'5124905')
+        self.assertTrue(len(result['entries']) >= 11)
+
 if __name__ == '__main__':
     unittest.main()
index b289bd9e26bbc9993e6f1295a31d20b3275f5f48..c2f992b8e00f333f59da38ffd59153183d057281 100644 (file)
@@ -492,6 +492,8 @@ class YoutubeDL(object):
                 self.report_writedescription(descfn)
                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(info_dict['description'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There\'s no description to write.')
             except (OSError, IOError):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return
index a7cddef733bc611ccf7f6ea5ae6db0ae9613903b..06f9542d2389d8f2455b98e7532c4b4c6fea3944 100644 (file)
@@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
+from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
index 1f02519a0149ad85aad3886bf0a01e5c986064c0..1db9b24cf204cc26d68b1a1bdaff93577c3ae903 100644 (file)
@@ -1,3 +1,4 @@
+# encoding: utf-8
 import re
 import xml.etree.ElementTree
 
@@ -5,24 +6,29 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
 
     _TEST = {
-        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
-        u'file': u'889861.flv',
-        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
+        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+        u'file': u'922470.flv',
         u'info_dict': {
-            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
-            u'upload_date': u'20130620',
+            u'title': u'Zapping - 26/08/13',
+            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+            u'upload_date': u'20130826',
+        },
+        u'params': {
+            u'skip_download': True,
         },
-        u'skip': u'Requires rtmpdump'
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        if video_id is None:
+            webpage = self._download_webpage(url, mobj.group('path'))
+            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
         info_url = self._VIDEO_INFO_TEMPLATE % video_id
         info_page = self._download_webpage(info_url,video_id, 
                                            u'Downloading video info')
@@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
                 'ext': 'flv',
                 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
                 'thumbnail': media.find('IMAGES/GRAND').text,
+                'description': infos.find('DESCRIPTION').text,
+                'view_count': int(infos.find('NB_VUES').text),
                 }
index 7585b70618d1e4f92e8297fbf4d1397359a5224b..cd3bbe65f5dd9891f5cf4a68fb1adcd8a45c4196 100644 (file)
@@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
         u"file": u"6410818.mp4",
         u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
         u"info_dict": {
-            u"title": u"Arma III - Community Guide: SITREP I",
+            u"title": u"Arma 3 - Community Guide: SITREP I",
             u"upload_date": u"20130627", 
         }
     }
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
new file mode 100644 (file)
index 0000000..50bc883
--- /dev/null
@@ -0,0 +1,37 @@
+import re
+
+from .common import InfoExtractor
+
+
+class KickStarterIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _TEST = {
+        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+        u"file": u"1404461844.mp4",
+        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
+        u"info_dict": {
+            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+        webpage_src = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r'data-video="(.*?)">',
+            webpage_src, u'video URL')
+        if 'mp4' in video_url:
+            ext = 'mp4'
+        else:
+            ext = 'flv'
+        video_title = self._html_search_regex(r"<title>(.*?)</title>",
+            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+
+        results = [{
+                    'id': video_id,
+                    'url': video_url,
+                    'title': video_title,
+                    'ext': ext,
+                    }]
+        return results
index 16cdcc76592feb03ad9b30b962e66041eb391168..f69b27d4405d719e9c8d87807d2c1392753150eb 100644 (file)
@@ -1,9 +1,11 @@
-from HTMLParser import HTMLParser
 import json
 import re
-from urlparse import urljoin
 
 from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_html_parser,
+)
 
 
 class UstreamIE(InfoExtractor):
@@ -49,7 +51,7 @@ class UstreamIE(InfoExtractor):
 
 # More robust than regular expressions
 
-class ChannelParser(HTMLParser):
+class ChannelParser(compat_html_parser.HTMLParser):
     """
     <meta name="ustream:channel_id" content="1234">
     """
@@ -65,13 +67,13 @@ class ChannelParser(HTMLParser):
         if value.isdigit():
             self.channel_id = value
 
-class SocialstreamParser(HTMLParser):
+class SocialstreamParser(compat_html_parser.HTMLParser):
     """
     <li class="content123 video" data-content-id="123" data-length="1452"
         data-href="/recorded/123" data-og-url="/recorded/123">
     """
     def __init__(self):
-        HTMLParser.__init__(self)
+        compat_html_parser.HTMLParser.__init__(self)
         self.content_ids = []
 
     def handle_starttag(self, tag, attrs):
@@ -88,8 +90,6 @@ class UstreamChannelIE(InfoExtractor):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         slug = m.group('slug')
-        # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines,
-        # so if we got this far it's probably percent encoded and we needn't worry.
 
         p = ChannelParser()
         p.feed(self._download_webpage(url, slug))
@@ -100,16 +100,12 @@ class UstreamChannelIE(InfoExtractor):
         BASE = 'http://www.ustream.tv'
         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
         while next_url:
-            reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id))
+            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
             p.feed(reply['data'])
             next_url = reply['nextUrl']
         p.close()
         video_ids = p.content_ids
 
-        # From YoutubeChannelIE
-
-        self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
         urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
         url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return [self.playlist_result(url_entries, channel_id)]
+        return self.playlist_result(url_entries, channel_id)
index 2e0d70eaf59a15ded6ec24d30cbcd793a3c4bc11..f49665925fdeadeb9f3f64f9eafb7c7a12080fee 100644 (file)
@@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         elif len(s) == 83:
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
         elif len(s) == 81:
             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
         elif len(s) == 80: