Merge branch 'master' into opener-to-ydl
author Philipp Hagemeister <phihag@phihag.de>
Sun, 24 Nov 2013 14:18:44 +0000 (15:18 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 24 Nov 2013 14:18:44 +0000 (15:18 +0100)
20 files changed:
test/test_all_urls.py
test/test_playlists.py
test/test_youtube_lists.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/clipfish.py [new file with mode: 0644]
youtube_dl/extractor/collegehumor.py
youtube_dl/extractor/common.py
youtube_dl/extractor/howcast.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/niconico.py [new file with mode: 0644]
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/viki.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index 56e5f80e1f6ddb17fef3ee5c499c238996c12051..42813da1a4059f2049772e1f2767558f9bc2498d 100644 (file)
@@ -100,6 +100,7 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_keywords(self):
         self.assertMatch(':ytsubs', ['youtube:subscriptions'])
         self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':ythistory', ['youtube:history'])
         self.assertMatch(':thedailyshow', ['ComedyCentral'])
         self.assertMatch(':tds', ['ComedyCentral'])
         self.assertMatch(':colbertreport', ['ComedyCentral'])
index d83b3bf519649839e42641950853018d9592441e..7c67239a438917fd6e64af152cb63c124ff6721c 100644 (file)
@@ -102,7 +102,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://bambuser.com/channel/pixelversity')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], u'pixelversity')
-        self.assertTrue(len(result['entries']) >= 66)
+        self.assertTrue(len(result['entries']) >= 60)
 
     def test_bandcamp_album(self):
         dl = FakeYDL()
index 4b7a7847bd3a33a9a2bff3e99f9f4cff0de7eebf..938517a2de02c1d81f88f5ca64efbc13e53b54fa 100644 (file)
@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'ytdl test PL')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
     def test_issue_673(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')[0]
+        result = ie.extract('PLBB231211A4F62143')
         self.assertTrue(len(result['entries']) > 25)
 
     def test_youtube_playlist_long(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         self.assertIsPlaylist(result)
         self.assertTrue(len(result['entries']) >= 799)
 
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
         #651
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist_empty(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
         self.assertIsPlaylist(result)
         self.assertEqual(len(result['entries']), 0)
 
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
+        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         entries = result['entries']
         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
         self.assertEqual(len(entries), 25)
@@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubeChannelIE(dl)
         #test paginated channel
-        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
+        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
         self.assertTrue(len(result['entries']) > 90)
         #test autogenerated channel
-        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
+        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
         self.assertTrue(len(result['entries']) >= 18)
 
     def test_youtube_user(self):
         dl = FakeYDL()
         ie = YoutubeUserIE(dl)
-        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
+        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
         self.assertTrue(len(result['entries']) >= 320)
 
     def test_youtube_safe_search(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
         self.assertEqual(len(result['entries']), 2)
 
     def test_youtube_show(self):
index 0a845a344baa259151347b41966fb3ce23dedf4b..87eb1a0b37fd5342b9a50d55109a2efe29a8c2fb 100644 (file)
@@ -104,6 +104,7 @@ class YoutubeDL(object):
     playlistend:       Playlist item to end at.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
+    logger:            Log messages to a logging.Logger instance.
     logtostderr:       Log messages to stderr instead of stdout.
     writedescription:  Write the video description to a .description file
     writeinfojson:     Write the video description to a .info.json file
@@ -204,7 +205,9 @@ class YoutubeDL(object):
 
     def to_screen(self, message, skip_eol=False):
         """Print message to stdout if not in quiet mode."""
-        if not self.params.get('quiet', False):
+        if self.params.get('logger'):
+            self.params['logger'].debug(message)
+        elif not self.params.get('quiet', False):
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
             write_string(output, self._screen_file)
@@ -212,10 +215,13 @@ class YoutubeDL(object):
     def to_stderr(self, message):
         """Print message to stderr."""
         assert type(message) == type(u'')
-        output = message + u'\n'
-        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-            output = output.encode(preferredencoding())
-        sys.stderr.write(output)
+        if self.params.get('logger'):
+            self.params['logger'].error(message)
+        else:
+            output = message + u'\n'
+            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+                output = output.encode(preferredencoding())
+            sys.stderr.write(output)
 
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
@@ -370,15 +376,17 @@ class YoutubeDL(object):
     def _match_entry(self, info_dict):
         """ Returns None iff the file should be downloaded """
 
-        title = info_dict['title']
-        matchtitle = self.params.get('matchtitle', False)
-        if matchtitle:
-            if not re.search(matchtitle, title, re.IGNORECASE):
-                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
-        rejecttitle = self.params.get('rejecttitle', False)
-        if rejecttitle:
-            if re.search(rejecttitle, title, re.IGNORECASE):
-                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        if 'title' in info_dict:
+            # This can happen when we're just evaluating the playlist
+            title = info_dict['title']
+            matchtitle = self.params.get('matchtitle', False)
+            if matchtitle:
+                if not re.search(matchtitle, title, re.IGNORECASE):
+                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+            rejecttitle = self.params.get('rejecttitle', False)
+            if rejecttitle:
+                if re.search(rejecttitle, title, re.IGNORECASE):
+                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
         date = info_dict.get('upload_date', None)
         if date is not None:
             dateRange = self.params.get('daterange', DateRange())
@@ -389,8 +397,8 @@ class YoutubeDL(object):
             if age_limit < info_dict.get('age_limit', 0):
                 return u'Skipping "' + title + '" because it is age restricted'
         if self.in_download_archive(info_dict):
-            return (u'%(title)s has already been recorded in archive'
-                    % info_dict)
+            return (u'%s has already been recorded in archive'
+                    % info_dict.get('title', info_dict.get('id', u'video')))
         return None
 
     @staticmethod
@@ -469,7 +477,7 @@ class YoutubeDL(object):
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
         elif result_type == 'playlist':
-            self.add_extra_info(ie_result, extra_info)
+
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -499,6 +507,12 @@ class YoutubeDL(object):
                     'webpage_url': ie_result['webpage_url'],
                     'extractor_key': ie_result['extractor_key'],
                 }
+
+                reason = self._match_entry(entry)
+                if reason is not None:
+                    self.to_screen(u'[download] ' + reason)
+                    continue
+
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
                                                       extra_info=extra)
@@ -654,7 +668,7 @@ class YoutubeDL(object):
 
         # Forced printings
         if self.params.get('forcetitle', False):
-            compat_print(info_dict['title'])
+            compat_print(info_dict['fulltitle'])
         if self.params.get('forceid', False):
             compat_print(info_dict['id'])
         if self.params.get('forceurl', False):
@@ -825,7 +839,16 @@ class YoutubeDL(object):
         fn = self.params.get('download_archive')
         if fn is None:
             return False
-        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        extractor = info_dict.get('extractor_id')
+        if extractor is None:
+            if 'id' in info_dict:
+                extractor = info_dict.get('ie_key')  # key in a playlist
+        if extractor is None:
+            return False  # Incomplete video information
+        # Future-proof against any change in case
+        # and backwards compatibility with prior versions
+        extractor = extractor.lower()
+        vid_id = extractor + u' ' + info_dict['id']
         try:
             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                 for line in archive_file:
index 27886593b4f40a2a4108b867bb7ca71b26379e93..1f15c7eaa03acc63a5d3cbf1e244b292a053344e 100644 (file)
@@ -35,6 +35,7 @@ __authors__  = (
     'Jelle van der Waa',
     'Marcin Cieślak',
     'Anton Larionov',
+    'Takuya Tsuchida',
 )
 
 __license__ = 'Public Domain'
index 02f9e254681d545f2e00f9753ee5c12ca34e8034..1fbd10bc5c410efb82ca9568872042fe951a68fd 100644 (file)
@@ -20,6 +20,7 @@ from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cinemassacre import CinemassacreIE
+from .clipfish import ClipfishIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
@@ -98,6 +99,7 @@ from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
+from .niconico import NiconicoIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
@@ -156,6 +158,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
+from .viki import VikiIE
 from .vk import VKIE
 from .wat import WatIE
 from .websurg import WeBSurgIE
@@ -183,6 +186,7 @@ from .youtube import (
     YoutubeTruncatedURLIE,
     YoutubeWatchLaterIE,
     YoutubeFavouritesIE,
+    YoutubeHistoryIE,
 )
 from .zdf import ZDFIE
 
index 359d4174bc181abb3ec7b210b9f851c182d3f765..3a32c14c598dd2da14841fe68c1cb59582f30799 100644 (file)
@@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
             u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
         },
         u'skip': u'There is a limit of 200 free downloads / month for the test song'
-    }, {
-        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
-        u'playlist': [
-            {
-                u'file': u'1353101989.mp3',
-                u'md5': u'39bc1eded3476e927c724321ddf116cf',
-                u'info_dict': {
-                    u'title': u'Intro',
-                }
-            },
-            {
-                u'file': u'38097443.mp3',
-                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
-                u'info_dict': {
-                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
-                }
-            },
-        ],
-        u'params': {
-            u'playlistend': 2
-        },
-        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
     }]
 
     def _real_extract(self, url):
@@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
             json_code = m_trackinfo.group(1)
             data = json.loads(json_code)
 
-            entries = []
             for d in data:
                 formats = [{
                     'format_id': 'format_id',
                     'url': format_url,
                     'ext': format_id.partition('-')[0]
                 } for format_id, format_url in sorted(d['file'].items())]
-                entries.append({
+                return {
                     'id': compat_str(d['id']),
                     'title': d['title'],
                     'formats': formats,
-                })
-
-            return self.playlist_result(entries, title, title)
+                }
         else:
             raise ExtractorError(u'No free songs found')
 
@@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
     IE_NAME = u'Bandcamp:album'
     _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
 
+    _TEST = {
+        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+        u'playlist': [
+            {
+                u'file': u'1353101989.mp3',
+                u'md5': u'39bc1eded3476e927c724321ddf116cf',
+                u'info_dict': {
+                    u'title': u'Intro',
+                }
+            },
+            {
+                u'file': u'38097443.mp3',
+                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
+                u'info_dict': {
+                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
+                }
+            },
+        ],
+        u'params': {
+            u'playlistend': 2
+        },
+        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+    }
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group('title')
index d8c35465a34fa4c4d4ca822d499892504a51ce62..66fe0ac9ade6fad80d77f0429c136c2d022af16d 100644 (file)
@@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
         params = {'flashID': object_doc.attrib['id'],
                   'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                   }
-        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+        def find_param(name):
+            node = find_xpath_attr(object_doc, './param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
+        playerKey = find_param('playerKey')
         # Not all pages define this value
         if playerKey is not None:
-            params['playerKey'] = playerKey.attrib['value']
-        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+            params['playerKey'] = playerKey
+        # The three fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
         if videoPlayer is not None:
-            params['@videoPlayer'] = videoPlayer.attrib['value']
-        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+            params['@videoPlayer'] = videoPlayer
+        linkBase = find_param('linkBaseURL')
         if linkBase is not None:
-            params['linkBaseURL'] = linkBase.attrib['value']
+            params['linkBaseURL'] = linkBase
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
 
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
new file mode 100644 (file)
index 0000000..95449da
--- /dev/null
@@ -0,0 +1,53 @@
+import re
+import time
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class ClipfishIE(InfoExtractor):
+    IE_NAME = u'clipfish'
+
+    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
+    _TEST = {
+        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
+        u'file': u'4028320.f4v',
+        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
+        u'info_dict': {
+            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
+            u'duration': 399,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+
+        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
+                    (video_id, int(time.time())))
+        info_xml = self._download_webpage(
+            info_url, video_id, note=u'Downloading info page')
+        doc = xml.etree.ElementTree.fromstring(info_xml)
+        title = doc.find('title').text
+        video_url = doc.find('filename').text
+        thumbnail = doc.find('imageurl').text
+        duration_str = doc.find('duration').text
+        m = re.match(
+            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
+            duration_str)
+        if m:
+            duration = (
+                (int(m.group('hours')) * 60 * 60) +
+                (int(m.group('minutes')) * 60) +
+                (int(m.group('seconds')))
+            )
+        else:
+            duration = None
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
index 0c29acfb13eafebe226e23f603f9f18d13a33304..b27c1dfc52401f3c148d48d2b2897d2b06db3834 100644 (file)
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
 
         self.report_extraction(video_id)
         xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        metaXml = self._download_webpage(xmlUrl, video_id,
+        mdoc = self._download_xml(xmlUrl, video_id,
                                          u'Downloading info XML',
                                          u'Unable to download video info XML')
 
-        mdoc = xml.etree.ElementTree.fromstring(metaXml)
         try:
             videoNode = mdoc.findall('./video')[0]
             youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
 
         if next_url.endswith(u'manifest.f4m'):
             manifest_url = next_url + '?hdcore=2.10.3'
-            manifestXml = self._download_webpage(manifest_url, video_id,
+            adoc = self._download_xml(manifest_url, video_id,
                                          u'Downloading XML manifest',
                                          u'Unable to download video info XML')
 
-            adoc = xml.etree.ElementTree.fromstring(manifestXml)
             try:
                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
             except IndexError:
index 423e54ceaa2544d62d2d266f5a499caef5293c72..6ec835f8af56c61af620a389f8706125c21c8099 100644 (file)
@@ -4,6 +4,7 @@ import re
 import socket
 import sys
 import netrc
+import xml.etree.ElementTree
 
 from ..utils import (
     compat_http_client,
@@ -208,6 +209,11 @@ class InfoExtractor(object):
         """ Returns the data of the page as a string """
         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
 
+    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
+        """Return the xml as an xml.etree.ElementTree.Element"""
+        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+
     def to_screen(self, msg):
         """Print msg to screen, prefixing it with '[ie_name]'"""
         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -229,12 +235,14 @@ class InfoExtractor(object):
         self.to_screen(u'Logging in')
 
     #Methods for following #608
-    def url_result(self, url, ie=None):
+    def url_result(self, url, ie=None, video_id=None):
         """Returns a url that points to a page that should be processed"""
         #TODO: ie should be the class used for getting the info
         video_info = {'_type': 'url',
                       'url': url,
                       'ie_key': ie}
+        if video_id is not None:
+            video_info['id'] = video_id
         return video_info
     def playlist_result(self, entries, playlist_id=None, playlist_title=None):
         """Returns a playlist"""
index 46954337f25e1cbd7bae89e7da76d4e93ecc8c9e..bafc5826f680353af40b820609a543192ac73d17 100644 (file)
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
     _TEST = {
         u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
         u'file': u'390161.mp4',
-        u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
+        u'md5': u'8b743df908c42f60cf6496586c7f12c3',
         u'info_dict': {
             u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", 
             u"title": u"How to Tie a Square Knot Properly"
index a200dcd74a5a7af220cedea02a60c01cfd643e79..e2baf44d7e15032022e6b304ace2bf8ef11a09b2 100644 (file)
@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
             'title': info['name'],
             'url': final_song_url,
             'ext': 'mp3',
-            'description': info['description'],
+            'description': info.get('description'),
             'thumbnail': info['pictures'].get('extra_large'),
             'uploader': info['user']['name'],
             'uploader_id': info['user']['username'],
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
new file mode 100644 (file)
index 0000000..729607e
--- /dev/null
@@ -0,0 +1,131 @@
+# encoding: utf-8
+
+import re
+import socket
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_http_client,
+    compat_urllib_error,
+    compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
+    compat_str,
+
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class NiconicoIE(InfoExtractor):
+    IE_NAME = u'niconico'
+    IE_DESC = u'ニコニコ動画'
+
+    _TEST = {
+        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
+        u'file': u'sm22312215.mp4',
+        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
+        u'info_dict': {
+            u'title': u'Big Buck Bunny',
+            u'uploader': u'takuya0301',
+            u'uploader_id': u'2698420',
+            u'upload_date': u'20131123',
+            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+        },
+        u'params': {
+            u'username': u'ydl.niconico@gmail.com',
+            u'password': u'youtube-dl',
+        },
+    }
+
+    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
+    _NETRC_MACHINE = 'niconico'
+    # If True it will raise an error if no login info is provided
+    _LOGIN_REQUIRED = True
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        # No authentication to be performed
+        if username is None:
+            if self._LOGIN_REQUIRED:
+                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            return False
+
+        # Log in
+        login_form_strs = {
+            u'mail': username,
+            u'password': password,
+        }
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            u'https://secure.nicovideo.jp/secure/login', login_data)
+        login_results = self._download_webpage(
+            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
+            self._downloader.report_warning(u'unable to log in: bad username or password')
+            return False
+        return True
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+
+        # Get video webpage. We are not actually interested in it, but need
+        # the cookies in order to be able to download the info webpage
+        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
+
+        video_info_webpage = self._download_webpage(
+            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
+            note=u'Downloading video info page')
+
+        # Get flv info
+        flv_info_webpage = self._download_webpage(
+            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            video_id, u'Downloading flv info')
+        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
+
+        # Start extracting information
+        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
+        video_title = video_info.find('.//title').text
+        video_extension = video_info.find('.//movie_type').text
+        video_format = video_extension.upper()
+        video_thumbnail = video_info.find('.//thumbnail_url').text
+        video_description = video_info.find('.//description').text
+        video_uploader_id = video_info.find('.//user_id').text
+        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+        video_view_count = video_info.find('.//view_counter').text
+        video_webpage_url = video_info.find('.//watch_url').text
+
+        # uploader
+        video_uploader = video_uploader_id
+        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
+        try:
+            user_info_webpage = self._download_webpage(
+                url, video_id, note=u'Downloading user information')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+        else:
+            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
+            video_uploader = user_info.find('.//nickname').text
+
+        return {
+            'id':          video_id,
+            'url':         video_real_url,
+            'title':       video_title,
+            'ext':         video_extension,
+            'format':      video_format,
+            'thumbnail':   video_thumbnail,
+            'description': video_description,
+            'uploader':    video_uploader,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'view_count':  video_view_count,
+            'webpage_url': video_webpage_url,
+        }
index d476693ec0f98d296dc92753ed0a6bc3dfa5f870..9faf3a5e3f677ae8b00454c492f6ef2bf129d329 100644 (file)
@@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
             u'title': u'youtube-dl test video  \'/\\ ä ↭',
             u'duration': 9,
         },
+        u'skip': u'Only available from the EU'
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
new file mode 100644 (file)
index 0000000..78d03c0
--- /dev/null
@@ -0,0 +1,91 @@
+import re
+
+from ..utils import (
+    unified_strdate,
+)
+from .subtitles import SubtitlesInfoExtractor
+
+
+class VikiIE(SubtitlesInfoExtractor):
+    """Information extractor for viki.com video pages and their .vtt subtitle tracks."""
+    IE_NAME = u'viki'
+
+    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
+    _TEST = {
+        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
+        u'file': u'1023585v.mp4',
+        u'md5': u'a21454021c2646f5433514177e2caa5f',
+        u'info_dict': {
+            u'title': u'Heirs Episode 14',
+            u'uploader': u'SBS',
+            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            u'upload_date': u'20131121',
+            u'age_limit': 13,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for url, or None when only listing subtitles."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        uploader = self._html_search_regex(
+            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
+            u'uploader')
+        if uploader is not None:
+            uploader = uploader.strip()
+
+        rating_str = self._html_search_regex(
+            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
+            u'rating information', default='').strip()
+        RATINGS = {
+            'G': 0,
+            'PG': 10,
+            'PG-13': 13,
+            'R': 16,
+            'NC': 18,
+        }
+        # Unlisted or missing ratings fall through to age_limit None
+        age_limit = RATINGS.get(rating_str)
+
+        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
+        info_webpage = self._download_webpage(info_url, video_id)
+        video_url = self._html_search_regex(
+            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
+
+        upload_date_str = self._html_search_regex(
+            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
+        upload_date = (unified_strdate(upload_date_str)
+                       if upload_date_str is not None else None)
+
+        # Subtitles: when 'listsubtitles' is set, only list them and return nothing
+        video_subtitles = self.extract_subtitles(video_id, info_webpage)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, info_webpage)
+            return
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+            'thumbnail': thumbnail,
+            'age_limit': age_limit,
+            'uploader': uploader,
+            'subtitles': video_subtitles,
+            'upload_date': upload_date,
+        }
+
+    def _get_available_subtitles(self, video_id, info_webpage):
+        """Map subtitle language code to URL for every <track> found in info_webpage."""
+        res = {}
+        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
+            if m:
+                res[m.group('lang')] = sturl
+        return res
index 41838237c617f9758054cef6499ef97b17df5e93..64d4c2445892baad5decb53620f605368ff335d5 100644 (file)
@@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             })
         return results
 
-class YoutubePlaylistIE(InfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = u'YouTube.com playlists'
     _VALID_URL = r"""(?:
                         (?:https?://)?
@@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
                      |
                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
     IE_NAME = u'youtube:playlist'
 
     @classmethod
@@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
         """Receives a URL and returns True if suitable for this IE."""
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
+    def _real_initialize(self):
+        self._login()
+
     def _real_extract(self, url):
         # Extract playlist id
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1548,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor):
             video_id = query_dict['v'][0]
             if self._downloader.params.get('noplaylist'):
                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        # Download playlist videos from API
-        videos = []
+        # Extract the video ids from the playlist pages
+        ids = []
 
         for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            # The ids are duplicated
+            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            ids.extend(new_ids)
 
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                 break
 
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
+        playlist_title = self._og_search_title(page)
 
-        videos = [v[1] for v in sorted(videos)]
-
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+        return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeChannelIE(InfoExtractor):
@@ -1640,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
 
         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
 
-        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
-        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
-        return [self.playlist_result(url_entries, channel_id)]
+        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                       for video_id in video_ids]
+        return self.playlist_result(url_entries, channel_id)
 
 
 class YoutubeUserIE(InfoExtractor):
@@ -1706,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                 break
 
-        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
-        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
-        return [self.playlist_result(url_results, playlist_title = username)]
+        url_results = [
+            self.url_result(video_id, 'Youtube', video_id=video_id)
+            for video_id in video_ids]
+        return self.playlist_result(url_results, playlist_title=username)
+
 
 class YoutubeSearchIE(SearchInfoExtractor):
     IE_DESC = u'YouTube.com searches'
@@ -1749,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
 
         if len(video_ids) > n:
             video_ids = video_ids[:n]
-        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
+        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                  for video_id in video_ids]
         return self.playlist_result(videos, query)
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
@@ -1809,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             feed_html = info['feed_html']
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
-            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+            feed_entries.extend(
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in ids)
             if info['paging'] is None:
                 break
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
@@ -1834,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
     _PAGING_STEP = 100
     _PERSONAL_FEED = True
 
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    """Extract the Youtube watch-history feed (authentication required)."""
+    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
+    _FEED_NAME = 'history'
+    _PERSONAL_FEED = True
+    _PLAYLIST_TITLE = u'Youtube Watch History'
+
+    def _real_extract(self, url):
+        # The paging step is read from the page; it is a ridiculously big number (like 1374343569725646)
+        history_page = self._download_webpage('https://www.youtube.com/feed/history', u'History')
+        self._PAGING_STEP = int(self._search_regex(r'data-paging="(\d+)"', history_page, u'data-paging'))
+        return super(YoutubeHistoryIE, self)._real_extract(url)
+
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = u'youtube:favorites'
     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
index e5f441707c10b76f1f84c07454a71d6aa4cd030d..cd9670166e582ae9f3074c2371026f0f06c252a1 100644 (file)
@@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
     if signature != sha256(message).digest(): return False
     return True
 
+
 def update_self(to_screen, verbose):
     """Update the program file with the latest version from the repository"""
 
@@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
         return
 
     version_id = versions_info['latest']
+
+    def version_tuple(version_str):
+        return tuple(map(int, version_str.split('.')))
+    if version_tuple(__version__) >= version_tuple(version_id):
+        to_screen(u'youtube-dl is up to date (%s)' % __version__)
+        return
+
     to_screen(u'Updating to version ' + version_id + '...')
     version = versions_info['versions'][version_id]
 
index 0d2b7bd10e1f385bf6515c0faae6588a1c7f31ec..317aee2b572f31effd654bfefc95601f77b6482b 100644 (file)
@@ -12,6 +12,7 @@ import os
 import pipes
 import platform
 import re
+import ssl
 import socket
 import sys
 import traceback
@@ -536,12 +537,29 @@ def formatSeconds(secs):
         return '%d' % secs
 
 def make_HTTPS_handler(opts_no_check_certificate):
-    if sys.version_info < (3,2):
-        # Python's 2.x handler is very simplistic
-        return compat_urllib_request.HTTPSHandler()
+    if sys.version_info < (3, 2):
+        import httplib
+
+        class HTTPSConnectionV3(httplib.HTTPSConnection):
+            def __init__(self, *args, **kwargs):
+                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
+
+            def connect(self):
+                sock = socket.create_connection((self.host, self.port), self.timeout)
+                if self._tunnel_host:
+                    self.sock = sock
+                    self._tunnel()
+                try:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                except ssl.SSLError as e:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
+
+        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
+            def https_open(self, req):
+                return self.do_open(HTTPSConnectionV3, req)
+        return HTTPSHandlerV3()
     else:
-        import ssl
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
         context.set_default_verify_paths()
         
         context.verify_mode = (ssl.CERT_NONE
index c1f581cd66a1fcdfff15e1f938fd1cee19b4afce..de92411bba2eaa56fb8a1f8d55b31733fb3a7fc4 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.11.22'
+__version__ = '2013.11.24.1'