Merge remote-tracking branch 'd912e3/golem'
authorPhilipp Hagemeister <phihag@phihag.de>
Sun, 28 Sep 2014 08:10:34 +0000 (10:10 +0200)
committerPhilipp Hagemeister <phihag@phihag.de>
Sun, 28 Sep 2014 08:10:34 +0000 (10:10 +0200)
29 files changed:
test/test_youtube_lists.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/anysex.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/eitb.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/flickr.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/muenchentv.py
youtube_dl/extractor/nfl.py
youtube_dl/extractor/oktoberfesttv.py [new file with mode: 0644]
youtube_dl/extractor/sport5.py [new file with mode: 0644]
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/wat.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/ynet.py [new file with mode: 0644]
youtube_dl/extractor/youku.py
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index 1fa99f88b595644df5c4ed50e4c134cc15668638..410f9edc297036d7aeb59250c3241dc10bb7cf2d 100644 (file)
@@ -10,7 +10,6 @@ from test.helper import FakeYDL
 
 
 from youtube_dl.extractor import (
-    YoutubeUserIE,
     YoutubePlaylistIE,
     YoutubeIE,
     YoutubeChannelIE,
@@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
         self.assertEqual(len(entries), 25)
         self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
 
-    def test_youtube_channel(self):
-        dl = FakeYDL()
-        ie = YoutubeChannelIE(dl)
-        #test paginated channel
-        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
-        self.assertTrue(len(result['entries']) > 90)
-        #test autogenerated channel
-        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
-        self.assertTrue(len(result['entries']) >= 18)
-
-    def test_youtube_user(self):
-        dl = FakeYDL()
-        ie = YoutubeUserIE(dl)
-        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
-        self.assertTrue(len(result['entries']) >= 320)
-
-    def test_youtube_show(self):
-        dl = FakeYDL()
-        ie = YoutubeShowIE(dl)
-        result = ie.extract('http://www.youtube.com/show/airdisasters')
-        self.assertTrue(len(result) >= 3)
-
     def test_youtube_mix(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
@@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
         entries = result['entries']
         self.assertEqual(len(entries), 100)
 
-    def test_youtube_toplist(self):
-        dl = FakeYDL()
-        ie = YoutubeTopListIE(dl)
-        result = ie.extract('yttoplist:music:Trending')
-        entries = result['entries']
-        self.assertTrue(len(entries) >= 5)
-
-    def test_youtube_search_url(self):
-        dl = FakeYDL()
-        ie = YoutubeSearchURLIE(dl)
-        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
-        entries = result['entries']
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'youtube-dl test video')
-        self.assertTrue(len(entries) >= 5)
-
 if __name__ == '__main__':
     unittest.main()
index a1713dc5ad7d3e8f79fe15223a88300b9f4434fb..4a9610355aea878cb7daa68abd2f65e398cbee69 100755 (executable)
@@ -1250,12 +1250,13 @@ class YoutubeDL(object):
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
-        url = req if isinstance(req, compat_str) else req.get_full_url()
+        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        url = req if req_is_string else req.get_full_url()
         url_escaped = escape_url(url)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
-            if isinstance(req, compat_str):
+            if req_is_string:
                 req = url_escaped
             else:
                 req = compat_urllib_request.Request(
index 20d7a57ce667b41fc8ae6d7ef1b4dd7bdbc803b6..7f2b4dfcc60ddada121b7b662a61fc10c62de580 100644 (file)
@@ -78,6 +78,7 @@ __authors__  = (
     'Hari Padmanaban',
     'Carlos Ramos',
     '5moufl',
+    'lenaten',
 )
 
 __license__ = 'Public Domain'
index 4ea5811a56f77180afea16b2e5fbc30056850b49..3f941596edd83edda99917b57187485941133e8f 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 from .common import FileDownloader
 from .hls import HlsFD
+from .hls import NativeHlsFD
 from .http import HttpFD
 from .mplayer import MplayerFD
 from .rtmp import RtmpFD
@@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
 
     if url.startswith('rtmp'):
         return RtmpFD
+    if protocol == 'm3u8_native':
+        return NativeHlsFD
     if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
         return HlsFD
     if url.startswith('mms') or url.startswith('rtsp'):
index 9ce97f5fe6c4517ec8b6d681e924bcd387a185cf..f85f0c94e7544b6eabad3d1f4158f88b9e3c981d 100644 (file)
@@ -42,6 +42,7 @@ class FileDownloader(object):
     Subclasses of this one must re-define the real_download method.
     """
 
+    _TEST_FILE_SIZE = 10241
     params = None
 
     def __init__(self, ydl, params):
index 32852f333a0a6329f48ef2d690555d218d4228c5..68eafa403df4ad157feb13174584786205b41bea 100644 (file)
@@ -1,8 +1,13 @@
+from __future__ import unicode_literals
+
 import os
+import re
 import subprocess
 
 from .common import FileDownloader
 from ..utils import (
+    compat_urlparse,
+    compat_urllib_request,
     check_executable,
     encodeFilename,
 )
@@ -43,3 +48,57 @@ class HlsFD(FileDownloader):
             self.to_stderr(u"\n")
             self.report_error(u'%s exited with code %d' % (program, retval))
             return False
+
+
+class NativeHlsFD(FileDownloader):
+    """ A more limited implementation that does not require ffmpeg """
+
+    def real_download(self, filename, info_dict):
+        url = info_dict['url']
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        self.to_screen(
+            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
+        data = self.ydl.urlopen(url).read()
+        s = data.decode('utf-8', 'ignore')
+        segment_urls = []
+        for line in s.splitlines():
+            line = line.strip()
+            if line and not line.startswith('#'):
+                segment_url = (
+                    line
+                    if re.match(r'^https?://', line)
+                    else compat_urlparse.urljoin(url, line))
+                segment_urls.append(segment_url)
+
+        is_test = self.params.get('test', False)
+        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
+        byte_counter = 0
+        with open(tmpfilename, 'wb') as outf:
+            for i, segurl in enumerate(segment_urls):
+                self.to_screen(
+                    '[hlsnative] %s: Downloading segment %d / %d' %
+                    (info_dict['id'], i + 1, len(segment_urls)))
+                seg_req = compat_urllib_request.Request(segurl)
+                if remaining_bytes is not None:
+                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+
+                segment = self.ydl.urlopen(seg_req).read()
+                if remaining_bytes is not None:
+                    segment = segment[:remaining_bytes]
+                    remaining_bytes -= len(segment)
+                outf.write(segment)
+                byte_counter += len(segment)
+                if remaining_bytes is not None and remaining_bytes <= 0:
+                    break
+
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+        self.try_rename(tmpfilename, filename)
+        return True
+
index 6caf7451ed99a00511c56cc4a7c6cb7711601ff0..f62555ce0e33353f5eac848e3956b263f9d43bcb 100644 (file)
@@ -14,8 +14,6 @@ from ..utils import (
 
 
 class HttpFD(FileDownloader):
-    _TEST_FILE_SIZE = 10241
-
     def real_download(self, filename, info_dict):
         url = info_dict['url']
         tmpfilename = self.temp_name(filename)
index 71fe38ca00a8962ad45b9e43bfec0682019145fa..f815e6b91d974de72615c506dbe16d5386e72941 100644 (file)
@@ -262,6 +262,7 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
+from .oktoberfesttv import OktoberfestTVIE
 from .ooyala import OoyalaIE
 from .orf import (
     ORFTVthekIE,
@@ -340,6 +341,7 @@ from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
+from .sport5 import Sport5IE
 from .sportdeutschland import SportDeutschlandIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
@@ -451,6 +453,7 @@ from .yahoo import (
     YahooNewsIE,
     YahooSearchIE,
 )
+from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youporn import YouPornIE
index 7d89f44ee4e6c5a1607c0567aafa3e2172cebbb1..69f89320ce7e30a5fdf65acff01df1668620334f 100644 (file)
@@ -22,8 +22,7 @@ class ABCIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         urls_info_json = self._search_regex(
index bc64423a3f69cd3407701e7b02df333a4f7e628c..ad86d6e58a0d6ac739af711745e73787cc83289e 100644 (file)
@@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor):
 
         title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
         description = self._html_search_regex(
-            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
+            r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
         thumbnail = self._html_search_regex(
             r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
 
@@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor):
             r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
 
         duration = parse_duration(self._search_regex(
-            r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
+            r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
         view_count = int_or_none(self._html_search_regex(
             r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
 
index 9c30a1d331064e6efed709a45a5c0755780ea26d..8d6a6f601e3a2d9316d6ce9fa08a1b2ac98bbf45 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import base64
+import datetime
 import hashlib
 import json
 import netrc
@@ -15,6 +16,7 @@ from ..utils import (
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
+    compat_urlparse,
     compat_str,
 
     clean_html,
@@ -163,6 +165,14 @@ class InfoExtractor(object):
             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
         return cls._VALID_URL_RE.match(url) is not None
 
+    @classmethod
+    def _match_id(cls, url):
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        m = cls._VALID_URL_RE.match(url)
+        assert m
+        return m.group('id')
+
     @classmethod
     def working(cls):
         """Getter method for _WORKING."""
@@ -640,7 +650,9 @@ class InfoExtractor(object):
 
         return formats
 
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+                              entry_protocol='m3u8', preference=None):
+
         formats = [{
             'format_id': 'm3u8-meta',
             'url': m3u8_url,
@@ -651,6 +663,11 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }]
 
+        format_url = lambda u: (
+            u
+            if re.match(r'^https?://', u)
+            else compat_urlparse.urljoin(m3u8_url, u))
+
         m3u8_doc = self._download_webpage(m3u8_url, video_id)
         last_info = None
         kv_rex = re.compile(
@@ -667,15 +684,17 @@ class InfoExtractor(object):
                 continue
             else:
                 if last_info is None:
-                    formats.append({'url': line})
+                    formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
 
                 f = {
                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
-                    'url': line.strip(),
+                    'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
                 }
                 codecs = last_info.get('CODECS')
                 if codecs:
@@ -695,6 +714,12 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats
 
+    def _live_title(self, name):
+        """ Generate the title for a live video """
+        now = datetime.datetime.now()
+        now_str = now.strftime("%Y-%m-%d %H:%M")
+        return name + ' ' + now_str
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 4903764f7008ec6f22c054cda2c73557c44b2a91..f99888ecc378ea2a5404fe42d8d32a6a8c4093fb 100644 (file)
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     ExtractorError,
     compat_urllib_parse,
@@ -26,7 +26,7 @@ from ..aes import (
 )
 
 
-class CrunchyrollIE(InfoExtractor):
+class CrunchyrollIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             else:
                 subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
 
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
         return {
             'id':          video_id,
             'title':       video_title,
index 4ba323148cc9e81e1e77c9ce12fba65ac8030638..2cba825325ad46caf931c5382c54dab263b7c15a 100644 (file)
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -7,20 +9,20 @@ from ..utils import ExtractorError
 
 
 class EitbIE(InfoExtractor):
-    IE_NAME = u'eitb.tv'
+    IE_NAME = 'eitb.tv'
     _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
 
     _TEST = {
-        u'add_ie': ['Brightcove'],
-        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
-        u'md5': u'edf4436247185adee3ea18ce64c47998',
-        u'info_dict': {
-            u'id': u'2743577154001',
-            u'ext': u'mp4',
-            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
+        'add_ie': ['Brightcove'],
+        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
+        'md5': 'edf4436247185adee3ea18ce64c47998',
+        'info_dict': {
+            'id': '2743577154001',
+            'ext': 'mp4',
+            'title': '60 minutos (Lasa y Zabala, 30 años)',
             # All videos from eitb has this description in the brightcove info
-            u'description': u'.',
-            u'uploader': u'Euskal Telebista',
+            'description': '.',
+            'uploader': 'Euskal Telebista',
         },
     }
 
@@ -30,7 +32,7 @@ class EitbIE(InfoExtractor):
         webpage = self._download_webpage(url, chapter_id)
         bc_url = BrightcoveIE._extract_brightcove_url(webpage)
         if bc_url is None:
-            raise ExtractorError(u'Could not extract the Brightcove url')
+            raise ExtractorError('Could not extract the Brightcove url')
         # The BrightcoveExperience object doesn't contain the video id, we set
         # it manually
         bc_url += '&%40videoPlayer={0}'.format(chapter_id)
index 14a196ffc63336ae7d016b035cfb28cc7f7d28a0..aacbf14141f6d5109d265b8e4dfa37883cee81ab 100644 (file)
@@ -7,6 +7,7 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    str_to_int,
 )
 
 
@@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Music Video 14 british euro brit european cumshots swallow',
             'uploader': 'unknown',
+            'view_count': int,
             'age_limit': 18,
         }
     }, {
@@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor):
         video_title = self._html_search_regex(
             r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
         uploader = self._html_search_regex(
-            r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
-            fatal=False)
+            r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
+            webpage, 'uploader', fatal=False)
+        view_count = str_to_int(self._html_search_regex(
+            r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
+            webpage, 'view count', fatal=False))
+
         video_url = compat_urllib_parse.unquote(self._html_search_regex(
             r'video_url=(.+?)&amp;', webpage, 'video_url'))
         path = compat_urllib_parse_urlparse(video_url).path
@@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor):
             'id': video_id,
             'title': video_title,
             'uploader': uploader,
+            'view_count': view_count,
             'url': video_url,
             'format': format,
             'format_id': format,
index 21ea5ec2bf1c499149809971b780daa9d10d0291..e09982e88b913676a2f8c75946e79c82502bf650 100644 (file)
@@ -10,13 +10,13 @@ from ..utils import (
 
 
 class FlickrIE(InfoExtractor):
-    """Information Extractor for Flickr videos"""
-    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
     _TEST = {
         'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
-        'file': '5645318632.mp4',
         'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
         'info_dict': {
+            'id': '5645318632',
+            'ext': 'mp4',
             "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
             "uploader_id": "forestwander-nature-pictures", 
             "title": "Dark Hollow Waterfalls"
@@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):
             raise ExtractorError('Unable to extract video url')
         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 
-        return [{
-            'id':          video_id,
-            'url':         video_url,
-            'ext':         'mp4',
-            'title':       self._og_search_title(webpage),
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
             'uploader_id': video_uploader_id,
-        }]
+        }
index 40eeaad16d42acf089d1d426a055d4e5ce5624cf..367f930dd093567b1c033961fdf0326ded47dbba 100644 (file)
@@ -382,6 +382,19 @@ class GenericIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
+        # Wistia embed
+        {
+            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+            'md5': '8788b683c777a5cf25621eaf286d0c23',
+            'info_dict': {
+                'id': '1cfaf6b7ea',
+                'ext': 'mov',
+                'title': 'md5:51364a8d3d009997ba99656004b5e20d',
+                'duration': 643.0,
+                'filesize': 182808282,
+                'uploader': 'education-portal.com',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -584,7 +597,9 @@ class GenericIE(InfoExtractor):
 
         # Helper method
         def _playlist_from_matches(matches, getter, ie=None):
-            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
+            urlrs = orderedSet(
+                self.url_result(self._proto_relative_url(getter(m)), ie)
+                for m in matches)
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
 
@@ -629,11 +644,11 @@ class GenericIE(InfoExtractor):
             )
             (["\'])
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v)/.+?)
+                (?:embed|v|p)/.+?)
             \1''', webpage)
         if matches:
             return _playlist_from_matches(
-                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
+                matches, lambda m: unescapeHTML(m[1]))
 
         # Look for embedded Dailymotion player
         matches = re.findall(
@@ -654,6 +669,16 @@ class GenericIE(InfoExtractor):
                 'title': video_title,
                 'id': video_id,
             }
+        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+        if match:
+            return {
+                '_type': 'url_transparent',
+                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
+                'ie_key': 'Wistia',
+                'uploader': video_uploader,
+                'title': video_title,
+                'id': match.group('id')
+            }
 
         # Look for embedded blip.tv player
         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
index 3a938861bbfb38b70ee9ad18621769af8bf9c367..7cb6749bee4eda9b8767fa6d1eb246b8eb6f56e3 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import datetime
 import json
 
 from .common import InfoExtractor
@@ -33,9 +32,7 @@ class MuenchenTVIE(InfoExtractor):
         display_id = 'live'
         webpage = self._download_webpage(url, display_id)
 
-        now = datetime.datetime.now()
-        now_str = now.strftime("%Y-%m-%d %H:%M")
-        title = self._og_search_title(webpage) + ' ' + now_str
+        title = self._live_title(self._og_search_title(webpage))
 
         data_js = self._search_regex(
             r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
index f53596f5e12424fd7fcf13c4cfb17cc69540c742..963c4587c88c26c02e479ee1c3764bb2bd46269c 100644 (file)
@@ -17,7 +17,7 @@ class NFLIE(InfoExtractor):
     _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
     _TEST = {
         'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
-        'skip_download': True,  # md5 sum fluctuates
+        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates
         'info_dict': {
             'id': '0ap3000000398478',
             'ext': 'mp4',
diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py
new file mode 100644 (file)
index 0000000..4a41c05
--- /dev/null
@@ -0,0 +1,47 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OktoberfestTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
+
+    _TEST = {
+        'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
+        'info_dict': {
+            'id': 'hb-zelt',
+            'ext': 'mp4',
+            'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._live_title(self._html_search_regex(
+            r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title'))
+
+        clip = self._search_regex(
+            r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
+        ncurl = self._search_regex(
+            r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
+        video_url = ncurl + clip
+        thumbnail = self._search_regex(
+            r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
+            'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',
+            'is_live': True,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py
new file mode 100644 (file)
index 0000000..3f680bf
--- /dev/null
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class Sport5IE(InfoExtractor):
+    _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
+            'info_dict': {
+                'id': 's5-Y59xx1-GUh2',
+                'ext': 'mp4',
+                'title': 'ולנסיה-קורדובה 0:3',
+                'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
+                'duration': 228,
+                'categories': list,
+            },
+            'skip': 'Blocked outside of Israel',
+        }, {
+            'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
+            'info_dict': {
+                'id': 's5-SiXxx1-hKh2',
+                'ext': 'mp4',
+                'title': 'GOALS_CELTIC_270914.mp4',
+                'description': '',
+                'duration': 87,
+                'categories': list,
+            },
+            'skip': 'Blocked outside of Israel',
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        media_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, media_id)
+
+        video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id')
+
+        metadata = self._download_xml(
+            'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
+            video_id)
+
+        error = metadata.find('./Error')
+        if error is not None:
+            raise ExtractorError(
+                '%s returned error: %s - %s' % (
+                    self.IE_NAME,
+                    error.find('./Name').text,
+                    error.find('./Description').text),
+                expected=True)
+
+        title = metadata.find('./Title').text
+        description = metadata.find('./Description').text
+        duration = int(metadata.find('./Duration').text)
+
+        posters_el = metadata.find('./PosterLinks')
+        thumbnails = [{
+            'url': thumbnail.text,
+            'width': int(thumbnail.get('width')),
+            'height': int(thumbnail.get('height')),
+        } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
+
+        categories_el = metadata.find('./Categories')
+        categories = [
+            cat.get('name') for cat in categories_el.findall('./Category')
+        ] if categories_el is not None else []
+
+        formats = [{
+            'url': fmt.text,
+            'ext': 'mp4',
+            'vbr': int(fmt.get('bitrate')),
+            'width': int(fmt.get('width')),
+            'height': int(fmt.get('height')),
+        } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'duration': duration,
+            'categories': categories,
+            'formats': formats,
+        }
\ No newline at end of file
index df115d2516196517b1a66fe2fb12bd1fbf96b6e5..ebd64f0f54df23fca1d243e32dc8071fcfcbfb1d 100644 (file)
@@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor):
         'md5': '99f65c0c9ef9b682b97313e052734c3f',
         'info_dict': {
             'id': '249bb972c2',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Смях! Чудо - чист за секунди - Скрита камера',
         },
     }
@@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor):
         return {
             'id': video_id,
             'url': final_url,
-            'ext': 'flv',
             'title': title,
             'thumbnail': thumbnail_url,
         }
index d2ffd1b6ba893f2cb2cc50f00a3131a835dba97d..ebab8b86c6f5727795d54b234ee4adda7258cf39 100644 (file)
@@ -6,6 +6,7 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
     compat_HTTPError,
+    compat_urllib_request,
     ExtractorError,
 )
 
@@ -24,7 +25,7 @@ class VevoIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
-        "md5": "06bea460acb744eab74a9d7dcb4bfd61",
+        "md5": "95ee28ee45e70130e3ab02b0f579ae23",
         'info_dict': {
             'id': 'GB1101300280',
             'ext': 'mp4',
@@ -40,7 +41,7 @@ class VevoIE(InfoExtractor):
     }, {
         'note': 'v3 SMIL format',
         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
-        'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
+        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
         'info_dict': {
             'id': 'USUV71302923',
             'ext': 'mp4',
@@ -69,6 +70,21 @@ class VevoIE(InfoExtractor):
     }]
     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
 
+    def _real_initialize(self):
+        req = compat_urllib_request.Request(
+            'http://www.vevo.com/auth', data=b'')
+        webpage = self._download_webpage(
+            req, None,
+            note='Retrieving oauth token',
+            errnote='Unable to retrieve oauth token',
+            fatal=False)
+        if webpage is False:
+            self._oauth_token = None
+        else:
+            self._oauth_token = self._search_regex(
+                r'access_token":\s*"([^"]+)"',
+                webpage, 'access token', fatal=False)
+
     def _formats_from_json(self, video_info):
         last_version = {'version': -1}
         for version in video_info['videoVersions']:
@@ -129,6 +145,26 @@ class VevoIE(InfoExtractor):
             })
         return formats
 
+    def _download_api_formats(self, video_id):
+        if not self._oauth_token:
+            self._downloader.report_warning(
+                'No oauth token available, skipping API HLS download')
+            return []
+
+        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
+            video_id, self._oauth_token)
+        api_data = self._download_json(
+            api_url, video_id,
+            note='Downloading HLS formats',
+            errnote='Failed to download HLS format list', fatal=False)
+        if api_data is None:
+            return []
+
+        m3u8_url = api_data[0]['url']
+        return self._extract_m3u8_formats(
+            m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
+            preference=0)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -152,30 +188,8 @@ class VevoIE(InfoExtractor):
         else:
             age_limit = None
 
-        # Download SMIL
-        smil_blocks = sorted((
-            f for f in video_info['videoVersions']
-            if f['sourceType'] == 13),
-            key=lambda f: f['version'])
-
-        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
-            self._SMIL_BASE_URL, video_id, video_id.lower())
-        if smil_blocks:
-            smil_url_m = self._search_regex(
-                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
-                fatal=False)
-            if smil_url_m is not None:
-                smil_url = smil_url_m
-
-        try:
-            smil_xml = self._download_webpage(smil_url, video_id,
-                                              'Downloading SMIL info')
-            formats.extend(self._formats_from_smil(smil_xml))
-        except ExtractorError as ee:
-            if not isinstance(ee.cause, compat_HTTPError):
-                raise
-            self._downloader.report_warning(
-                'Cannot download SMIL information, falling back to JSON ..')
+        # Download via HLS API
+        formats.extend(self._download_api_formats(video_id))
 
         self._sort_formats(formats)
         timestamp_ms = int(self._search_regex(
index 2544c24bd16e4af7b3fba806dff1b87530759f82..1b2f731e932a63fbc7722251c0b4e57f0963c34c 100644 (file)
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     compat_str,
+    ExtractorError,
 )
 
 
@@ -16,6 +17,24 @@ class VubeIE(InfoExtractor):
 
     _TESTS = [
         {
+            'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
+            'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
+            'info_dict': {
+                'id': 'Y8NUZ69Tf7',
+                'ext': 'mp4',
+                'title': 'Best Drummer Ever [HD]',
+                'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'William',
+                'timestamp': 1406876915,
+                'upload_date': '20140801',
+                'duration': 258.051,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+                'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
+            },
+        }, {
             'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
             'md5': 'db7aba89d4603dadd627e9d1973946fe',
             'info_dict': {
@@ -32,7 +51,8 @@ class VubeIE(InfoExtractor):
                 'dislike_count': int,
                 'comment_count': int,
                 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
-            }
+            },
+            'skip': 'Removed due to DMCA',
         },
         {
             'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
@@ -51,7 +71,8 @@ class VubeIE(InfoExtractor):
                 'dislike_count': int,
                 'comment_count': int,
                 'categories': ['seraina', 'jessica', 'krewella', 'alive'],
-            }
+            },
+            'skip': 'Removed due to DMCA',
         }, {
             'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
             'md5': '0584fc13b50f887127d9d1007589d27f',
@@ -69,7 +90,8 @@ class VubeIE(InfoExtractor):
                 'dislike_count': int,
                 'comment_count': int,
                 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
-            }
+            },
+            'skip': 'Removed due to DMCA',
         }
     ]
 
@@ -102,6 +124,11 @@ class VubeIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        if not formats and video.get('vst') == 'dmca':
+            raise ExtractorError(
+                'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
+                expected=True)
+
         title = video['title']
         description = video.get('description')
         thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
index 268e2f6183405117b7fd10f6037857fa6ce8cf10..bf9e40bad7c29e01b231b2ecf9e0cbb53295d52f 100644 (file)
@@ -40,6 +40,7 @@ class WatIE(InfoExtractor):
                 'upload_date': '20140816',
                 'duration': 2910,
             },
+            'skip': "Ce contenu n'est pas disponible pour l'instant.",
         },
     ]
 
index e6bfa9e147a2b62e6dbcdd52343675bb9ca52a65..748443f811f184d4276d4628cd13ed1e2bf92d9c 100644 (file)
@@ -1,13 +1,14 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError, compat_urllib_request
 
 
 class WistiaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+    _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
 
     _TEST = {
         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        webpage = self._download_webpage(url, video_id)
-        data_json = self._html_search_regex(
-            r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
-
-        data = json.loads(data_json)
+        request = compat_urllib_request.Request(self._API_URL.format(video_id))
+        request.add_header('Referer', url)  # Some videos require this.
+        data_json = self._download_json(request, video_id)
+        if data_json.get('error'):
+            raise ExtractorError('Error while getting the playlist',
+                                 expected=True)
+        data = data_json['media']
 
         formats = []
         thumbnails = []
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
new file mode 100644 (file)
index 0000000..2487286
--- /dev/null
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import compat_urllib_parse
+
+
+class YnetIE(InfoExtractor):
+    _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
+    _TESTS = [
+        {
+            'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
+            'md5': '002b44ee2f33d50363a1c153bed524cf',
+            'info_dict': {
+                'id': 'L-11659-99244',
+                'ext': 'flv',
+                'title': 'איש לא יודע מאיפה באנו',
+                'thumbnail': 're:^https?://.*\.jpg',
+            }
+        }, {
+            'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
+            'md5': '6455046ae1b48cf7e2b7cae285e53a16',
+            'info_dict': {
+                'id': 'L-8859-84418',
+                'ext': 'flv',
+                'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
+                'thumbnail': 're:^https?://.*\.jpg',
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        
+        webpage = self._download_webpage(url, video_id)
+
+        content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
+        config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
+        f4m_url = config['clip']['url']
+        title = self._og_search_title(webpage)
+        m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
+        if m:
+            title = m.group('title')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': self._extract_f4m_formats(f4m_url, video_id),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
\ No newline at end of file
index a8fd40c833fb7707eb1cd8760c288da5f2299025..07ed7cbd19ae400ce91e201b58f7fc8e3763834c 100644 (file)
@@ -1,5 +1,7 @@
 # coding: utf-8
 
+from __future__ import unicode_literals
+
 import json
 import math
 import random
@@ -13,18 +15,25 @@ from ..utils import (
 
 
 class YoukuIE(InfoExtractor):
-    _VALID_URL =  r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)'
-    _TEST =   {
-        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
-        u"file": u"XNDgyMDQ2NTQw_part00.flv",
-        u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
-        u"params": {u"test": False},
-        u"info_dict": {
-            u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
+    _VALID_URL = r'''(?x)
+        (?:
+            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+            youku:)
+        (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
+    '''
+    _TEST = {
+        'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
+        'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
+        'params': {
+            'test': False
+        },
+        'info_dict': {
+            'id': 'XNDgyMDQ2NTQw_part00',
+            'ext': 'flv',
+            'title': 'youtube-dl test video "\'/\\ä↭𝕐'
         }
     }
 
-
     def _gen_sid(self):
         nowTime = int(time.time() * 1000)
         random1 = random.randint(1000,1998)
@@ -55,49 +64,42 @@ class YoukuIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group('ID')
+        video_id = mobj.group('id')
 
         info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
 
-        jsondata = self._download_webpage(info_url, video_id)
-
-        self.report_extraction(video_id)
-        try:
-            config = json.loads(jsondata)
-            error_code = config['data'][0].get('error_code')
-            if error_code:
-                # -8 means blocked outside China.
-                error = config['data'][0].get('error')  # Chinese and English, separated by newline.
-                raise ExtractorError(error or u'Server reported error %i' % error_code,
-                    expected=True)
-
-            video_title =  config['data'][0]['title']
-            seed = config['data'][0]['seed']
-
-            format = self._downloader.params.get('format', None)
-            supported_format = list(config['data'][0]['streamfileids'].keys())
-
-            if format is None or format == 'best':
-                if 'hd2' in supported_format:
-                    format = 'hd2'
-                else:
-                    format = 'flv'
-                ext = u'flv'
-            elif format == 'worst':
-                format = 'mp4'
-                ext = u'mp4'
-            else:
-                format = 'flv'
-                ext = u'flv'
+        config = self._download_json(info_url, video_id)
+
+        error_code = config['data'][0].get('error_code')
+        if error_code:
+            # -8 means blocked outside China.
+            error = config['data'][0].get('error')  # Chinese and English, separated by newline.
+            raise ExtractorError(error or 'Server reported error %i' % error_code,
+                expected=True)
 
+        video_title = config['data'][0]['title']
+        seed = config['data'][0]['seed']
 
-            fileid = config['data'][0]['streamfileids'][format]
-            keys = [s['k'] for s in config['data'][0]['segs'][format]]
-            # segs is usually a dictionary, but an empty *list* if an error occured.
-        except (UnicodeDecodeError, ValueError, KeyError):
-            raise ExtractorError(u'Unable to extract info section')
+        format = self._downloader.params.get('format', None)
+        supported_format = list(config['data'][0]['streamfileids'].keys())
+
+        # TODO proper format selection
+        if format is None or format == 'best':
+            if 'hd2' in supported_format:
+                format = 'hd2'
+            else:
+                format = 'flv'
+            ext = 'flv'
+        elif format == 'worst':
+            format = 'mp4'
+            ext = 'mp4'
+        else:
+            format = 'flv'
+            ext = 'flv'
+
+        fileid = config['data'][0]['streamfileids'][format]
+        keys = [s['k'] for s in config['data'][0]['segs'][format]]
+        # segs is usually a dictionary, but an empty *list* if an error occured.
 
         files_info=[]
         sid = self._gen_sid()
@@ -106,9 +108,8 @@ class YoukuIE(InfoExtractor):
         #column 8,9 of fileid represent the segment number
         #fileid[7:9] should be changed
         for index, key in enumerate(keys):
-
             temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
-            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
+            download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
 
             info = {
                 'id': '%s_part%02d' % (video_id, index),
index b54c69122afb1265acc545cd79daa8ffd09a1752..99198e38092a8ed507b8e44aae41677e7ce17e17 100644 (file)
@@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _set_language(self):
         return bool(self._download_webpage(
             self._LANG_URL, None,
-            note=u'Setting language', errnote='unable to set language',
+            note='Setting language', errnote='unable to set language',
             fatal=False))
 
     def _login(self):
@@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         # No authentication to be performed
         if username is None:
             if self._LOGIN_REQUIRED:
-                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
             return True
 
         login_page = self._download_webpage(
             self._LOGIN_URL, None,
-            note=u'Downloading login page',
-            errnote=u'unable to fetch login page', fatal=False)
+            note='Downloading login page',
+            errnote='unable to fetch login page', fatal=False)
         if login_page is False:
             return
 
@@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
         login_results = self._download_webpage(
             req, None,
-            note=u'Logging in', errnote=u'unable to log in', fatal=False)
+            note='Logging in', errnote='unable to log in', fatal=False)
         if login_results is False:
             return False
 
         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
-            raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
+            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 
         # Two-Factor
         # TODO add SMS and phone call support - these require making a request and then prompting the user
@@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             tfa_code = self._get_tfa_info()
 
             if tfa_code is None:
-                self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
-                self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
+                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                 return False
 
             # Unlike the first login form, secTok and timeStmp are both required for the TFA form
 
             match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
             if match is None:
-                self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
+                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
             secTok = match.group(1)
             match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
             if match is None:
-                self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
+                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
             timeStmp = match.group(1)
 
             tfa_form_strs = {
@@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
             tfa_results = self._download_webpage(
                 tfa_req, None,
-                note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
+                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 
             if tfa_results is False:
                 return False
 
             if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
-                self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
+                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                 return False
             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
-                self._downloader.report_warning(u'unable to log in - did the page structure change?')
+                self._downloader.report_warning('unable to log in - did the page structure change?')
                 return False
             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
-                self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
+                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                 return False
 
         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-            self._downloader.report_warning(u'unable to log in: bad username or password')
+            self._downloader.report_warning('unable to log in: bad username or password')
             return False
         return True
 
@@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         self._download_webpage(
             req, None,
-            note=u'Confirming age', errnote=u'Unable to confirm age')
+            note='Confirming age', errnote='Unable to confirm age')
         return True
 
     def _real_initialize(self):
@@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
-                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
+                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
@@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     IE_NAME = 'youtube'
     _TESTS = [
         {
-            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
-            u"file":  u"BaW_jenozKc.mp4",
-            u"info_dict": {
-                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
-                u"uploader": u"Philipp Hagemeister",
-                u"uploader_id": u"phihag",
-                u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
-                u"categories": [u'Science & Technology'],
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
+            'info_dict': {
+                'id': 'BaW_jenozKc',
+                'ext': 'mp4',
+                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+                'uploader': 'Philipp Hagemeister',
+                'uploader_id': 'phihag',
+                'upload_date': '20121002',
+                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+                'categories': ['Science & Technology'],
                 'like_count': int,
                 'dislike_count': int,
             }
         },
         {
-            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
-            u"file":  u"UxxajLWwzqY.mp4",
-            u"note": u"Test generic use_cipher_signature video (#897)",
-            u"info_dict": {
-                u"upload_date": u"20120506",
-                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
-                u"uploader": u"Icona Pop",
-                u"uploader_id": u"IconaPop"
+            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
+            'note': 'Test generic use_cipher_signature video (#897)',
+            'info_dict': {
+                'id': 'UxxajLWwzqY',
+                'ext': 'mp4',
+                'upload_date': '20120506',
+                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
+                'uploader': 'Icona Pop',
+                'uploader_id': 'IconaPop',
             }
         },
         {
-            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
-            u"file":  u"07FYdnEawAQ.mp4",
-            u"note": u"Test VEVO video with age protection (#956)",
-            u"info_dict": {
-                u"upload_date": u"20130703",
-                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
-                u"description": u"md5:64249768eec3bc4276236606ea996373",
-                u"uploader": u"justintimberlakeVEVO",
-                u"uploader_id": u"justintimberlakeVEVO"
+            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
+            'note': 'Test VEVO video with age protection (#956)',
+            'info_dict': {
+                'id': '07FYdnEawAQ',
+                'ext': 'mp4',
+                'upload_date': '20130703',
+                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+                'description': 'md5:64249768eec3bc4276236606ea996373',
+                'uploader': 'justintimberlakeVEVO',
+                'uploader_id': 'justintimberlakeVEVO',
             }
         },
         {
-            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
-            u"file":  u"yZIXLfi8CZQ.mp4",
-            u"note": u"Embed-only video (#1746)",
-            u"info_dict": {
-                u"upload_date": u"20120608",
-                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
-                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
-                u"uploader": u"SET India",
-                u"uploader_id": u"setindia"
+            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+            'note': 'Embed-only video (#1746)',
+            'info_dict': {
+                'id': 'yZIXLfi8CZQ',
+                'ext': 'mp4',
+                'upload_date': '20120608',
+                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+                'uploader': 'SET India',
+                'uploader_id': 'setindia'
             }
         },
         {
-            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
-            u"file": u"a9LDPn-MO4I.m4a",
-            u"note": u"256k DASH audio (format 141) via DASH manifest",
-            u"info_dict": {
-                u"upload_date": "20121002",
-                u"uploader_id": "8KVIDEO",
-                u"description": '',
-                u"uploader": "8KVIDEO",
-                u"title": "UHDTV TEST 8K VIDEO.mp4"
+            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
+            'note': '256k DASH audio (format 141) via DASH manifest',
+            'info_dict': {
+                'id': 'a9LDPn-MO4I',
+                'ext': 'm4a',
+                'upload_date': '20121002',
+                'uploader_id': '8KVIDEO',
+                'description': '',
+                'uploader': '8KVIDEO',
+                'title': 'UHDTV TEST 8K VIDEO.mp4'
             },
-            u"params": {
-                u"youtube_include_dash_manifest": True,
-                u"format": "141",
+            'params': {
+                'youtube_include_dash_manifest': True,
+                'format': '141',
             },
         },
         # DASH manifest with encrypted signature
@@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
             },
-            u"params": {
+            'params': {
                 'youtube_include_dash_manifest': True,
                 'format': '141',
             },
@@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def report_video_info_webpage_download(self, video_id):
         """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Downloading video info webpage' % video_id)
+        self.to_screen('%s: Downloading video info webpage' % video_id)
 
     def report_information_extraction(self, video_id):
         """Report attempt to extract video information."""
-        self.to_screen(u'%s: Extracting video information' % video_id)
+        self.to_screen('%s: Extracting video information' % video_id)
 
     def report_unavailable_format(self, video_id, format):
         """Report extracted video URL."""
-        self.to_screen(u'%s: Format %s not available' % (video_id, format))
+        self.to_screen('%s: Format %s not available' % (video_id, format))
 
     def report_rtmp_download(self):
         """Indicate the download will use the RTMP protocol."""
-        self.to_screen(u'RTMP download detected')
+        self.to_screen('RTMP download detected')
 
     def _signature_cache_id(self, example_sig):
         """ Return a string representation of a signature """
@@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             player_type, player_id, self._signature_cache_id(example_sig))
         assert os.path.basename(func_id) == func_id
 
-        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
+        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
         if cache_spec is not None:
             return lambda s: ''.join(s[i] for i in cache_spec)
 
         if player_type == 'js':
             code = self._download_webpage(
                 player_url, video_id,
-                note=u'Downloading %s player %s' % (player_type, player_id),
-                errnote=u'Download of %s failed' % player_url)
+                note='Downloading %s player %s' % (player_type, player_id),
+                errnote='Download of %s failed' % player_url)
             res = self._parse_sig_js(code)
         elif player_type == 'swf':
             urlh = self._request_webpage(
                 player_url, video_id,
-                note=u'Downloading %s player %s' % (player_type, player_id),
-                errnote=u'Download of %s failed' % player_url)
+                note='Downloading %s player %s' % (player_type, player_id),
+                errnote='Download of %s failed' % player_url)
             code = urlh.read()
             res = self._parse_sig_swf(code)
         else:
@@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             cache_res = res(test_string)
             cache_spec = [ord(c) for c in cache_res]
 
-        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
+        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
         return res
 
     def _print_sig_code(self, func, example_sig):
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = '' if start == 0 else str(start)
-                ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
-                steps = '' if step == 1 else (u':%d' % step)
+                ends = (':%d' % (end+step)) if end + step >= 0 else ':'
+                steps = '' if step == 1 else (':%d' % step)
                 return 's[%s%s%s]' % (starts, ends, steps)
 
             step = None
@@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         expr_code = ' + '.join(gen_sig_code(cache_spec))
         signature_id_tuple = '(%s)' % (
             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
-        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                 '    return %s\n') % (signature_id_tuple, expr_code)
-        self.to_screen(u'Extracted signature function:\n' + code)
+        self.to_screen('Extracted signature function:\n' + code)
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
@@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Turn the encrypted s field into a working signature"""
 
         if player_url is None:
-            raise ExtractorError(u'Cannot decrypt signature without player_url')
+            raise ExtractorError('Cannot decrypt signature without player_url')
 
-        if player_url.startswith(u'//'):
+        if player_url.startswith('//'):
             player_url = 'https:' + player_url
         try:
             player_id = (player_url, self._signature_cache_id(s))
@@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                 video_id, note=False)
         except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
             return {}
         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
 
@@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             url = 'https://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
         if not sub_lang_list:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
         return sub_lang_list
 
@@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
-        self.to_screen(u'%s: Looking for automatic captions' % video_id)
+        self.to_screen('%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
         if mobj is None:
@@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             caption_list = self._download_xml(list_url, video_id)
             original_lang_node = caption_list.find('track')
             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
-                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+                self._downloader.report_warning('Video doesn\'t have automatic captions')
                 return {}
             original_lang = original_lang_node.attrib['lang_code']
 
@@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     def extract_id(cls, url):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
         video_id = mobj.group(2)
         return video_id
 
@@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _extract_annotations(self, video_id):
         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
-        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
+        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 
     def _real_extract(self, url):
         proto = (
@@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
-            raise ExtractorError(u'"rental" videos not supported')
+            raise ExtractorError('"rental" videos not supported')
 
         # Start extracting information
         self.report_information_extraction(video_id)
 
         # uploader
         if 'author' not in video_info:
-            raise ExtractorError(u'Unable to extract uploader name')
+            raise ExtractorError('Unable to extract uploader name')
         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
 
         # uploader_id
@@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if mobj is not None:
             video_uploader_id = mobj.group(1)
         else:
-            self._downloader.report_warning(u'unable to extract uploader nickname')
+            self._downloader.report_warning('unable to extract uploader nickname')
 
         # title
         if 'title' in video_info:
             video_title = video_info['title'][0]
         else:
-            self._downloader.report_warning(u'Unable to extract video title')
+            self._downloader.report_warning('Unable to extract video title')
             video_title = '_'
 
         # thumbnail image
@@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if m_thumb is not None:
             video_thumbnail = m_thumb.group(1)
         elif 'thumbnail_url' not in video_info:
-            self._downloader.report_warning(u'unable to extract video thumbnail')
+            self._downloader.report_warning('unable to extract video thumbnail')
             video_thumbnail = None
         else:   # don't panic if we can't find it
             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
@@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             if count is not None:
                 return int(count.replace(',', ''))
             return None
-        like_count = _extract_count(u'like')
-        dislike_count = _extract_count(u'dislike')
+        like_count = _extract_count('like')
+        dislike_count = _extract_count('dislike')
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
@@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             return
 
         if 'length_seconds' not in video_info:
-            self._downloader.report_warning(u'unable to extract video duration')
+            self._downloader.report_warning('unable to extract video duration')
             video_duration = None
         else:
             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
@@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
             # this signatures are encrypted
             if 'url_encoded_fmt_stream_map' not in args:
-                raise ValueError(u'No stream_map present')  # caught below
+                raise ValueError('No stream_map present')  # caught below
             re_signature = re.compile(r'[&,]s=')
             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
             if m_s is not None:
-                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
+                self.to_screen('%s: Encrypted signatures detected.' % video_id)
                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
             m_s = re_signature.search(args.get('adaptive_fmts', ''))
             if m_s is not None:
@@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                 player_desc = 'html5 player %s' % player_version
 
                         parts_sizes = self._signature_cache_id(encrypted_sig)
-                        self.to_screen(u'{%s} signature length %s, %s' %
+                        self.to_screen('{%s} signature length %s, %s' %
                             (format_id, parts_sizes, player_desc))
 
                     signature = self._decrypt_signature(
@@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             url_map = self._extract_from_m3u8(manifest_url, video_id)
             formats = _map_to_format_list(url_map)
         else:
-            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
         # Look for the DASH manifest
         if (self._downloader.params.get('youtube_include_dash_manifest', False)):
@@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                 dash_doc = self._download_xml(
                     dash_manifest_url, video_id,
-                    note=u'Downloading DASH manifest',
-                    errnote=u'Could not download DASH manifest')
-                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                    note='Downloading DASH manifest',
+                    errnote='Could not download DASH manifest')
+                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                     if url_el is None:
                         continue
@@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         existing_format.update(f)
 
             except (ExtractorError, KeyError) as e:
-                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
+                self.report_warning('Skipping DASH manifest: %s' % e, video_id)
 
         self._sort_formats(formats)
 
@@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         (?:\w+\.)?
                         youtube\.com/
                         (?:
-                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
+                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                            \? (?:.*?&)*? (?:p|a|list)=
                         |  p/
                         )
@@ -1056,6 +1061,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             'title': 'YDL_safe_search',
         },
         'playlist_count': 2,
+    }, {
+        'note': 'embedded',
+        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+        'playlist_count': 4,
+        'info_dict': {
+            'title': 'JODA15',
+        }
+    }, {
+        'note': 'Embedded SWF player',
+        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
+        'playlist_count': 4,
+        'info_dict': {
+            'title': 'JODA7',
+        }
     }]
 
     def _real_initialize(self):
@@ -1090,7 +1109,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # Extract playlist id
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
         playlist_id = mobj.group(1) or mobj.group(2)
 
         # Check if it's a video-specific URL
@@ -1098,16 +1117,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if 'v' in query_dict:
             video_id = query_dict['v'][0]
             if self._downloader.params.get('noplaylist'):
-                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                 return self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
-                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
         if playlist_id.startswith('TL'):
-            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+            raise ExtractorError('For downloading YouTube.com top lists, use '
                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
@@ -1152,19 +1171,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
 class YoutubeTopListIE(YoutubePlaylistIE):
     IE_NAME = 'youtube:toplist'
-    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
         ' (Example: "yttoplist:music:Top Tracks")')
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
-    _TESTS = []
+    _TESTS = [{
+        'url': 'yttoplist:music:Trending',
+        'playlist_mincount': 5,
+        'skip': 'Only works for logged-in users',
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         channel = mobj.group('chann')
         title = mobj.group('title')
         query = compat_urllib_parse.urlencode({'title': title})
-        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
-        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(playlist_re, channel_page, 'list')
+        channel_page = self._download_webpage(
+            'https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(
+            r'''(?x)
+                <a\s+href="([^"]+)".*?>\s*
+                <span\s+class="branded-page-module-title-text">\s*
+                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
+            channel_page, 'list')
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
         
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@@ -1190,6 +1218,11 @@ class YoutubeChannelIE(InfoExtractor):
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
+    _TESTS = [{
+        'note': 'paginated channel',
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+        'playlist_mincount': 91,
+    }]
 
     def extract_videos_from_page(self, page):
         ids_in_page = []
@@ -1202,7 +1235,7 @@ class YoutubeChannelIE(InfoExtractor):
         # Extract channel id
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
 
         # Download channel page
         channel_id = mobj.group(1)
@@ -1224,7 +1257,7 @@ class YoutubeChannelIE(InfoExtractor):
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 page = self._download_json(
-                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    url, channel_id, note='Downloading page #%s' % pagenum,
                     transform_source=uppercase_escape)
 
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
@@ -1233,7 +1266,7 @@ class YoutubeChannelIE(InfoExtractor):
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
 
-        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
+        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
 
         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                        for video_id in video_ids]
@@ -1248,6 +1281,17 @@ class YoutubeUserIE(InfoExtractor):
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = 'youtube:user'
 
+    _TESTS = [{
+        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
+        'playlist_mincount': 320,
+        'info_dict': {
+            'title': 'TheLinuxFoundation',
+        }
+    }, {
+        'url': 'ytuser:phihag',
+        'only_matching': True,
+    }]
+
     @classmethod
     def suitable(cls, url):
         # Don't return True if the url can be extracted with other youtube
@@ -1260,7 +1304,7 @@ class YoutubeUserIE(InfoExtractor):
         # Extract username
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
 
         username = mobj.group(1)
 
@@ -1281,7 +1325,7 @@ class YoutubeUserIE(InfoExtractor):
             try:
                 response = json.loads(page)
             except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
             if 'entry' not in response['feed']:
                 return
 
@@ -1322,9 +1366,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
                 compat_urllib_parse.quote_plus(query.encode('utf-8')),
                 (PAGE_SIZE * pagenum) + 1)
             data_json = self._download_webpage(
-                result_url, video_id=u'query "%s"' % query,
-                note=u'Downloading page %s' % (pagenum + 1),
-                errnote=u'Unable to download API page')
+                result_url, video_id='query "%s"' % query,
+                note='Downloading page %s' % (pagenum + 1),
+                errnote='Unable to download API page')
             data = json.loads(data_json)
             api_response = data['data']
 
@@ -1356,6 +1400,13 @@ class YoutubeSearchURLIE(InfoExtractor):
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'title': 'youtube-dl test video',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -1390,17 +1441,38 @@ class YoutubeSearchURLIE(InfoExtractor):
 
 class YoutubeShowIE(InfoExtractor):
     IE_DESC = 'YouTube.com (multi-season) shows'
-    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
     IE_NAME = 'youtube:show'
+    _TESTS = [{
+        'url': 'http://www.youtube.com/show/airdisasters',
+        'playlist_mincount': 3,
+        'info_dict': {
+            'id': 'airdisasters',
+            'title': 'Air Disasters',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        show_name = mobj.group(1)
-        webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(
+            url, playlist_id, 'Downloading show webpage')
         # There's one playlist for each season of the show
         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
-        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
-        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
+        entries = [
+            self.url_result(
+                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
+            for season in m_seasons
+        ]
+        title = self._og_search_title(webpage, fatal=False)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'entries': entries,
+        }
 
 
 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
index 2853c79c98e9444218f2c8835cbb1acaaaa0e4d5..e62bef2cf49f4cc7a25f438ab69b6e93de78a4dc 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.09.22.1'
+__version__ = '2014.09.28'