Merge remote-tracking branch 'thornomad/glide'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 24 Oct 2014 13:29:03 +0000 (15:29 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 24 Oct 2014 13:29:03 +0000 (15:29 +0200)
27 files changed:
README.md
test/test_all_urls.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bild.py [new file with mode: 0644]
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/justintv.py [deleted file]
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/sexykarma.py [new file with mode: 0644]
youtube_dl/extractor/ted.py
youtube_dl/extractor/telecinco.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py [new file with mode: 0644]
youtube_dl/extractor/vidzi.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/version.py

index 90ba928c3b7beb3de2a4ed7cc7fa09aef03c3d1c..e772fc22aa9f557f64a13b629adb6a75ea4d613d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      configuration in ~/.config/youtube-dl.conf
                                      (%APPDATA%/youtube-dl/config.txt on
                                      Windows)
+    --flat-playlist                  Do not extract the videos of a playlist,
+                                     only list them.
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
index 84b05da39e1e28d0df4d65acb6248aa77d7b6b65..965e5d8a5859886937e80d85a97ae39396378d26 100644 (file)
@@ -14,7 +14,7 @@ from test.helper import gettestcases
 from youtube_dl.extractor import (
     FacebookIE,
     gen_extractors,
-    JustinTVIE,
+    TwitchIE,
     YoutubeIE,
 )
 
@@ -72,21 +72,17 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
-    def test_justin_tv_channelid_matching(self):
-        self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
-        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
-        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
-
-    def test_justintv_videoid_matching(self):
-        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
-    def test_justin_tv_chapterid_matching(self):
-        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
+    def test_twitch_channelid_matching(self):
+        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
+        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
+        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
+        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
+
+    def test_twitch_videoid_matching(self):
+        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
+
+    def test_twitch_chapterid_matching(self):
+        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
 
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
index dec0e20e7907d9fcf0110d6c992f14456336580d..623f9d6fe1cdf98e5a192168ecdef36ab1c4c136 100755 (executable)
@@ -165,6 +165,8 @@ class YoutubeDL(object):
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
     extract_flat:      Do not resolve URLs, return the immediate result.
+                       Pass in 'in_playlist' to only show this behavior for
+                       playlist items.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -568,8 +570,13 @@ class YoutubeDL(object):
 
         result_type = ie_result.get('_type', 'video')
 
-        if self.params.get('extract_flat', False):
-            if result_type in ('url', 'url_transparent'):
+        if result_type in ('url', 'url_transparent'):
+            extract_flat = self.params.get('extract_flat', False)
+            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+                    extract_flat is True):
+                self.add_extra_info(ie_result, extra_info)
+                if self.params.get('forcejson', False):
+                    self.to_stdout(json.dumps(ie_result))
                 return ie_result
 
         if result_type == 'video':
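
The new behavior can also be driven from the embedding API; a minimal sketch, assuming a build containing this commit and an illustrative playlist URL (not taken from this change):

    from youtube_dl import YoutubeDL

    # extract_flat='in_playlist' is exactly what --flat-playlist sets:
    # playlist entries come back as unresolved 'url' results instead of
    # full video info dicts.
    opts = {'extract_flat': 'in_playlist', 'quiet': True}
    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(
            'https://www.youtube.com/playlist?list=PLBB231211A4F62143',
            download=False)
        for entry in info.get('entries', []):
            print(entry.get('id'), entry.get('url'))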
index 7f2b4dfcc60ddada121b7b662a61fc10c62de580..a1079e5966f3ac9cfb4edde5c20f904f1c760554 100644 (file)
@@ -79,6 +79,7 @@ __authors__  = (
     'Carlos Ramos',
     '5moufl',
     'lenaten',
+    'Dennis Scheiba',
 )
 
 __license__ = 'Public Domain'
@@ -255,8 +256,6 @@ def _real_main(argv=None):
         date = DateRange.day(opts.date)
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
-        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
 
     # Do not download videos when there are audio-only formats
     if opts.extractaudio and not opts.keepvideo and opts.format is None:
@@ -369,6 +368,7 @@ def _real_main(argv=None):
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
+        'extract_flat': opts.extract_flat,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index c0b26c5b375a936f22755f9420f948d99206a0ef..ab4af20790ee99890e199f84ba159d75c8a6777e 100644 (file)
@@ -26,6 +26,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
@@ -174,7 +175,6 @@ from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jukebox import JukeboxIE
-from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
@@ -317,6 +317,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
@@ -368,6 +369,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
@@ -396,6 +398,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
+from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
@@ -421,6 +424,7 @@ from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
+from .vidzi import VidziIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -489,10 +493,8 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
-
 from .zdf import ZDFIE
 
-
 _ALL_CLASSES = [
     klass
     for name, klass in globals().items()
index c3d02f85e8f023deac51287b72cd45623db72f07..b9a9440c09b85365a2997bd5feddbae017601c2d 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
     unified_strdate,
     determine_ext,
     get_element_by_id,
-    compat_str,
     get_element_by_attribute,
+    int_or_none,
 )
 
 # There are different sources of video in arte.tv, the extraction process 
@@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):
         if not upload_date_str:
             upload_date_str = player_info.get('VDA', '').split(' ')[0]
 
+        title = player_info['VTI'].strip()
+        subtitle = player_info.get('VSU', '').strip()
+        if subtitle:
+            title += ' - %s' % subtitle
+
         info_dict = {
             'id': player_info['VID'],
-            'title': player_info['VTI'],
+            'title': title,
             'description': player_info.get('VDE'),
             'upload_date': unified_strdate(upload_date_str),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
         }
 
-        all_formats = player_info['VSR'].values()
+        all_formats = []
+        for format_id, format_dict in player_info['VSR'].items():
+            fmt = dict(format_dict)
+            fmt['format_id'] = format_id
+            all_formats.append(fmt)
         # Some formats use the m3u8 protocol
         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
         def _match_lang(f):
@@ -149,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
                 )
         formats = sorted(formats, key=sort_key)
         def _format(format_info):
-            quality = ''
-            height = format_info.get('height')
-            if height is not None:
-                quality = compat_str(height)
-            bitrate = format_info.get('bitrate')
-            if bitrate is not None:
-                quality += '-%d' % bitrate
-            if format_info.get('versionCode') is not None:
-                format_id = '%s-%s' % (quality, format_info['versionCode'])
-            else:
-                format_id = quality
             info = {
-                'format_id': format_id,
-                'format_note': format_info.get('versionLibelle'),
-                'width': format_info.get('width'),
-                'height': height,
+                'format_id': format_info['format_id'],
+                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
+                'width': int_or_none(format_info.get('width')),
+                'height': int_or_none(format_info.get('height')),
+                'tbr': int_or_none(format_info.get('bitrate')),
             }
             if format_info['mediaType'] == 'rtmp':
                 info['url'] = format_info['streamer']
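
Reduced to its core, the reworked VSR handling above takes the format id straight from the dict key instead of rebuilding it from height and bitrate; the sample data below is invented:

    # player_info['VSR'] maps format ids to format descriptions.
    player_info = {'VSR': {
        'HTTP_MP4_SQ_1': {'width': 1280, 'height': 720, 'bitrate': 2200},
        'RTMP_SQ_1': {'width': 1280, 'height': 720, 'bitrate': 2200},
    }}
    all_formats = []
    for format_id, format_dict in player_info['VSR'].items():
        fmt = dict(format_dict)       # copy, leaving the source dict intact
        fmt['format_id'] = format_id  # the key itself is the format id
        all_formats.append(fmt)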
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644 (file)
index 0000000..0269d11
--- /dev/null
@@ -0,0 +1,39 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class BildIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+    IE_DESC = 'Bild.de'
+    _TEST = {
+        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+        'info_dict': {
+            'id': '38184146',
+            'ext': 'mp4',
+            'title': 'BILD hat sie getestet',
+            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+            'duration': 196,
+            'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
+        doc = self._download_xml(xml_url, video_id)
+
+        duration = int_or_none(doc.attrib.get('duration'), scale=1000)
+
+        return {
+            'id': video_id,
+            'title': doc.attrib['ueberschrift'],
+            'description': doc.attrib.get('text'),
+            'url': doc.attrib['src'],
+            'thumbnail': doc.attrib.get('img'),
+            'duration': duration,
+        }
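
The duration handling relies on int_or_none's scale argument, since the bild XML reports milliseconds; a rough sketch of the helper (the real utility also tolerates unparsable input):

    def int_or_none(v, scale=1):
        # None stays None; otherwise parse and scale (ms -> s for scale=1000).
        return None if v is None else int(v) // scale

    print(int_or_none('196000', scale=1000))  # 196
    print(int_or_none(None, scale=1000))      # None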
index 496271be4e5f7170ad3d814ec5e2c0b99d15538d..d064a28f97920933f30cc11ec323858d5c5ee5f0 100644 (file)
@@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
             raise ExtractorError('Can\'t extract embed url and video id')
         playerdata_url = mobj.group('embed_url')
@@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
         video_description = self._html_search_regex(
             r'<div class="entry-content">(?P<description>.+?)</div>',
             webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
         playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
-        video_thumbnail = self._search_regex(
-            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
-        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
 
+        vidurl = self._search_regex(
+            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+        vidid = self._search_regex(
+            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
+        videoserver = self._html_search_regex(
+            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
+
+        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
 
         formats = []
-        baseurl = sd_url[:sd_url.rfind('/')+1]
+        baseurl = vidurl[:vidurl.rfind('/')+1]
         for video in videolist.findall('.//video'):
             src = video.get('src')
             if not src:
index dae40c136bae20fd54cae401e711b9233c750e14..78877b1cf1ee5bbf2dce05c28762e066b48a0178 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
index 450c7dfd69d0000c810f18ef35741aae05221c40..e8366f7f91c663f1f7bdf70db0588016f49da3de 100644 (file)
@@ -281,6 +281,12 @@ class InfoExtractor(object):
             raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen('Saving request to ' + filename)
+            # Working around MAX_PATH limitation on Windows (see
+            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+            if os.name == 'nt':
+                absfilepath = os.path.abspath(filename)
+                if len(absfilepath) > 259:
+                    filename = '\\\\?\\' + absfilepath
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
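
Extracted into a standalone helper, the workaround above looks roughly like this (the function name is illustrative, not part of the codebase):

    import os

    def workaround_max_path(filename):
        # On Windows, absolute paths longer than MAX_PATH (260 characters,
        # including the terminating NUL) need the \\?\ prefix to be usable
        # with most file APIs; other platforms are left alone.
        if os.name == 'nt':
            absfilepath = os.path.abspath(filename)
            if len(absfilepath) > 259:
                return '\\\\?\\' + absfilepath
        return filename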
index f99888ecc378ea2a5404fe42d8d32a6a8c4093fb..e3057d90036575b8ef4dad2f8605ee44e0c9c558 100644 (file)
@@ -39,6 +39,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
+            'url': 're:(?!.*&amp)',
         },
         'params': {
             # rtmp
@@ -237,12 +238,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
-            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
-            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
-            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
+            streamdata = self._download_xml(
+                streamdata_req, video_id,
+                note='Downloading media info for %s' % video_format)
+            video_url = streamdata.find('.//host').text
+            video_play_path = streamdata.find('.//file').text
             formats.append({
                 'url': video_url,
-                'play_path':   video_play_path,
+                'play_path': video_play_path,
                 'ext': 'flv',
                 'format': video_format,
                 'format_id': video_format,
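
Switching from _download_webpage plus regexes to _download_xml means the RPC response is parsed as a proper tree; with the standard library alone, the equivalent lookup is (response body invented):

    import xml.etree.ElementTree as ET

    streamdata = ET.fromstring(
        '<resp><host>rtmp://example.invalid/ondemand</host>'
        '<file>video.flv</file></resp>')
    video_url = streamdata.find('.//host').text        # first <host> anywhere
    video_play_path = streamdata.find('.//file').text  # first <file> anywhere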
index 0b3374d97d7c72a559afc1ed6906549c092491d9..566e20d76fbad33c7879b31027da5f956cd33bbb 100644 (file)
@@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                         f4m_format['preference'] = 1
                     formats.extend(f4m_formats)
             elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                 formats.append({
                     'url': video_url,
                     'format_id': format_id,
-                    'preference': 2,
+                    'preference': -1,
                 })
         self._sort_formats(formats)
 
index d966e8403dfe9e03765d6a2eb0ab895a0da4100a..ec6d96adaeff666bf0fea7fe78e766c6a6ac2808 100644 (file)
@@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
+        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
         if not links:
             raise ExtractorError('No media links available for %s' % video_id)
 
index 122763a23ff7e6461cf661ea692c14b09112af1c..9b64988943b16bc93eedce5c878ebdc088d95906 100644 (file)
@@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):
                 'uploader': 'education-portal.com',
             },
         },
+        {
+            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+            'info_dict': {
+                'id': 'uxjb0lwrcz',
+                'ext': 'mp4',
+                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+                'duration': 1715.0,
+                'uploader': 'thoughtworks.wistia.com',
+            },
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):
                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                     ) % (url, url), expected=True)
             else:
-                assert ':' in default_search
+                if ':' not in default_search:
+                    default_search += ':'
                 return self.url_result(default_search + url)
 
         url, smuggled_data = unsmuggle_url(url)
@@ -609,13 +621,13 @@ class GenericIE(InfoExtractor):
         if mobj:
             player_url = unescapeHTML(mobj.group('url'))
             surl = smuggle_url(player_url, {'Referer': url})
-            return self.url_result(surl, 'Vimeo')
+            return self.url_result(surl)
 
         # Look for embedded (swf embed) Vimeo player
         mobj = re.search(
-            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
         if mobj:
-            return self.url_result(mobj.group(1), 'Vimeo')
+            return self.url_result(mobj.group(1))
 
         # Look for embedded YouTube player
         matches = re.findall(r'''(?x)
@@ -652,17 +664,20 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Wistia player
         match = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
         if match:
+            embed_url = self._proto_relative_url(
+                unescapeHTML(match.group('url')))
             return {
                 '_type': 'url_transparent',
-                'url': unescapeHTML(match.group('url')),
+                'url': embed_url,
                 'ie_key': 'Wistia',
                 'uploader': video_uploader,
                 'title': video_title,
                 'id': video_id,
             }
-        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+
+        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
                 '_type': 'url_transparent',
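
The widened regex now also accepts scheme-relative embed URLs (//fast.wistia.net/...), which is why the match is run through _proto_relative_url before use; a sketch of what that helper does:

    def proto_relative_url(url, scheme='http:'):
        # Glue a scheme onto //host/... URLs so they can actually be fetched.
        if url and url.startswith('//'):
            return scheme + url
        return url

    print(proto_relative_url('//fast.wistia.net/embed/iframe/uxjb0lwrcz'))
    # http://fast.wistia.net/embed/iframe/uxjb0lwrcz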
diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py
deleted file mode 100644 (file)
index 27017e8..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-import json
-import os
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    compat_str,
-    ExtractorError,
-    formatSeconds,
-)
-
-
-class JustinTVIE(InfoExtractor):
-    """Information extractor for justin.tv and twitch.tv"""
-    # TODO: One broadcast may be split into multiple videos. The key
-    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
-    # starts at 1 and increases. Can we treat all parts as one video?
-
-    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
-        (?:
-            (?P<channelid>[^/]+)|
-            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
-            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
-        )
-        /?(?:\#.*)?$
-        """
-    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = 'justin.tv'
-    IE_DESC = 'justin.tv and twitch.tv'
-    _TEST = {
-        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
-        'md5': 'ecaa8a790c22a40770901460af191c9a',
-        'info_dict': {
-            'id': '296128360',
-            'ext': 'flv',
-            'upload_date': '20110927',
-            'uploader_id': 25114803,
-            'uploader': 'thegamedevhub',
-            'title': 'Beginner Series - Scripting With Python Pt.1'
-        }
-    }
-
-    # Return count of items, list of *valid* items
-    def _parse_page(self, url, video_id, counter):
-        info_json = self._download_webpage(
-            url, video_id,
-            'Downloading video info JSON on page %d' % counter,
-            'Unable to download video info JSON %d' % counter)
-
-        response = json.loads(info_json)
-        if type(response) != list:
-            error_text = response.get('error', 'unknown error')
-            raise ExtractorError('Justin.tv API: %s' % error_text)
-        info = []
-        for clip in response:
-            video_url = clip['video_file_url']
-            if video_url:
-                video_extension = os.path.splitext(video_url)[1][1:]
-                video_date = re.sub('-', '', clip['start_time'][:10])
-                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
-                video_id = clip['id']
-                video_title = clip.get('title', video_id)
-                info.append({
-                    'id': compat_str(video_id),
-                    'url': video_url,
-                    'title': video_title,
-                    'uploader': clip.get('channel_name', video_uploader_id),
-                    'uploader_id': video_uploader_id,
-                    'upload_date': video_date,
-                    'ext': video_extension,
-                })
-        return (len(response), info)
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        api_base = 'http://api.justin.tv'
-        paged = False
-        if mobj.group('channelid'):
-            paged = True
-            video_id = mobj.group('channelid')
-            api = api_base + '/channel/archives/%s.json' % video_id
-        elif mobj.group('chapterid'):
-            chapter_id = mobj.group('chapterid')
-
-            webpage = self._download_webpage(url, chapter_id)
-            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
-            if not m:
-                raise ExtractorError('Cannot find archive of a chapter')
-            archive_id = m.group(1)
-
-            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(
-                api, chapter_id,
-                note='Downloading chapter information',
-                errnote='Chapter information download failed')
-            for a in doc.findall('.//archive'):
-                if archive_id == a.find('./id').text:
-                    break
-            else:
-                raise ExtractorError('Could not find chapter in chapter information')
-
-            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or 'flv'
-
-            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info = self._download_json(
-                chapter_api_url, 'c' + chapter_id,
-                note='Downloading chapter metadata',
-                errnote='Download of chapter metadata failed')
-
-            bracket_start = int(doc.find('.//bracket_start').text)
-            bracket_end = int(doc.find('.//bracket_end').text)
-
-            # TODO determine start (and probably fix up file)
-            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += '?start=' + TODO:start_timestamp
-            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
-                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
-            info = {
-                'id': 'c' + chapter_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': chapter_info['title'],
-                'thumbnail': chapter_info['preview'],
-                'description': chapter_info['description'],
-                'uploader': chapter_info['channel']['display_name'],
-                'uploader_id': chapter_info['channel']['name'],
-            }
-            return info
-        else:
-            video_id = mobj.group('videoid')
-            api = api_base + '/broadcast/by_archive/%s.json' % video_id
-
-        entries = []
-        offset = 0
-        limit = self._JUSTIN_PAGE_LIMIT
-        for counter in itertools.count(1):
-            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
-            page_count, page_info = self._parse_page(
-                page_url, video_id, counter)
-            entries.extend(page_info)
-            if not paged or page_count != limit:
-                break
-            offset += limit
-        return {
-            '_type': 'playlist',
-            'id': video_id,
-            'entries': entries,
-        }
index 979f3d692a0707fdf2a6a6617b75581e047679dd..6691521e58435682a74af87559ce1d1fd9046fbf 100644 (file)
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
+    compat_urlparse,
     get_element_by_attribute,
     parse_duration,
     strip_jsonp,
@@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):
         ).replace('\'', '"')
         embed_data = json.loads(embed_data_json)
 
-        info_url = embed_data['flashvars']['host']
+        domain = embed_data['mediaUrl']
+        if not domain.startswith('http'):
+            # only happens in telecinco.es videos
+            domain = 'http://' + domain
+        info_url = compat_urlparse.urljoin(
+            domain,
+            compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+        )
         info_el = self._download_xml(info_url, episode).find('./video/info')
 
         video_link = info_el.find('videoUrl/link').text
         token_query = compat_urllib_parse.urlencode({'id': video_link})
         token_info = self._download_json(
-            'http://token.mitele.es/?' + token_query, episode,
+            embed_data['flashvars']['ov_tk'] + '?' + token_query,
+            episode,
             transform_source=strip_jsonp
         )
 
index a4564d3de80957e04e8468d437a8c8d5f428347e..bb8937c4d53d33df6b560aff7d56df80740bf1cc 100644 (file)
@@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):
         },
     }
 
-    def check_urls(self, url_list):
-        """Returns 1st active url from list"""
-        for url in url_list:
+    def _get_url(self, track_id, template_url):
+        server_count = 30
+        for i in range(server_count):
+            url = template_url % i
             try:
                 # We only want to know if the request succeeds
                 # don't download the whole file
-                self._request_webpage(HEADRequest(url), None, False)
+                self._request_webpage(
+                    HEADRequest(url), track_id,
+                    'Checking URL %d/%d ...' % (i + 1, server_count))
                 return url
             except ExtractorError:
-                url = None
+                pass
 
         return None
 
-    def _get_url(self, template_url):
-        return self.check_urls(template_url % i for i in range(30))
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group(1)
@@ -61,11 +61,11 @@ class MixcloudIE(InfoExtractor):
             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(template_url)
+        final_song_url = self._get_url(track_id, template_url)
         if final_song_url is None:
             self.to_screen('Trying with m4a extension')
             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(template_url)
+            final_song_url = self._get_url(track_id, template_url)
         if final_song_url is None:
             raise ExtractorError('Unable to extract track url')
 
index 8f140d62660b896f5a6f819d621a762d13fbdb69..6118ed5c2021492ee91e22dccd642d564918604c 100644 (file)
@@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
             }
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+            'info_dict': {
+                'id': 'united-states-of-secrets',
+            },
+            'playlist_count': 2,
         }
-
     ]
 
     def _extract_webpage(self, url):
@@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):
                 r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
                 webpage, 'upload date', default=None))
 
+            # tabbed frontline videos
+            tabbed_videos = re.findall(
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+            if tabbed_videos:
+                return tabbed_videos, presumptive_id, upload_date
+
             MEDIA_ID_REGEXES = [
                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
                 r'class="coveplayerid">([^<]+)<',                       # coveplayer
@@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):
     def _real_extract(self, url):
         video_id, display_id, upload_date = self._extract_webpage(url)
 
+        if isinstance(video_id, list):
+            entries = [self.url_result(
+                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+                for vid_id in video_id]
+            return self.playlist_result(entries, display_id)
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py
new file mode 100644 (file)
index 0000000..c833fc8
--- /dev/null
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+    parse_duration,
+    int_or_none,
+)
+
+
+class SexyKarmaIE(InfoExtractor):
+    IE_DESC = 'Sexy Karma and Watch Indian Porn'
+    _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
+        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
+        'info_dict': {
+            'id': 'yHI70cOyIHt',
+            'display_id': 'taking-a-quick-pee',
+            'ext': 'mp4',
+            'title': 'Taking a quick pee.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'wildginger7',
+            'upload_date': '20141007',
+            'duration': 22,
+            'view_count': int,
+            'comment_count': int,
+            'categories': list,
+        }
+    }, {
+        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
+        'md5': 'dd216c68d29b49b12842b9babe762a5d',
+        'info_dict': {
+            'id': '8Id6EZPbuHf',
+            'display_id': 'pot-pixie-tribute',
+            'ext': 'mp4',
+            'title': 'pot_pixie tribute',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'banffite',
+            'upload_date': '20141013',
+            'duration': 16,
+            'view_count': int,
+            'comment_count': int,
+            'categories': list,
+        }
+    }, {
+        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
+        'md5': '9afb80675550406ed9a63ac2819ef69d',
+        'info_dict': {
+            'id': 'dW2mtctxJfs',
+            'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
+            'ext': 'mp4',
+            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Don',
+            'upload_date': '20140213',
+            'duration': 83,
+            'view_count': int,
+            'comment_count': int,
+            'categories': list,
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._html_search_regex(
+            r"url: escape\('([^']+)'\)", webpage, 'url')
+
+        title = self._html_search_regex(
+            r'<h2 class="he2"><span>(.*?)</span>',
+            webpage, 'title')
+        thumbnail = self._html_search_regex(
+            r'<span id="container"><img\s+src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'class="aupa">\s*(.*?)</a>',
+            webpage, 'uploader')
+        upload_date = unified_strdate(self._html_search_regex(
+            r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
+
+        duration = parse_duration(self._search_regex(
+            r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
+            webpage, 'duration', fatal=False))
+
+        view_count = int_or_none(self._search_regex(
+            r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+            webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._search_regex(
+            r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+            webpage, 'comment count', fatal=False))
+
+        categories = re.findall(
+            r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+            webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
+        }
index d5e28efada55a91a480ce031df0bc2774de2ccc6..cd4af96fdb02b2f4ea392fe19482371b6a02acad 100644 (file)
@@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):
             'title': 'Who are the hackers?',
         },
         'playlist_mincount': 6,
+    }, {
+        # contains a youtube video
+        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '_ZG8HBuDjgc',
+            'ext': 'mp4',
+            'title': 'Douglas Adams: Parrots the Universe and Everything',
+            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
+            'uploader': 'University of California Television (UCTV)',
+            'uploader_id': 'UCtelevision',
+            'upload_date': '20080522',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     _NATIVE_FORMATS = {
@@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):
 
         talk_info = self._extract_info(webpage)['talks'][0]
 
+        if talk_info.get('external') is not None:
+            self.to_screen('Found video from %s' % talk_info['external']['service'])
+            return {
+                '_type': 'url',
+                'url': talk_info['external']['uri'],
+            }
+
         formats = [{
             'url': format_url,
             'format_id': format_id,
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
new file mode 100644 (file)
index 0000000..db9788c
--- /dev/null
@@ -0,0 +1,19 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .mitele import MiTeleIE
+
+
+class TelecincoIE(MiTeleIE):
+    IE_NAME = 'telecinco.es'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+        'info_dict': {
+            'id': 'MDSVID20141015_0058',
+            'ext': 'mp4',
+            'title': 'Con Martín Berasategui, hacer un bacalao al ...',
+            'duration': 662,
+        },
+    }
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
new file mode 100644 (file)
index 0000000..36aa1ad
--- /dev/null
@@ -0,0 +1,187 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class TwitchIE(InfoExtractor):
+    # TODO: One broadcast may be split into multiple videos. The key
+    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
+    # starts at 1 and increases. Can we treat all parts as one video?
+    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
+        (?:
+            (?P<channelid>[^/]+)|
+            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
+            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
+        )
+        /?(?:\#.*)?$
+        """
+    _PAGE_LIMIT = 100
+    _API_BASE = 'https://api.twitch.tv'
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/riotgames/b/577357806',
+        'info_dict': {
+            'id': 'a577357806',
+            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+        },
+        'playlist_mincount': 12,
+    }, {
+        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+        'info_dict': {
+            'id': 'c5285812',
+            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'http://www.twitch.tv/vanillatv',
+        'info_dict': {
+            'id': 'vanillatv',
+            'title': 'VanillaTV',
+        },
+        'playlist_mincount': 412,
+    }]
+
+    def _handle_error(self, response):
+        if not isinstance(response, dict):
+            return
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
+                expected=True)
+
+    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
+        response = super(TwitchIE, self)._download_json(url, video_id, note)
+        self._handle_error(response)
+        return response
+
+    def _extract_media(self, item, item_id):
+        ITEMS = {
+            'a': 'video',
+            'c': 'chapter',
+        }
+        info = self._extract_info(self._download_json(
+            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+            'Downloading %s info JSON' % ITEMS[item]))
+        response = self._download_json(
+            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+            'Downloading %s playlist JSON' % ITEMS[item])
+        entries = []
+        chunks = response['chunks']
+        qualities = list(chunks.keys())
+        for num, fragment in enumerate(zip(*chunks.values()), start=1):
+            formats = []
+            for fmt_num, fragment_fmt in enumerate(fragment):
+                format_id = qualities[fmt_num]
+                fmt = {
+                    'url': fragment_fmt['url'],
+                    'format_id': format_id,
+                    'quality': 1 if format_id == 'live' else 0,
+                }
+                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+                if m:
+                    fmt['height'] = int(m.group('height'))
+                formats.append(fmt)
+            self._sort_formats(formats)
+            entry = dict(info)
+            entry['id'] = '%s_%d' % (entry['id'], num)
+            entry['title'] = '%s part %d' % (entry['title'], num)
+            entry['formats'] = formats
+            entries.append(entry)
+        return self.playlist_result(entries, info['id'], info['title'])
+
+    def _extract_info(self, info):
+        return {
+            'id': info['_id'],
+            'title': info['title'],
+            'description': info['description'],
+            'duration': info['length'],
+            'thumbnail': info['preview'],
+            'uploader': info['channel']['display_name'],
+            'uploader_id': info['channel']['name'],
+            'timestamp': parse_iso8601(info['recorded_at']),
+            'view_count': info['views'],
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj.group('chapterid'):
+            return self._extract_media('c', mobj.group('chapterid'))
+
+            """
+            webpage = self._download_webpage(url, chapter_id)
+            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
+            if not m:
+                raise ExtractorError('Cannot find archive of a chapter')
+            archive_id = m.group(1)
+
+            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
+            doc = self._download_xml(
+                api, chapter_id,
+                note='Downloading chapter information',
+                errnote='Chapter information download failed')
+            for a in doc.findall('.//archive'):
+                if archive_id == a.find('./id').text:
+                    break
+            else:
+                raise ExtractorError('Could not find chapter in chapter information')
+
+            video_url = a.find('./video_file_url').text
+            video_ext = video_url.rpartition('.')[2] or 'flv'
+
+            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_info = self._download_json(
+                chapter_api_url, 'c' + chapter_id,
+                note='Downloading chapter metadata',
+                errnote='Download of chapter metadata failed')
+
+            bracket_start = int(doc.find('.//bracket_start').text)
+            bracket_end = int(doc.find('.//bracket_end').text)
+
+            # TODO determine start (and probably fix up file)
+            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
+            #video_url += '?start=' + TODO:start_timestamp
+            # bracket_start is 13290, but we want 51670615
+            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
+                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+
+            info = {
+                'id': 'c' + chapter_id,
+                'url': video_url,
+                'ext': video_ext,
+                'title': chapter_info['title'],
+                'thumbnail': chapter_info['preview'],
+                'description': chapter_info['description'],
+                'uploader': chapter_info['channel']['display_name'],
+                'uploader_id': chapter_info['channel']['name'],
+            }
+            return info
+            """
+        elif mobj.group('videoid'):
+            return self._extract_media('a', mobj.group('videoid'))
+        elif mobj.group('channelid'):
+            channel_id = mobj.group('channelid')
+            info = self._download_json(
+                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+                channel_id, 'Downloading channel info JSON')
+            channel_name = info.get('display_name') or info.get('name')
+            entries = []
+            offset = 0
+            limit = self._PAGE_LIMIT
+            for counter in itertools.count(1):
+                response = self._download_json(
+                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
+                    % (self._API_BASE, channel_id, offset, limit),
+                    channel_id, 'Downloading channel videos JSON page %d' % counter)
+                videos = response['videos']
+                if not videos:
+                    break
+                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
+                offset += limit
+            return self.playlist_result(entries, channel_id, channel_name)
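
The heart of the new extractor is _extract_media's chunk handling: the API returns one list of broadcast parts per quality, and zip(*chunks.values()) regroups them into per-part tuples, each of which becomes a playlist entry with one format per quality. Reduced to its core, with invented URLs:

    chunks = {
        'live': [{'url': 'http://t.invalid/live/1.flv'},
                 {'url': 'http://t.invalid/live/2.flv'}],
        '240p': [{'url': 'http://t.invalid/240p/1.flv'},
                 {'url': 'http://t.invalid/240p/2.flv'}],
    }
    qualities = list(chunks.keys())
    for num, fragment in enumerate(zip(*chunks.values()), start=1):
        # fragment holds part <num> in every quality, in qualities order
        formats = [{'format_id': qualities[i], 'url': f['url']}
                   for i, f in enumerate(fragment)]
        print('part %d: %s' % (num, [f['format_id'] for f in formats]))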
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
new file mode 100644 (file)
index 0000000..669979e
--- /dev/null
@@ -0,0 +1,33 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class VidziIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
+    _TEST = {
+        'url': 'http://vidzi.tv/cghql9yq6emu.html',
+        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+        'info_dict': {
+            'id': 'cghql9yq6emu',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
+        title = self._html_search_regex(
+            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
+
\ No newline at end of file
index e6a86f18ec5018615344e547cd332892c81fc665..07959d3fe0e4b75a0b58d3605560c2ed450e9ba6 100644 (file)
@@ -157,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'duration': 62,
             }
         },
+        {
+            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+            'url': 'https://player.vimeo.com/video/98044508',
+            'note': 'The js code contains assignments to the same variable as the config',
+            'info_dict': {
+                'id': '98044508',
+                'ext': 'mp4',
+                'title': 'Pier Solar OUYA Official Trailer',
+                'uploader': 'Tulio Gonçalves',
+                'uploader_id': 'user28849593',
+            },
+        },
     ]
 
     def _verify_video_password(self, url, video_id, webpage):
@@ -244,7 +256,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 # We try to find out to which variable the config dict is assigned
                 m_variable_name = re.search('(\w)\.video\.id', webpage)
                 if m_variable_name is not None:
-                    config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                 config = self._search_regex(config_re, webpage, 'info section',
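
The extra [^}] in the config regex guards against pages that assign to the same one-letter variable more than once; without it, the lazy match can start at an earlier, empty assignment and span both. A quick demonstration with an invented page snippet:

    import re

    page = 'a={};a={"config":true};'
    old = re.search(r'a=({.+?});', page).group(1)
    new = re.search(r'a=({[^}].+?});', page).group(1)
    print(old)  # {};a={"config":true}   (spans both assignments, broken)
    print(new)  # {"config":true}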
index cfae2de8990ff7f6ae1f7ff37fdc0b559db3d290..4ab56e0ac6baf7f59f1c8892b5dbe560d96cb195 100644 (file)
@@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _real_initialize(self):
         if self._downloader is None:
             return
-        if not self._set_language():
-            return
+        if self._get_login_info()[0] is not None:
+            if not self._set_language():
+                return
         if not self._login():
             return
         self._confirm_age()
index 649361bde3a9eee3bd1222393b069ffb8ac0976d..2b1cd7438669584ea79512e6def4f2272419f004 100644 (file)
@@ -159,6 +159,11 @@ def parseOpts(overrideArguments=None):
         '--ignore-config',
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--flat-playlist',
+        action='store_const', dest='extract_flat', const='in_playlist',
+        default=False,
+        help='Do not extract the videos of a playlist, only list them.')
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
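
With store_const, extract_flat stays False unless the flag is present, in which case it becomes the string 'in_playlist' rather than True; a quick check of that wiring in isolation:

    import optparse

    parser = optparse.OptionParser()
    parser.add_option(
        '--flat-playlist',
        action='store_const', dest='extract_flat', const='in_playlist',
        default=False)

    opts, _ = parser.parse_args([])
    print(opts.extract_flat)  # False
    opts, _ = parser.parse_args(['--flat-playlist'])
    print(opts.extract_flat)  # in_playlist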
index 416e2fd856a9c1413a17dafdeebdeff59ba207b0..59cb3b1a1aaa60361d697d4abd79687392d86a8c 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.10.13'
+__version__ = '2014.10.24'