Merge remote-tracking branch 'capital-G/master'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 24 Oct 2014 13:02:50 +0000 (15:02 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 24 Oct 2014 13:02:50 +0000 (15:02 +0200)
17 files changed:
README.md
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/telecinco.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/version.py

index 90ba928c3b7beb3de2a4ed7cc7fa09aef03c3d1c..e772fc22aa9f557f64a13b629adb6a75ea4d613d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      configuration in ~/.config/youtube-dl.conf
                                      (%APPDATA%/youtube-dl/config.txt on
                                      Windows)
+    --flat-playlist                  Do not extract the videos of a playlist,
+                                     only list them.
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
index dec0e20e7907d9fcf0110d6c992f14456336580d..623f9d6fe1cdf98e5a192168ecdef36ab1c4c136 100755 (executable)
@@ -165,6 +165,8 @@ class YoutubeDL(object):
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
     extract_flat:      Do not resolve URLs, return the immediate result.
+                       Pass in 'in_playlist' to only show this behavior for
+                       playlist items.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -568,8 +570,13 @@ class YoutubeDL(object):
 
         result_type = ie_result.get('_type', 'video')
 
-        if self.params.get('extract_flat', False):
-            if result_type in ('url', 'url_transparent'):
+        if result_type in ('url', 'url_transparent'):
+            extract_flat = self.params.get('extract_flat', False)
+            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+                    extract_flat is True):
+                self.add_extra_info(ie_result, extra_info)
+                if self.params.get('forcejson', False):
+                    self.to_stdout(json.dumps(ie_result))
                 return ie_result
 
         if result_type == 'video':
index 7f2b4dfcc60ddada121b7b662a61fc10c62de580..c4e1d32db1825dcd1f539344828d2cfde9aecb9e 100644 (file)
@@ -255,8 +255,6 @@ def _real_main(argv=None):
         date = DateRange.day(opts.date)
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
-        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
 
     # Do not download videos when there are audio-only formats
     if opts.extractaudio and not opts.keepvideo and opts.format is None:
@@ -369,6 +367,7 @@ def _real_main(argv=None):
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
+        'extract_flat': opts.extract_flat,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index e0957987c77349e363a7c567bcf07327c89f28c3..3023c3095dc1854a6e8c9b5f303ad22edcc6c38a 100644 (file)
@@ -368,6 +368,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
index 3a34d1ecc67e590c568d10131b557b1d3022fe4d..b9a9440c09b85365a2997bd5feddbae017601c2d 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
     unified_strdate,
     determine_ext,
     get_element_by_id,
-    compat_str,
     get_element_by_attribute,
+    int_or_none,
 )
 
 # There are different sources of video in arte.tv, the extraction process 
@@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):
         if not upload_date_str:
             upload_date_str = player_info.get('VDA', '').split(' ')[0]
 
+        title = player_info['VTI'].strip()
+        subtitle = player_info.get('VSU', '').strip()
+        if subtitle:
+            title += ' - %s' % subtitle
+
         info_dict = {
             'id': player_info['VID'],
-            'title': player_info['VTI'],
+            'title': title,
             'description': player_info.get('VDE'),
             'upload_date': unified_strdate(upload_date_str),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
         }
 
-        all_formats = player_info['VSR'].values()
+        all_formats = []
+        for format_id, format_dict in player_info['VSR'].items():
+            fmt = dict(format_dict)
+            fmt['format_id'] = format_id
+            all_formats.append(fmt)
         # Some formats use the m3u8 protocol
         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
         def _match_lang(f):
@@ -149,25 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
                 )
         formats = sorted(formats, key=sort_key)
         def _format(format_info):
-            quality = ''
-            height = format_info.get('height')
-            if height is not None:
-                quality = compat_str(height)
-            bitrate = format_info.get('bitrate')
-            if bitrate is not None:
-                quality += '-%d' % bitrate
-            if format_info.get('versionCode') is not None:
-                format_id = '%s-%s' % (quality, format_info['versionCode'])
-            else:
-                format_id = quality
-            media_type = format_info.get('mediaType')
-            if media_type is not None:
-                format_id += '-%s' % media_type
             info = {
-                'format_id': format_id,
-                'format_note': format_info.get('versionLibelle'),
-                'width': format_info.get('width'),
-                'height': height,
+                'format_id': format_info['format_id'],
+                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
+                'width': int_or_none(format_info.get('width')),
+                'height': int_or_none(format_info.get('height')),
+                'tbr': int_or_none(format_info.get('bitrate')),
             }
             if format_info['mediaType'] == 'rtmp':
                 info['url'] = format_info['streamer']
index 496271be4e5f7170ad3d814ec5e2c0b99d15538d..d064a28f97920933f30cc11ec323858d5c5ee5f0 100644 (file)
@@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
             raise ExtractorError('Can\'t extract embed url and video id')
         playerdata_url = mobj.group('embed_url')
@@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
         video_description = self._html_search_regex(
             r'<div class="entry-content">(?P<description>.+?)</div>',
             webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
         playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
-        video_thumbnail = self._search_regex(
-            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
-        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
 
+        vidurl = self._search_regex(
+            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+        vidid = self._search_regex(
+            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
+        videoserver = self._html_search_regex(
+            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
+
+        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
 
         formats = []
-        baseurl = sd_url[:sd_url.rfind('/')+1]
+        baseurl = vidurl[:vidurl.rfind('/')+1]
         for video in videolist.findall('.//video'):
             src = video.get('src')
             if not src:
index dae40c136bae20fd54cae401e711b9233c750e14..78877b1cf1ee5bbf2dce05c28762e066b48a0178 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
index f99888ecc378ea2a5404fe42d8d32a6a8c4093fb..e3057d90036575b8ef4dad2f8605ee44e0c9c558 100644 (file)
@@ -39,6 +39,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
+            'url': 're:(?!.*&amp)',
         },
         'params': {
             # rtmp
@@ -237,12 +238,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
-            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
-            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
-            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
+            streamdata = self._download_xml(
+                streamdata_req, video_id,
+                note='Downloading media info for %s' % video_format)
+            video_url = streamdata.find('.//host').text
+            video_play_path = streamdata.find('.//file').text
             formats.append({
                 'url': video_url,
-                'play_path':   video_play_path,
+                'play_path': video_play_path,
                 'ext': 'flv',
                 'format': video_format,
                 'format_id': video_format,
index 0b3374d97d7c72a559afc1ed6906549c092491d9..566e20d76fbad33c7879b31027da5f956cd33bbb 100644 (file)
@@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                         f4m_format['preference'] = 1
                     formats.extend(f4m_formats)
             elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                 formats.append({
                     'url': video_url,
                     'format_id': format_id,
-                    'preference': 2,
+                    'preference': -1,
                 })
         self._sort_formats(formats)
 
index d966e8403dfe9e03765d6a2eb0ab895a0da4100a..ec6d96adaeff666bf0fea7fe78e766c6a6ac2808 100644 (file)
@@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
+        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
         if not links:
             raise ExtractorError('No media links available for %s' % video_id)
 
index 9057a6beb97a0d0cc2fca33d1d0ccc30c07b8101..9b64988943b16bc93eedce5c878ebdc088d95906 100644 (file)
@@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):
                 'uploader': 'education-portal.com',
             },
         },
+        {
+            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+            'info_dict': {
+                'id': 'uxjb0lwrcz',
+                'ext': 'mp4',
+                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+                'duration': 1715.0,
+                'uploader': 'thoughtworks.wistia.com',
+            },   
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):
                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                     ) % (url, url), expected=True)
             else:
-                assert ':' in default_search
+                if ':' not in default_search:
+                    default_search += ':'
                 return self.url_result(default_search + url)
 
         url, smuggled_data = unsmuggle_url(url)
@@ -652,7 +664,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Wistia player
         match = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
         if match:
             embed_url = self._proto_relative_url(
                 unescapeHTML(match.group('url')))
@@ -664,6 +676,7 @@ class GenericIE(InfoExtractor):
                 'title': video_title,
                 'id': video_id,
             }
+            
         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
index 979f3d692a0707fdf2a6a6617b75581e047679dd..6691521e58435682a74af87559ce1d1fd9046fbf 100644 (file)
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
+    compat_urlparse,
     get_element_by_attribute,
     parse_duration,
     strip_jsonp,
@@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):
         ).replace('\'', '"')
         embed_data = json.loads(embed_data_json)
 
-        info_url = embed_data['flashvars']['host']
+        domain = embed_data['mediaUrl']
+        if not domain.startswith('http'):
+            # only happens in telecinco.es videos
+            domain = 'http://' + domain
+        info_url = compat_urlparse.urljoin(
+            domain,
+            compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+        )
         info_el = self._download_xml(info_url, episode).find('./video/info')
 
         video_link = info_el.find('videoUrl/link').text
         token_query = compat_urllib_parse.urlencode({'id': video_link})
         token_info = self._download_json(
-            'http://token.mitele.es/?' + token_query, episode,
+            embed_data['flashvars']['ov_tk'] + '?' + token_query,
+            episode,
             transform_source=strip_jsonp
         )
 
index 8f140d62660b896f5a6f819d621a762d13fbdb69..6118ed5c2021492ee91e22dccd642d564918604c 100644 (file)
@@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
             }
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+            'info_dict': {
+                'id': 'united-states-of-secrets',
+            },
+            'playlist_count': 2,
         }
-
     ]
 
     def _extract_webpage(self, url):
@@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):
                 r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
                 webpage, 'upload date', default=None))
 
+            # tabbed frontline videos
+            tabbed_videos = re.findall(
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+            if tabbed_videos:
+                return tabbed_videos, presumptive_id, upload_date
+
             MEDIA_ID_REGEXES = [
                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
                 r'class="coveplayerid">([^<]+)<',                       # coveplayer
@@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):
     def _real_extract(self, url):
         video_id, display_id, upload_date = self._extract_webpage(url)
 
+        if isinstance(video_id, list):
+            entries = [self.url_result(
+                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+                for vid_id in video_id]
+            return self.playlist_result(entries, display_id)
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
new file mode 100644 (file)
index 0000000..db9788c
--- /dev/null
@@ -0,0 +1,19 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .mitele import MiTeleIE
+
+
+class TelecincoIE(MiTeleIE):
+    IE_NAME = 'telecinco.es'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+        'info_dict': {
+            'id': 'MDSVID20141015_0058',
+            'ext': 'mp4',
+            'title': 'Con Martín Berasategui, hacer un bacalao al ...',
+            'duration': 662,
+        },
+    }
index cfae2de8990ff7f6ae1f7ff37fdc0b559db3d290..4ab56e0ac6baf7f59f1c8892b5dbe560d96cb195 100644 (file)
@@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _real_initialize(self):
         if self._downloader is None:
             return
-        if not self._set_language():
-            return
+        if self._get_login_info()[0] is not None:
+            if not self._set_language():
+                return
         if not self._login():
             return
         self._confirm_age()
index 649361bde3a9eee3bd1222393b069ffb8ac0976d..2b1cd7438669584ea79512e6def4f2272419f004 100644 (file)
@@ -159,6 +159,11 @@ def parseOpts(overrideArguments=None):
         '--ignore-config',
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--flat-playlist',
+        action='store_const', dest='extract_flat', const='in_playlist',
+        default=False,
+        help='Do not extract the videos of a playlist, only list them.')
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
index e7f6adef126bb6c3f053260b78f27d998df6e270..59cb3b1a1aaa60361d697d4abd79687392d86a8c 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.10.18'
+__version__ = '2014.10.24'