Merge remote-tracking branch 'gabeos/crunchyroll-show-playlist'
authorPhilipp Hagemeister <phihag@phihag.de>
Sun, 26 Oct 2014 16:06:35 +0000 (17:06 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Sun, 26 Oct 2014 16:06:35 +0000 (17:06 +0100)
36 files changed:
README.md
test/helper.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/cache.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/audiomack.py [new file with mode: 0644]
youtube_dl/extractor/bild.py [new file with mode: 0644]
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/glide.py [new file with mode: 0644]
youtube_dl/extractor/hark.py
youtube_dl/extractor/lrt.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/motherless.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/telecinco.py [new file with mode: 0644]
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/viddler.py
youtube_dl/extractor/vidzi.py [new file with mode: 0644]
youtube_dl/extractor/vrt.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index 90ba928c3b7beb3de2a4ed7cc7fa09aef03c3d1c..d625464334a02b37dbe1706fcff4653d2006c130 100644 (file)
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      configuration in ~/.config/youtube-dl.conf
                                      (%APPDATA%/youtube-dl/config.txt on
                                      Windows)
+    --flat-playlist                  Do not extract the videos of a playlist,
+                                     only list them.
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
@@ -197,6 +199,10 @@ which means you can modify it, redistribute it or use it however you like.
     -j, --dump-json                  simulate, quiet but print JSON information.
                                      See --output for a description of available
                                      keys.
+    -J, --dump-single-json           simulate, quiet but print JSON information
+                                     for each command-line argument. If the URL
+                                     refers to a playlist, dump the whole
+                                     playlist information in a single line.
     --newline                        output progress bar as new lines
     --no-progress                    do not print progress bar
     --console-title                  display progress in console titlebar
index 62cb3ce0219ba46dadc1cc0c08891bf0941d2304..2fa45631adba0144bf0260117c33f944e53f66eb 100644 (file)
@@ -145,7 +145,7 @@ def expect_info_dict(self, expected_dict, got_dict):
         info_dict_str = ''.join(
             '    %s: %s,\n' % (_repr(k), _repr(v))
             for k, v in test_info_dict.items())
-        write_string('\n"info_dict": {' + info_dict_str + '}\n', out=sys.stderr)
+        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)
         self.assertFalse(
             missing_keys,
             'Missing keys in test definition: %s' % (
index bcca0efead42b85f39337a4c28f0d654447cd8e2..19f9fce20e0b39bad42de8555876b7804799aa19 100644 (file)
@@ -45,6 +45,9 @@ from youtube_dl.utils import (
     escape_rfc3986,
     escape_url,
     js_to_json,
+    get_filesystem_encoding,
+    compat_getenv,
+    compat_expanduser,
 )
 
 
@@ -355,5 +358,15 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})
 
+    def test_compat_getenv(self):
+        test_str = 'тест'
+        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding())
+        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
+
+    def test_compat_expanduser(self):
+        test_str = 'C:\Documents and Settings\тест\Application Data'
+        os.environ['HOME'] = test_str.encode(get_filesystem_encoding())
+        self.assertEqual(compat_expanduser('~'), test_str)
+
 if __name__ == '__main__':
     unittest.main()
index dec0e20e7907d9fcf0110d6c992f14456336580d..28dcc0195b5d243a9f1e7672043c164bf6f20fea 100755 (executable)
@@ -24,6 +24,7 @@ if os.name == 'nt':
 
 from .utils import (
     compat_cookiejar,
+    compat_expanduser,
     compat_http_client,
     compat_str,
     compat_urllib_error,
@@ -61,7 +62,7 @@ from .utils import (
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
-from .postprocessor import FFmpegMergerPP
+from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
 from .version import __version__
 
 
@@ -107,6 +108,8 @@ class YoutubeDL(object):
     forcefilename:     Force printing final filename.
     forceduration:     Force printing duration.
     forcejson:         Force printing info_dict as JSON.
+    dump_single_json:  Force printing the info_dict of the whole playlist
+                       (or video) as a single JSON line.
     simulate:          Do not download the video files.
     format:            Video format code.
     format_limit:      Highest quality format to try.
@@ -165,6 +168,8 @@ class YoutubeDL(object):
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
     extract_flat:      Do not resolve URLs, return the immediate result.
+                       Pass in 'in_playlist' to only show this behavior for
+                       playlist items.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -447,7 +452,7 @@ class YoutubeDL(object):
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-            tmpl = os.path.expanduser(outtmpl)
+            tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             return filename
         except ValueError as err:
@@ -568,8 +573,12 @@ class YoutubeDL(object):
 
         result_type = ie_result.get('_type', 'video')
 
-        if self.params.get('extract_flat', False):
-            if result_type in ('url', 'url_transparent'):
+        if result_type in ('url', 'url_transparent'):
+            extract_flat = self.params.get('extract_flat', False)
+            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+                    extract_flat is True):
+                if self.params.get('forcejson', False):
+                    self.to_stdout(json.dumps(ie_result))
                 return ie_result
 
         if result_type == 'video':
@@ -897,6 +906,8 @@ class YoutubeDL(object):
         if self.params.get('forcejson', False):
             info_dict['_filename'] = filename
             self.to_stdout(json.dumps(info_dict))
+        if self.params.get('dump_single_json', False):
+            info_dict['_filename'] = filename
 
         # Do nothing else if in simulate mode
         if self.params.get('simulate', False):
@@ -1015,7 +1026,7 @@ class YoutubeDL(object):
                         downloaded = []
                         success = True
                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
-                        if not merger._get_executable():
+                        if not merger._executable:
                             postprocessors = []
                             self.report_warning('You have requested multiple '
                                 'formats but ffmpeg or avconv are not installed.'
@@ -1064,12 +1075,15 @@ class YoutubeDL(object):
         for url in url_list:
             try:
                 #It also downloads the videos
-                self.extract_info(url)
+                res = self.extract_info(url)
             except UnavailableVideoError:
                 self.report_error('unable to download video')
             except MaxDownloadsReached:
                 self.to_screen('[info] Maximum number of downloaded files reached.')
                 raise
+            else:
+                if self.params.get('dump_single_json', False):
+                    self.to_stdout(json.dumps(res))
 
         return self._download_retcode
 
@@ -1297,8 +1311,18 @@ class YoutubeDL(object):
                 sys.exc_clear()
             except:
                 pass
-        self._write_string('[debug] Python version %s - %s' %
-                     (platform.python_version(), platform_name()) + '\n')
+        self._write_string('[debug] Python version %s - %s\n' % (
+            platform.python_version(), platform_name()))
+
+        exe_versions = FFmpegPostProcessor.get_versions()
+        exe_str = ', '.join(
+            '%s %s' % (exe, v)
+            for exe, v in sorted(exe_versions.items())
+            if v
+        )
+        if not exe_str:
+            exe_str = 'none'
+        self._write_string('[debug] exe versions: %s\n' % exe_str)
 
         proxy_map = {}
         for handler in self._opener.handlers:
index 7f2b4dfcc60ddada121b7b662a61fc10c62de580..cb4f2e41c71de8ba3c4b60b18d12f4bbc388bd36 100644 (file)
@@ -79,6 +79,10 @@ __authors__  = (
     'Carlos Ramos',
     '5moufl',
     'lenaten',
+    'Dennis Scheiba',
+    'Damon Timm',
+    'winwon',
+    'Xavier Beynon'
 )
 
 __license__ = 'Public Domain'
@@ -94,6 +98,7 @@ from .options import (
     parseOpts,
 )
 from .utils import (
+    compat_expanduser,
     compat_getpass,
     compat_print,
     DateRange,
@@ -255,8 +260,6 @@ def _real_main(argv=None):
         date = DateRange.day(opts.date)
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
-        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
 
     # Do not download videos when there are audio-only formats
     if opts.extractaudio and not opts.keepvideo and opts.format is None:
@@ -284,8 +287,8 @@ def _real_main(argv=None):
                      u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                      u' template'.format(outtmpl))
 
-    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
-    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
@@ -304,8 +307,9 @@ def _real_main(argv=None):
         'forcefilename': opts.getfilename,
         'forceformat': opts.getformat,
         'forcejson': opts.dumpjson,
-        'simulate': opts.simulate,
-        'skip_download': (opts.skip_download or opts.simulate or any_printing),
+        'dump_single_json': opts.dump_single_json,
+        'simulate': opts.simulate or any_printing,
+        'skip_download': opts.skip_download,
         'format': opts.format,
         'format_limit': opts.format_limit,
         'listformats': opts.listformats,
@@ -369,6 +373,7 @@ def _real_main(argv=None):
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
+        'extract_flat': opts.extract_flat,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index 79ff09f7897c2987d0824becca088f2e4cd3a164..ac5925d32012d5dcac2e912d6d0ac3415560b447 100644 (file)
@@ -9,6 +9,7 @@ import shutil
 import traceback
 
 from .utils import (
+    compat_expanduser,
     write_json_file,
 )
 
@@ -22,7 +23,7 @@ class Cache(object):
         if res is None:
             cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
             res = os.path.join(cache_root, 'youtube-dl')
-        return os.path.expanduser(res)
+        return compat_expanduser(res)
 
     def _get_cache_fn(self, section, key, dtype):
         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
index 0dd763006b9fcffdfa3812f8885794f48da60c0c..8e31de93dd5a8e0a7a51bc9ac65d3fc15dbf7a76 100644 (file)
@@ -20,12 +20,14 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVEmbedIE,
 )
+from .audiomack import AudiomackIE
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
@@ -137,6 +139,7 @@ from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
 from .golem import GolemIE
@@ -370,6 +373,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
@@ -424,6 +428,7 @@ from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
+from .vidzi import VidziIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -443,6 +448,7 @@ from .viki import VikiIE
 from .vk import VKIE
 from .vodlocker import VodlockerIE
 from .vporn import VpornIE
+from .vrt import VRTIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vulture import VultureIE
@@ -492,10 +498,8 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
-
 from .zdf import ZDFIE
 
-
 _ALL_CLASSES = [
     klass
     for name, klass in globals().items()
index 3a34d1ecc67e590c568d10131b557b1d3022fe4d..b9a9440c09b85365a2997bd5feddbae017601c2d 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
     unified_strdate,
     determine_ext,
     get_element_by_id,
-    compat_str,
     get_element_by_attribute,
+    int_or_none,
 )
 
 # There are different sources of video in arte.tv, the extraction process 
@@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):
         if not upload_date_str:
             upload_date_str = player_info.get('VDA', '').split(' ')[0]
 
+        title = player_info['VTI'].strip()
+        subtitle = player_info.get('VSU', '').strip()
+        if subtitle:
+            title += ' - %s' % subtitle
+
         info_dict = {
             'id': player_info['VID'],
-            'title': player_info['VTI'],
+            'title': title,
             'description': player_info.get('VDE'),
             'upload_date': unified_strdate(upload_date_str),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
         }
 
-        all_formats = player_info['VSR'].values()
+        all_formats = []
+        for format_id, format_dict in player_info['VSR'].items():
+            fmt = dict(format_dict)
+            fmt['format_id'] = format_id
+            all_formats.append(fmt)
         # Some formats use the m3u8 protocol
         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
         def _match_lang(f):
@@ -149,25 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
                 )
         formats = sorted(formats, key=sort_key)
         def _format(format_info):
-            quality = ''
-            height = format_info.get('height')
-            if height is not None:
-                quality = compat_str(height)
-            bitrate = format_info.get('bitrate')
-            if bitrate is not None:
-                quality += '-%d' % bitrate
-            if format_info.get('versionCode') is not None:
-                format_id = '%s-%s' % (quality, format_info['versionCode'])
-            else:
-                format_id = quality
-            media_type = format_info.get('mediaType')
-            if media_type is not None:
-                format_id += '-%s' % media_type
             info = {
-                'format_id': format_id,
-                'format_note': format_info.get('versionLibelle'),
-                'width': format_info.get('width'),
-                'height': height,
+                'format_id': format_info['format_id'],
+                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
+                'width': int_or_none(format_info.get('width')),
+                'height': int_or_none(format_info.get('height')),
+                'tbr': int_or_none(format_info.get('bitrate')),
             }
             if format_info['mediaType'] == 'rtmp':
                 info['url'] = format_info['streamer']
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
new file mode 100644 (file)
index 0000000..57446fd
--- /dev/null
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .soundcloud import SoundcloudIE
+from ..utils import ExtractorError
+import datetime
+import time
+
+
+class AudiomackIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+    IE_NAME = 'audiomack'
+    _TESTS = [
+        #hosted on audiomack
+        {
+            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
+            'info_dict':
+            {
+                'id' : 'roosh-williams/extraordinary',
+                'ext': 'mp3',
+                'title': 'Roosh Williams - Extraordinary'
+            }
+        },
+        #hosted on soundcloud via audiomack
+        {
+            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+            'file': '172419696.mp3',
+            'info_dict':
+            {
+                'ext': 'mp3',
+                'title': 'Young Thug ft Lil Wayne - Take Kare',
+                "upload_date": "20141016",
+                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
+                "uploader": "Young Thug World"
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        api_response = self._download_json(
+            "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
+                video_id, time.time()),
+            video_id)
+
+        if "url" not in api_response:
+            raise ExtractorError("Unable to deduce api url of song")
+        realurl = api_response["url"]
+
+        #Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+        # - if so, pass the work off to the soundcloud extractor
+        if SoundcloudIE.suitable(realurl):
+            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+
+        webpage = self._download_webpage(url, video_id)
+        artist = self._html_search_regex(
+            r'<span class="artist">(.*?)</span>', webpage, "artist")
+        songtitle = self._html_search_regex(
+            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
+            webpage, "title")
+        title = artist + " - " + songtitle
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': realurl,
+        }
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644 (file)
index 0000000..0269d11
--- /dev/null
@@ -0,0 +1,39 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class BildIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+    IE_DESC = 'Bild.de'
+    _TEST = {
+        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+        'info_dict': {
+            'id': '38184146',
+            'ext': 'mp4',
+            'title': 'BILD hat sie getestet',
+            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+            'duration': 196,
+            'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
+        doc = self._download_xml(xml_url, video_id)
+
+        duration = int_or_none(doc.attrib.get('duration'), scale=1000)
+
+        return {
+            'id': video_id,
+            'title': doc.attrib['ueberschrift'],
+            'description': doc.attrib.get('text'),
+            'url': doc.attrib['src'],
+            'thumbnail': doc.attrib.get('img'),
+            'duration': duration,
+        }
index 496271be4e5f7170ad3d814ec5e2c0b99d15538d..d064a28f97920933f30cc11ec323858d5c5ee5f0 100644 (file)
@@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
             raise ExtractorError('Can\'t extract embed url and video id')
         playerdata_url = mobj.group('embed_url')
@@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
         video_description = self._html_search_regex(
             r'<div class="entry-content">(?P<description>.+?)</div>',
             webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
         playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
-        video_thumbnail = self._search_regex(
-            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
-        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
 
+        vidurl = self._search_regex(
+            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+        vidid = self._search_regex(
+            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
+        videoserver = self._html_search_regex(
+            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
+
+        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
 
         formats = []
-        baseurl = sd_url[:sd_url.rfind('/')+1]
+        baseurl = vidurl[:vidurl.rfind('/')+1]
         for video in videolist.findall('.//video'):
             src = video.get('src')
             if not src:
index e8366f7f91c663f1f7bdf70db0588016f49da3de..e1bd6bb49303c5c9ecb959eba5d3a84a3f813da3 100644 (file)
@@ -89,6 +89,10 @@ class InfoExtractor(object):
                                  format, irrespective of the file format.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                    * source_preference  Order number for this video source
+                                  (quality takes higher priority)
+                                 -1 for default (order by other properties),
+                                 -2 or smaller for less than default.
                     * http_referer  HTTP Referer header value to set.
                     * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
@@ -238,7 +242,6 @@ class InfoExtractor(object):
 
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
         """ Returns a tuple (page content as string, URL handle) """
-
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
@@ -247,6 +250,10 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        return (content, urlh)
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -305,7 +312,7 @@ class InfoExtractor(object):
                 msg += ' Visit %s for more details' % blocked_iframe
             raise ExtractorError(msg, expected=True)
 
-        return (content, urlh)
+        return content
 
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
         """ Returns the data of the page as a string """
@@ -613,12 +620,13 @@ class InfoExtractor(object):
                 audio_ext_preference,
                 f.get('filesize') if f.get('filesize') is not None else -1,
                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+                f.get('source_preference') if f.get('source_preference') is not None else -1,
                 f.get('format_id'),
             )
         formats.sort(key=_formats_key)
 
     def http_scheme(self):
-        """ Either "https:" or "https:", depending on the user's preferences """
+        """ Either "http:" or "https:", depending on the user's preferences """
         return (
             'http:'
             if self._downloader.params.get('prefer_insecure', False)
index 9ac86c2be786cddc6e655a6178ba2346fc0b822d..2dca5266000db8ed558f149142e3b235e267a910 100644 (file)
@@ -40,6 +40,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
+            'url': 're:(?!.*&amp)',
         },
         'params': {
             # rtmp
@@ -238,12 +239,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
-            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
-            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
-            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
+            streamdata = self._download_xml(
+                streamdata_req, video_id,
+                note='Downloading media info for %s' % video_format)
+            video_url = streamdata.find('.//host').text
+            video_play_path = streamdata.find('.//file').text
             formats.append({
                 'url': video_url,
-                'play_path':   video_play_path,
+                'play_path': video_play_path,
                 'ext': 'flv',
                 'format': video_format,
                 'format_id': video_format,
index 0b3374d97d7c72a559afc1ed6906549c092491d9..566e20d76fbad33c7879b31027da5f956cd33bbb 100644 (file)
@@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                         f4m_format['preference'] = 1
                     formats.extend(f4m_formats)
             elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                 formats.append({
                     'url': video_url,
                     'format_id': format_id,
-                    'preference': 2,
+                    'preference': -1,
                 })
         self._sort_formats(formats)
 
index d966e8403dfe9e03765d6a2eb0ab895a0da4100a..ec6d96adaeff666bf0fea7fe78e766c6a6ac2808 100644 (file)
@@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
+        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
         if not links:
             raise ExtractorError('No media links available for %s' % video_id)
 
index 9057a6beb97a0d0cc2fca33d1d0ccc30c07b8101..51dbbc8db13fb680822eac81b526e7397d18b6d2 100644 (file)
@@ -380,6 +380,17 @@ class GenericIE(InfoExtractor):
                 'uploader': 'education-portal.com',
             },
         },
+        {
+            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+            'info_dict': {
+                'id': 'uxjb0lwrcz',
+                'ext': 'mp4',
+                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+                'duration': 1715.0,
+                'uploader': 'thoughtworks.wistia.com',
+            },   
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -476,7 +487,8 @@ class GenericIE(InfoExtractor):
                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                     ) % (url, url), expected=True)
             else:
-                assert ':' in default_search
+                if ':' not in default_search:
+                    default_search += ':'
                 return self.url_result(default_search + url)
 
         url, smuggled_data = unsmuggle_url(url)
@@ -491,14 +503,14 @@ class GenericIE(InfoExtractor):
         self.to_screen('%s: Requesting header' % video_id)
 
         head_req = HEADRequest(url)
-        response = self._request_webpage(
+        head_response = self._request_webpage(
             head_req, video_id,
             note=False, errnote='Could not send HEAD request to %s' % url,
             fatal=False)
 
-        if response is not False:
+        if head_response is not False:
             # Check for redirect
-            new_url = response.geturl()
+            new_url = head_response.geturl()
             if url != new_url:
                 self.report_following_redirect(new_url)
                 if force_videoid:
@@ -506,34 +518,35 @@ class GenericIE(InfoExtractor):
                         new_url, {'force_videoid': force_videoid})
                 return self.url_result(new_url)
 
-            # Check for direct link to a video
-            content_type = response.headers.get('Content-Type', '')
-            m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
-            if m:
-                upload_date = response.headers.get('Last-Modified')
-                if upload_date:
-                    upload_date = unified_strdate(upload_date)
-                return {
-                    'id': video_id,
-                    'title': os.path.splitext(url_basename(url))[0],
-                    'formats': [{
-                        'format_id': m.group('format_id'),
-                        'url': url,
-                        'vcodec': 'none' if m.group('type') == 'audio' else None
-                    }],
-                    'upload_date': upload_date,
-                }
+        full_response = None
+        if head_response is False:
+            full_response = self._request_webpage(url, video_id)
+            head_response = full_response
+
+        # Check for direct link to a video
+        content_type = head_response.headers.get('Content-Type', '')
+        m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
+        if m:
+            upload_date = unified_strdate(
+                head_response.headers.get('Last-Modified'))
+            return {
+                'id': video_id,
+                'title': os.path.splitext(url_basename(url))[0],
+                'formats': [{
+                    'format_id': m.group('format_id'),
+                    'url': url,
+                    'vcodec': 'none' if m.group('type') == 'audio' else None
+                }],
+                'upload_date': upload_date,
+            }
 
         if not self._downloader.params.get('test', False) and not is_intentional:
             self._downloader.report_warning('Falling back on generic information extractor.')
 
-        try:
+        if full_response:
+            webpage = _webpage_read_content(url, video_id)
+        else:
             webpage = self._download_webpage(url, video_id)
-        except ValueError:
-            # since this is the last-resort InfoExtractor, if
-            # this error is thrown, it'll be thrown here
-            raise ExtractorError('Failed to download URL: %s' % url)
-
         self.report_extraction(video_id)
 
         # Is it an RSS feed?
@@ -623,7 +636,8 @@ class GenericIE(InfoExtractor):
                 <iframe[^>]+?src=|
                 data-video-url=|
                 <embed[^>]+?src=|
-                embedSWF\(?:\s*
+                embedSWF\(?:\s*|
+                new\s+SWFObject\(
             )
             (["\'])
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
@@ -652,7 +666,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Wistia player
         match = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
         if match:
             embed_url = self._proto_relative_url(
                 unescapeHTML(match.group('url')))
@@ -664,6 +678,7 @@ class GenericIE(InfoExtractor):
                 'title': video_title,
                 'id': video_id,
             }
+            
         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py
new file mode 100644 (file)
index 0000000..9561ed5
--- /dev/null
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GlideIE(InfoExtractor):
+    IE_DESC = 'Glide mobile video messages (glide.me)'
+    _VALID_URL = r'https?://share\.glide\.me/(?P<id>[A-Za-z0-9\-=_+]+)'
+    _TEST = {
+        'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==',
+        'md5': '4466372687352851af2d131cfaa8a4c7',
+        'info_dict': {
+            'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
+            'ext': 'mp4',
+            'title': 'Damon Timm\'s Glide message',
+            'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<title>(.*?)</title>', webpage, 'title')
+        video_url = self.http_scheme() + self._search_regex(
+            r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
+        thumbnail_url = self._search_regex(
+            r'<img id="video-thumbnail" src="(.*?)"',
+            webpage, 'thumbnail url', fatal=False)
+        thumbnail = (
+            thumbnail_url if thumbnail_url is None
+            else self.http_scheme() + thumbnail_url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+        }
index 5bdd08afabd27474fc2f5b1ed2afb371efdf8d9d..b6cc15b6fbad25c43fe0699668bd3ec452ed944d 100644 (file)
@@ -1,37 +1,33 @@
 # -*- coding: utf-8 -*-
-
-import re
-import json
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import determine_ext
+
 
 class HarkIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+    _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'
     _TEST = {
-        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
-        u'file': u'mmbzyhkgny.mp3',
-        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
-        u'info_dict': {
-            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
-            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
-            u'duration': 11,
+        'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+        'md5': '6783a58491b47b92c7c1af5a77d4cbee',
+        'info_dict': {
+            'id': 'mmbzyhkgny',
+            'ext': 'mp3',
+            'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
+            'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+            'duration': 11,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-        json_url = "http://www.hark.com/clips/%s.json" %(video_id)
-        info_json = self._download_webpage(json_url, video_id)
-        info = json.loads(info_json)
-        final_url = info['url']
+        video_id = self._match_id(url)
+        data = self._download_json(
+            'http://www.hark.com/clips/%s.json' % video_id, video_id)
 
-        return {'id': video_id,
-                'url' : final_url,
-                'title': info['name'],
-                'ext': determine_ext(final_url),
-                'description': info['description'],
-                'thumbnail': info['image_original'],
-                'duration': info['duration'],
-                }
+        return {
+            'id': video_id,
+            'url': data['url'],
+            'title': data['name'],
+            'description': data.get('description'),
+            'thumbnail': data.get('image_original'),
+            'duration': data.get('duration'),
+        }
index fca0bfef0726b472e484d7be2fd890fb5ce50f8c..db5df40789c5167e1062364a860fa9bc1d369218 100644 (file)
@@ -22,7 +22,7 @@ class LRTIE(InfoExtractor):
             'id': '54391',
             'ext': 'mp4',
             'title': 'Septynios Kauno dienos',
-            'description': 'Kauno miesto ir apskrities naujienos',
+            'description': 'md5:24d84534c7dc76581e59f5689462411a',
             'duration': 1783,
         },
         'params': {
index 979f3d692a0707fdf2a6a6617b75581e047679dd..6691521e58435682a74af87559ce1d1fd9046fbf 100644 (file)
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
+    compat_urlparse,
     get_element_by_attribute,
     parse_duration,
     strip_jsonp,
@@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):
         ).replace('\'', '"')
         embed_data = json.loads(embed_data_json)
 
-        info_url = embed_data['flashvars']['host']
+        domain = embed_data['mediaUrl']
+        if not domain.startswith('http'):
+            # only happens in telecinco.es videos
+            domain = 'http://' + domain
+        info_url = compat_urlparse.urljoin(
+            domain,
+            compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+        )
         info_el = self._download_xml(info_url, episode).find('./video/info')
 
         video_link = info_el.find('videoUrl/link').text
         token_query = compat_urllib_parse.urlencode({'id': video_link})
         token_info = self._download_json(
-            'http://token.mitele.es/?' + token_query, episode,
+            embed_data['flashvars']['ov_tk'] + '?' + token_query,
+            episode,
             transform_source=strip_jsonp
         )
 
index 6229b21732b70525b832ab2f3370594736cae8df..3621ff99e76da1bffabda1a81f6181fd4a6ed61c 100644 (file)
@@ -5,20 +5,20 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
+    str_to_int,
     unified_strdate,
 )
 
 
 class MotherlessIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
+    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
     _TESTS = [
         {
             'url': 'http://motherless.com/AC3FFE1',
-            'md5': '5527fef81d2e529215dad3c2d744a7d9',
+            'md5': '310f62e325a9fafe64f68c0bccb6e75f',
             'info_dict': {
                 'id': 'AC3FFE1',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Fucked in the ass while playing PS3',
                 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
                 'upload_date': '20100913',
@@ -40,33 +40,51 @@ class MotherlessIE(InfoExtractor):
                 'thumbnail': 're:http://.*\.jpg',
                 'age_limit': 18,
             }
+        },
+        {
+            'url': 'http://motherless.com/g/cosplay/633979F',
+            'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+            'info_dict': {
+                'id': '633979F',
+                'ext': 'mp4',
+                'title': 'Turtlette',
+                'categories': ['superheroine heroine  superher'],
+                'upload_date': '20140827',
+                'uploader_id': 'shade0230',
+                'thumbnail': 're:http://.*\.jpg',
+                'age_limit': 18,
+            }
         }
     ]
 
-    def _real_extract(self,url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
-        
-        video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
+        title = self._html_search_regex(
+            r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
         age_limit = self._rta_search(webpage)
-
-        view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
+        view_count = str_to_int(self._html_search_regex(
+            r'<strong>Views</strong>\s+([^<]+)<',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._html_search_regex(
+            r'<strong>Favorited</strong>\s+([^<]+)<',
+            webpage, 'like count', fatal=False))
  
-        upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
+        upload_date = self._html_search_regex(
+            r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
         if 'Ago' in upload_date:
             days = int(re.search(r'([0-9]+)', upload_date).group(1))
             upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
         else:
             upload_date = unified_strdate(upload_date)
 
-        like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
-
         comment_count = webpage.count('class="media-comment-contents"')
-        uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
+        uploader_id = self._html_search_regex(
+            r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+            webpage, 'uploader_id')
 
         categories = self._html_search_meta('keywords', webpage)
         if categories:
@@ -79,8 +97,8 @@ class MotherlessIE(InfoExtractor):
             'uploader_id': uploader_id,
             'thumbnail': self._og_search_thumbnail(webpage),
             'categories': categories,
-            'view_count': int_or_none(view_count.replace(',', '')),
-            'like_count': int_or_none(like_count.replace(',', '')),
+            'view_count': view_count,
+            'like_count': like_count,
             'comment_count': comment_count,
             'age_limit': age_limit,
             'url': video_url,
index 072d9cf8e49438f5688a9eef6ef43d0efd0cf8c2..d66c2c6f833852a0143cda250f724222c69207d5 100644 (file)
@@ -22,21 +22,23 @@ class NHLBaseInfoExtractor(InfoExtractor):
         self.report_extraction(video_id)
 
         initial_video_url = info['publishPoint']
-        data = compat_urllib_parse.urlencode({
-            'type': 'fvod',
-            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
-        })
-        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_doc = self._download_xml(
-            path_url, video_id, 'Downloading final video url')
-        video_url = path_doc.find('path').text
+        if info['formats'] == '1':
+            data = compat_urllib_parse.urlencode({
+                'type': 'fvod',
+                'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+            })
+            path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
+            path_doc = self._download_xml(
+                path_url, video_id, 'Downloading final video url')
+            video_url = path_doc.find('path').text
+        else:
+           video_url = initial_video_url
 
         join = compat_urlparse.urljoin
         return {
             'id': video_id,
             'title': info['name'],
             'url': video_url,
-            'ext': determine_ext(video_url),
             'description': info['description'],
             'duration': int(info['duration']),
             'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
@@ -46,10 +48,11 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
 
     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        'md5': 'db704a4ea09e8d3988c85e36cc892d09',
         'info_dict': {
             'id': '453614',
             'ext': 'mp4',
@@ -58,6 +61,17 @@ class NHLIE(NHLBaseInfoExtractor):
             'duration': 18,
             'upload_date': '20131006',
         },
+    }, {
+        'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
+        'md5': 'd22e82bc592f52d37d24b03531ee9696',
+        'info_dict': {
+            'id': '2014020024-628-h',
+            'ext': 'mp4',
+            'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
+            'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
+            'duration': 0,
+            'upload_date': '20141011',
+        },
     }, {
         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
         'only_matching': True,
index 8f140d62660b896f5a6f819d621a762d13fbdb69..6118ed5c2021492ee91e22dccd642d564918604c 100644 (file)
@@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
             }
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
+            'info_dict': {
+                'id': 'united-states-of-secrets',
+            },
+            'playlist_count': 2,
         }
-
     ]
 
     def _extract_webpage(self, url):
@@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):
                 r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
                 webpage, 'upload date', default=None))
 
+            # tabbed frontline videos
+            tabbed_videos = re.findall(
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+            if tabbed_videos:
+                return tabbed_videos, presumptive_id, upload_date
+
             MEDIA_ID_REGEXES = [
                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
                 r'class="coveplayerid">([^<]+)<',                       # coveplayer
@@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):
     def _real_extract(self, url):
         video_id, display_id, upload_date = self._extract_webpage(url)
 
+        if isinstance(video_id, list):
+            entries = [self.url_result(
+                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
+                for vid_id in video_id]
+            return self.playlist_result(entries, display_id)
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
index 4719ba45c5f754338f11da9a038c506ad001023c..c77671fd38c0e69ba6d362296f803489fbf0d15e 100644 (file)
@@ -40,14 +40,15 @@ class SoundcloudIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-            'file': '62986583.mp3',
             'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
             'info_dict': {
-                "upload_date": "20121011",
-                "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
-                "uploader": "E.T. ExTerrestrial Music",
-                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
-                "duration": 143,
+                'id': '62986583',
+                'ext': 'mp3',
+                'upload_date': '20121011',
+                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+                'uploader': 'E.T. ExTerrestrial Music',
+                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+                'duration': 143,
             }
         },
         # not streamable song
@@ -103,7 +104,7 @@ class SoundcloudIE(InfoExtractor):
                 'id': '128590877',
                 'ext': 'mp3',
                 'title': 'Bus Brakes',
-                'description': 'md5:0170be75dd395c96025d210d261c784e',
+                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
                 'uploader': 'oddsamples',
                 'upload_date': '20140109',
                 'duration': 17,
index 19cc976e3d3c9c7cff875f26e63e25a827fb8073..b9cd35109158a31fa238c5d07a942008b36c0750 100644 (file)
@@ -26,7 +26,6 @@ class SportBoxIE(InfoExtractor):
                 'timestamp': 1411896237,
                 'upload_date': '20140928',
                 'duration': 4846,
-                'view_count': int,
             },
             'params': {
                 # m3u8 download
@@ -65,8 +64,6 @@ class SportBoxIE(InfoExtractor):
             r'<span itemprop="uploadDate">([^<]+)</span>', webpage, 'timestamp', fatal=False))
         duration = parse_duration(self._html_search_regex(
             r'<meta itemprop="duration" content="PT([^"]+)">', webpage, 'duration', fatal=False))
-        view_count = int_or_none(self._html_search_regex(
-            r'<span>Просмотров: (\d+)</span>', player, 'view count', fatal=False))
 
         return {
             'id': video_id,
@@ -76,6 +73,5 @@ class SportBoxIE(InfoExtractor):
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
-            'view_count': view_count,
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
new file mode 100644 (file)
index 0000000..db9788c
--- /dev/null
@@ -0,0 +1,19 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .mitele import MiTeleIE
+
+
+class TelecincoIE(MiTeleIE):
+    IE_NAME = 'telecinco.es'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+        'info_dict': {
+            'id': 'MDSVID20141015_0058',
+            'ext': 'mp4',
+            'title': 'Con Martín Berasategui, hacer un bacalao al ...',
+            'duration': 662,
+        },
+    }
index 306fe89741cce8b3c281c94349be266f221028b3..40c53ff17ec1e6eb8a05bcf683d55d00f2455101 100644 (file)
@@ -4,9 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 class TumblrIE(InfoExtractor):
@@ -18,7 +15,7 @@ class TumblrIE(InfoExtractor):
             'id': '54196191430',
             'ext': 'mp4',
             'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
-            'description': 'md5:dfac39636969fe6bf1caa2d50405f069',
+            'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
             'thumbnail': 're:http://.*\.jpg',
         }
     }, {
@@ -27,7 +24,7 @@ class TumblrIE(InfoExtractor):
         'info_dict': {
             'id': '90208453769',
             'ext': 'mp4',
-            'title': '5SOS STRUM ;)',
+            'title': '5SOS STRUM ;]',
             'description': 'md5:dba62ac8639482759c8eb10ce474586a',
             'thumbnail': 're:http://.*\.jpg',
         }
@@ -41,18 +38,12 @@ class TumblrIE(InfoExtractor):
         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
         webpage = self._download_webpage(url, video_id)
 
-        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
-        video = re.search(re_video, webpage)
-        if video is None:
-            raise ExtractorError('Unable to extract video')
-        video_url = video.group('video_url')
-        ext = video.group('ext')
-
-        video_thumbnail = self._search_regex(
-            r'posters.*?\[\\x22(.*?)\\x22',
-            webpage, 'thumbnail', fatal=False)  # We pick the first poster
-        if video_thumbnail:
-            video_thumbnail = video_thumbnail.replace('\\\\/', '/')
+        iframe_url = self._search_regex(
+            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
+            webpage, 'iframe url')
+        iframe = self._download_webpage(iframe_url, video_id)
+        video_url = self._search_regex(r'<source src="([^"]+)"',
+            iframe, 'video url')
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
@@ -62,9 +53,9 @@ class TumblrIE(InfoExtractor):
 
         return {
             'id': video_id,
-             'url': video_url,
-             'title': video_title,
-             'description': self._html_search_meta('description', webpage),
-             'thumbnail': video_thumbnail,
-             'ext': ext,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': video_title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }
index 9328ef4a2121f091c256e9324d0de0e8b7dcbecd..0faa729c60f916d69b885cfc76580104b226f84b 100644 (file)
@@ -1,55 +1,85 @@
-import json
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
 
 
 class ViddlerIE(InfoExtractor):
-    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
     _TEST = {
-        u"url": u"http://www.viddler.com/v/43903784",
-        u'file': u'43903784.mp4',
-        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
-        u'info_dict': {
-            u"title": u"Video Made Easy",
-            u"uploader": u"viddler",
-            u"duration": 100.89,
+        "url": "http://www.viddler.com/v/43903784",
+        'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
+        'info_dict': {
+            'id': '43903784',
+            'ext': 'mp4',
+            "title": "Video Made Easy",
+            'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
+            "uploader": "viddler",
+            'timestamp': 1335371429,
+            'upload_date': '20120425',
+            "duration": 100.89,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+            'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        embed_url = mobj.group('domain') + u'/embed/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        video_sources_code = self._search_regex(
-            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
-        video_sources = json.loads(video_sources_code.replace("'", '"'))
-
-        formats = [{
-            'url': video_url,
-            'format': format_id,
-        } for video_url, format_id in video_sources.items()]
-
-        title = self._html_search_regex(
-            r"title\s*:\s*'([^']*)'", webpage, u'title')
-        uploader = self._html_search_regex(
-            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
-        duration_s = self._html_search_regex(
-            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
-        duration = float(duration_s) if duration_s else None
-        thumbnail = self._html_search_regex(
-            r"thumbnail\s*:\s*'([^']*)'",
-            webpage, u'thumbnail', fatal=False)
+        video_id = self._match_id(url)
+
+        json_url = (
+            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
+            video_id)
+        data = self._download_json(json_url, video_id)['video']
+
+        formats = []
+        for filed in data['files']:
+            if filed.get('status', 'ready') != 'ready':
+                continue
+            f = {
+                'format_id': filed['profile_id'],
+                'format_note': filed['profile_name'],
+                'url': self._proto_relative_url(filed['url']),
+                'width': int_or_none(filed.get('width')),
+                'height': int_or_none(filed.get('height')),
+                'filesize': int_or_none(filed.get('size')),
+                'ext': filed.get('ext'),
+                'source_preference': -1,
+            }
+            formats.append(f)
+
+            if filed.get('cdn_url'):
+                f = f.copy()
+                f['url'] = self._proto_relative_url(filed['cdn_url'])
+                f['format_id'] = filed['profile_id'] + '-cdn'
+                f['source_preference'] = 1
+                formats.append(f)
+
+            if filed.get('html5_video_source'):
+                f = f.copy()
+                f['url'] = self._proto_relative_url(
+                    filed['html5_video_source'])
+                f['format_id'] = filed['profile_id'] + '-html5'
+                f['source_preference'] = 0
+                formats.append(f)
+        self._sort_formats(formats)
+
+        categories = [
+            t.get('text') for t in data.get('tags', []) if 'text' in t]
 
         return {
             '_type': 'video',
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'uploader': uploader,
-            'duration': duration,
+            'title': data['title'],
             'formats': formats,
+            'description': data.get('description'),
+            'timestamp': int_or_none(data.get('upload_time')),
+            'thumbnail': self._proto_relative_url(data.get('thumbnail_url')),
+            'uploader': data.get('author'),
+            'duration': float_or_none(data.get('length')),
+            'view_count': int_or_none(data.get('view_count')),
+            'categories': categories,
         }
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
new file mode 100644 (file)
index 0000000..669979e
--- /dev/null
@@ -0,0 +1,33 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class VidziIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
+    _TEST = {
+        'url': 'http://vidzi.tv/cghql9yq6emu.html',
+        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+        'info_dict': {
+            'id': 'cghql9yq6emu',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
+        title = self._html_search_regex(
+            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
+        
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
+        
\ No newline at end of file
diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py
new file mode 100644 (file)
index 0000000..57ef8dc
--- /dev/null
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class VRTIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
+    _TESTS = [
+        # deredactie.be
+        {
+            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
+            'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
+            'info_dict': {
+                'id': '2129880',
+                'ext': 'flv',
+                'title': 'Het journaal L - 25/10/14',
+                'description': None,
+                'timestamp': 1414271750.949,
+                'upload_date': '20141025',
+                'duration': 929,
+            }
+        },
+        # sporza.be
+        {
+            'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
+            'md5': '11f53088da9bf8e7cfc42456697953ff',
+            'info_dict': {
+                'id': '2124639',
+                'ext': 'flv',
+                'title': 'Bekijk Extra Time van 20 oktober',
+                'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
+                'timestamp': 1413835980.560,
+                'upload_date': '20141020',
+                'duration': 3238,
+            }  
+        },
+        # cobra.be
+        {
+            'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
+            'md5': '78a2b060a5083c4f055449a72477409d',
+            'info_dict': {
+                'id': '2126050',
+                'ext': 'flv',
+                'title': 'Bret Easton Ellis in Café Corsari',
+                'description': 'md5:f699986e823f32fd6036c1855a724ee9',
+                'timestamp': 1413967500.494,
+                'upload_date': '20141022',
+                'duration': 661,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
+
+        formats = []
+        mobj = re.search(
+            r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
+            webpage)
+        if mobj:
+            formats.extend(self._extract_m3u8_formats(
+                '%s/%s' % (mobj.group('server'), mobj.group('path')),
+                video_id, 'mp4'))
+        mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
+        if mobj:
+            formats.extend(self._extract_f4m_formats(
+                '%s/manifest.f4m' % mobj.group('src'), video_id))
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = float_or_none(self._search_regex(
+            r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
+        duration = float_or_none(self._search_regex(
+            r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
\ No newline at end of file
index cfae2de8990ff7f6ae1f7ff37fdc0b559db3d290..4ab56e0ac6baf7f59f1c8892b5dbe560d96cb195 100644 (file)
@@ -191,8 +191,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _real_initialize(self):
         if self._downloader is None:
             return
-        if not self._set_language():
-            return
+        if self._get_login_info()[0] is not None:
+            if not self._set_language():
+                return
         if not self._login():
             return
         self._confirm_age()
index 649361bde3a9eee3bd1222393b069ffb8ac0976d..98e20d5494b2a0285e98e97b26c828af857dc6ae 100644 (file)
@@ -6,6 +6,8 @@ import shlex
 import sys
 
 from .utils import (
+    compat_expanduser,
+    compat_getenv,
     get_term_width,
     write_string,
 )
@@ -27,19 +29,19 @@ def parseOpts(overrideArguments=None):
         return res
 
     def _readUserConf():
-        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+        xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
         if xdg_config_home:
             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
             if not os.path.isfile(userConfFile):
                 userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
         else:
-            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+            userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
             if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+                userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
         userConf = _readOptions(userConfFile, None)
 
         if userConf is None:
-            appdata_dir = os.environ.get('appdata')
+            appdata_dir = compat_getenv('appdata')
             if appdata_dir:
                 userConf = _readOptions(
                     os.path.join(appdata_dir, 'youtube-dl', 'config'),
@@ -51,11 +53,11 @@ def parseOpts(overrideArguments=None):
 
         if userConf is None:
             userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
                 default=None)
         if userConf is None:
             userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
                 default=None)
 
         if userConf is None:
@@ -159,6 +161,11 @@ def parseOpts(overrideArguments=None):
         '--ignore-config',
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--flat-playlist',
+        action='store_const', dest='extract_flat', const='in_playlist',
+        default=False,
+        help='Do not extract the videos of a playlist, only list them.')
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
@@ -412,6 +419,10 @@ def parseOpts(overrideArguments=None):
         '-j', '--dump-json',
         action='store_true', dest='dumpjson', default=False,
         help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+    verbosity.add_option(
+        '-J', '--dump-single-json',
+        action='store_true', dest='dump_single_json', default=False,
+        help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
     verbosity.add_option(
         '--newline',
         action='store_true', dest='progress_with_newline', default=False,
index 15aa0daa9b7b69b5710096ecb099f62e6ab51f3d..6ac67cbaed2517d6c908634619c81e0a5bcf36d8 100644 (file)
@@ -1,24 +1,26 @@
 
 from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (
+    FFmpegPostProcessor,
     FFmpegAudioFixPP,
+    FFmpegEmbedSubtitlePP,
+    FFmpegExtractAudioPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
     FFmpegVideoConvertor,
-    FFmpegExtractAudioPP,
-    FFmpegEmbedSubtitlePP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 
 __all__ = [
     'AtomicParsleyPP',
+    'ExecAfterDownloadPP',
     'FFmpegAudioFixPP',
+    'FFmpegEmbedSubtitlePP',
+    'FFmpegExtractAudioPP',
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
+    'FFmpegPostProcessor',
     'FFmpegVideoConvertor',
-    'FFmpegExtractAudioPP',
-    'FFmpegEmbedSubtitlePP',
     'XAttrMetadataPP',
-    'ExecAfterDownloadPP',
 ]
index 6f010a9c782181fbeae23be7e5a4bd23b03a0a81..e13deab4b7b1f549a5048dc90c1fb867f69b5b2d 100644 (file)
@@ -1,4 +1,5 @@
 import os
+import re
 import subprocess
 import sys
 import time
@@ -11,6 +12,7 @@ from ..utils import (
     compat_subprocess_get_DEVNULL,
     encodeArgument,
     encodeFilename,
+    is_outdated_version,
     PostProcessingError,
     prepend_extension,
     shell_quote,
@@ -18,38 +20,70 @@ from ..utils import (
 )
 
 
+def get_version(executable):
+    """ Returns the version of the specified executable,
+    or False if the executable is not present """
+    try:
+        out, err = subprocess.Popen(
+            [executable, '-version'],
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+    except OSError:
+        return False
+    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
+    m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline)
+    if not m:
+        return u'present'
+    else:
+        return m.group(1)
+
+
 class FFmpegPostProcessorError(PostProcessingError):
     pass
 
 
 class FFmpegPostProcessor(PostProcessor):
-    def __init__(self, downloader=None, deletetempfiles=False):
+    def __init__(self, downloader, deletetempfiles=False):
         PostProcessor.__init__(self, downloader)
-        self._exes = self.detect_executables()
+        self._versions = self.get_versions()
         self._deletetempfiles = deletetempfiles
 
+    def check_version(self):
+        if not self._executable:
+            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+
+        REQUIRED_VERSION = '1.0'
+        if is_outdated_version(
+                self._versions[self._executable], REQUIRED_VERSION):
+            warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+                self._executable, self._executable, REQUIRED_VERSION)
+            self._downloader.report_warning(warning)
+
     @staticmethod
-    def detect_executables():
+    def get_versions():
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-        return dict((program, check_executable(program, ['-version'])) for program in programs)
+        return dict((program, get_version(program)) for program in programs)
 
-    def _get_executable(self):
+    @property
+    def _executable(self):
         if self._downloader.params.get('prefer_ffmpeg', False):
-            return self._exes['ffmpeg'] or self._exes['avconv']
+            prefs = ('ffmpeg', 'avconv')
         else:
-            return self._exes['avconv'] or self._exes['ffmpeg']
+            prefs = ('avconv', 'ffmpeg')
+        for p in prefs:
+            if self._versions[p]:
+                return p
+        return None
 
     def _uses_avconv(self):
-        return self._get_executable() == self._exes['avconv']
+        return self._executable == 'avconv'
 
     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
-        if not self._get_executable():
-            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+        self.check_version()
 
         files_cmd = []
         for path in input_paths:
             files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._get_executable(), '-y'] + files_cmd
+        cmd = ([self._executable, '-y'] + files_cmd
                + [encodeArgument(o) for o in opts] +
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
@@ -182,14 +216,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
             else:
-                self._downloader.to_screen(u'[' + self._get_executable() + '] Destination: ' + new_path)
+                self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
                 self.run_ffmpeg(path, new_path, acodec, more_opts)
         except:
             etype,e,tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
                 msg = u'audio conversion failed: ' + e.msg
             else:
-                msg = u'error running ' + self._get_executable()
+                msg = u'error running ' + self._executable
             raise PostProcessingError(msg)
 
         # Try to update the date time for extracted audio file.
index d7ae5a90a8480f6c825303039b357ff53e7f8d56..6c0c39ca5263c8f7a3d36323dc2f15d559806ef0 100644 (file)
@@ -203,6 +203,82 @@ def compat_ord(c):
     if type(c) is int: return c
     else: return ord(c)
 
+
+if sys.version_info >= (3, 0):
+    compat_getenv = os.getenv
+    compat_expanduser = os.path.expanduser
+else:
+    # Environment variables should be decoded with filesystem encoding.
+    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
+
+    def compat_getenv(key, default=None):
+        env = os.getenv(key, default)
+        if env:
+            env = env.decode(get_filesystem_encoding())
+        return env
+
+    # HACK: The default implementations of os.path.expanduser from cpython do not decode
+    # environment variables with filesystem encoding. We will work around this by
+    # providing adjusted implementations.
+    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+    # for different platforms with correct environment variables decoding.
+
+    if os.name == 'posix':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
+            do nothing."""
+            if not path.startswith('~'):
+                return path
+            i = path.find('/', 1)
+            if i < 0:
+                i = len(path)
+            if i == 1:
+                if 'HOME' not in os.environ:
+                    import pwd
+                    userhome = pwd.getpwuid(os.getuid()).pw_dir
+                else:
+                    userhome = compat_getenv('HOME')
+            else:
+                import pwd
+                try:
+                    pwent = pwd.getpwnam(path[1:i])
+                except KeyError:
+                    return path
+                userhome = pwent.pw_dir
+            userhome = userhome.rstrip('/')
+            return (userhome + path[i:]) or '/'
+    elif os.name == 'nt' or os.name == 'ce':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructs.
+
+            If user or $HOME is unknown, do nothing."""
+            if path[:1] != '~':
+                return path
+            i, n = 1, len(path)
+            while i < n and path[i] not in '/\\':
+                i = i + 1
+
+            if 'HOME' in os.environ:
+                userhome = compat_getenv('HOME')
+            elif 'USERPROFILE' in os.environ:
+                userhome = compat_getenv('USERPROFILE')
+            elif not 'HOMEPATH' in os.environ:
+                return path
+            else:
+                try:
+                    drive = compat_getenv('HOMEDRIVE')
+                except KeyError:
+                    drive = ''
+                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+            if i != 1: #~user
+                userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+            return userhome + path[i:]
+    else:
+        compat_expanduser = os.path.expanduser
+
+
 # This is not clearly defined otherwise
 compiled_regex_type = type(re.compile(''))
 
@@ -1207,11 +1283,14 @@ class locked_file(object):
         return self.f.read(*args)
 
 
+def get_filesystem_encoding():
+    encoding = sys.getfilesystemencoding()
+    return encoding if encoding is not None else 'utf-8'
+
+
 def shell_quote(args):
     quoted_args = []
-    encoding = sys.getfilesystemencoding()
-    if encoding is None:
-        encoding = 'utf-8'
+    encoding = get_filesystem_encoding()
     for a in args:
         if isinstance(a, bytes):
             # We may get a filename encoded with 'encodeFilename'
@@ -1261,7 +1340,7 @@ def format_bytes(bytes):
 
 
 def get_term_width():
-    columns = os.environ.get('COLUMNS', None)
+    columns = compat_getenv('COLUMNS', None)
     if columns:
         return int(columns)
 
@@ -1644,3 +1723,16 @@ def limit_length(s, length):
     if len(s) > length:
         return s[:length - len(ELLIPSES)] + ELLIPSES
     return s
+
+
+def version_tuple(v):
+    return [int(e) for e in v.split('.')]
+
+
+def is_outdated_version(version, limit, assume_new=True):
+    if not version:
+        return not assume_new
+    try:
+        return version_tuple(version) < version_tuple(limit)
+    except ValueError:
+        return not assume_new
index e7f6adef126bb6c3f053260b78f27d998df6e270..d822ae3305fef7a25a7894dc5b318b8f971ef318 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.10.18'
+__version__ = '2014.10.25'