git.bitcoin.ninja Git - youtube-dl/commitdiff
Merge branch 'gfycat' of https://github.com/julianrichen/youtube-dl into julianrichen...
author    Sergey M․ <dstftw@gmail.com>
          Fri, 17 Apr 2015 21:49:32 +0000 (03:49 +0600)
committer Sergey M․ <dstftw@gmail.com>
          Fri, 17 Apr 2015 21:49:32 +0000 (03:49 +0600)
23 files changed:
docs/supportedsites.md
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/miomio.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/pladform.py
youtube_dl/extractor/qqmusic.py [new file with mode: 0644]
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/srf.py [new file with mode: 0644]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/udn.py
youtube_dl/extractor/vimple.py
youtube_dl/postprocessor/atomicparsley.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c85a3991800e5c86d19bee8ed8fb8d1a9c6d96b9..80e86c1b652448a7c92c8907fe62cef8a86b3dcd 100644
@@ -98,6 +98,7 @@
  - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
  - **Cracked**
  - **Criterion**
+ - **CrooksAndLiars**
  - **Crunchyroll**
  - **crunchyroll:playlist**
  - **CSpan**: C-SPAN
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Puls4**
  - **Pyvideo**
+ - **QQMusic**
+ - **QQMusicAlbum**
+ - **QQMusicSinger**
  - **QuickVid**
  - **R7**
  - **radio.de**
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**
+ - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a68b24ab485d6ef8e064ee7533a4a15379eb87cf..6ac85f4e7ecd505499d91b9f51759281287c3a1c 100755
@@ -1486,16 +1486,9 @@ class YoutubeDL(object):
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
-            keep_video = None
             old_filename = info['filepath']
             try:
-                keep_video_wish, info = pp.run(info)
-                if keep_video_wish is not None:
-                    if keep_video_wish:
-                        keep_video = keep_video_wish
-                    elif keep_video is None:
-                        # No clear decision yet, let IE decide
-                        keep_video = keep_video_wish
+                keep_video, info = pp.run(info)
             except PostProcessingError as e:
                 self.report_error(e.msg)
             if keep_video is False and not self.params.get('keepvideo', False):
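
With the removed bookkeeping gone, each postprocessor's run() is trusted directly: it returns a (keep_video, info) pair, and a False from any processor lets the original file be deleted unless the keepvideo option is set. A minimal sketch of that contract, using a hypothetical processor class:

    class ExamplePP(object):
        """Hypothetical postprocessor illustrating the simplified protocol."""
        def run(self, info):
            # Return None to leave the keep/delete decision to other
            # processors, False to allow deletion of the original download,
            # True to force keeping it.
            return None, info
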
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d32f1cbd2892901f84e859dc16fd5ba25cbc27f1..9e9e20589f08ad5fd73f13e8c89fa0670eba282a 100644
@@ -397,6 +397,11 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+)
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
@@ -481,6 +486,7 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
 from .srmediathek import SRMediathekIE
 from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0733bece7c45880ab5c20b916d5bd8c9700da548..4f60d53660fa7777b9e1b6152967ce2e7e567ec9 100644
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
             (?:
                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
-            ).+?</object>''',
+            ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
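
The new try/except means a page whose BrightcoveExperience <object> markup is malformed now yields None instead of aborting extraction, and the list(filter(None, ...)) above discards those entries. A standalone illustration of the failure mode (the markup is made up):

    import xml.etree.ElementTree as ET

    broken = '<object><param name="flashVars" value="videoId=1">'  # unclosed tags
    try:
        doc = ET.fromstring(broken.encode('utf-8'))
    except ET.ParseError:
        doc = None  # the URL builder now bails out here instead of raising
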
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index e5edcc84b69ef7bdffdbb7ed158c901c560a7575..91ebb0ce57136dc0076927acdca4e250774746e1 100644
@@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 
         uri = mMovieParams[0][1]
         # Correct cc.com in uri
-        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
 
         index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
         idoc = self._download_xml(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8ed97f8dddfc5d9b51dab2630008c36e054a50bf..7757bf9502169b79e498a7f26ba0b227b44efe91 100644
@@ -23,6 +23,7 @@ from ..compat import (
 )
 from ..utils import (
     age_restricted,
+    bug_reports_message,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -324,7 +325,7 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -334,14 +335,11 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
         return (content, urlh)
 
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
-        content_type = urlh.headers.get('Content-Type', '')
-        webpage_bytes = urlh.read()
-        if prefix is not None:
-            webpage_bytes = prefix + webpage_bytes
+    @staticmethod
+    def _guess_encoding_from_content(content_type, webpage_bytes):
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -354,6 +352,16 @@ class InfoExtractor(object):
                 encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
+
+        return encoding
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+        content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
+        if not encoding:
+            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
@@ -410,13 +418,13 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -431,10 +439,10 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True):
+                      transform_source=None, fatal=True, encoding=None):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -445,9 +453,10 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True):
+                       fatal=True, encoding=None):
         json_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -548,8 +557,7 @@ class InfoExtractor(object):
         elif fatal:
             raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
-            self._downloader.report_warning('unable to extract %s; '
-                                            'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None
 
     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
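
Factoring the charset guess into a static method lets callers pin an encoding explicitly (the new QQMusic extractor passes encoding='gbk') while keeping the heuristics as the default. A simplified standalone sketch of those heuristics; the full method has additional probes between these branches:

    import re

    def guess_encoding(content_type, body):
        # 1) An explicit charset parameter in Content-Type wins.
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            return m.group(1)
        # 2) A UTF-16 byte-order mark at the start of the body.
        if body.startswith(b'\xff\xfe') or body.startswith(b'\xfe\xff'):
            return 'utf-16'
        # 3) Fall back to UTF-8.
        return 'utf-8'

    print(guess_encoding('text/html; charset=gbk', b''))  # gbk
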
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7ad555e9f58bf9434c804dda38cbf8e7b53c9f93..e645d1bb32d26f1e93ba57c93bd4b211d362d2fa 100644
@@ -615,13 +615,24 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '100183293',
                 'ext': 'mp4',
-                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 694,
                 'age_limit': 0,
             },
         },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
         # 5min embed
         {
             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
@@ -681,13 +692,41 @@ class GenericIE(InfoExtractor):
         # UDN embed
         {
             'url': 'http://www.udn.com/news/story/7314/822787',
-            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
             'info_dict': {
-                'id': '300040',
+                'id': '300346',
                 'ext': 'mp4',
-                'title': '生物老師男變女 全校挺"做自己"',
+                'title': '中一中男師變性 全校師生力挺',
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
         }
     ]
 
@@ -1092,7 +1131,8 @@ class GenericIE(InfoExtractor):
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
@@ -1295,6 +1335,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Pladform')
 
+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for 5min embeds
         mobj = re.search(
             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
@@ -1408,13 +1454,22 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
 
-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
 
         if len(entries) == 1:
             return entries[0]
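
The SMIL branch keys purely off the URL's extension, so direct links whose path ends in .smil are expanded into their constituent formats via _extract_smil_formats(). The detection itself is just determine_ext() (URLs below are illustrative):

    from youtube_dl.utils import determine_ext

    determine_ext('http://example.com/media/video.smil')  # -> 'smil'
    determine_ext('http://example.com/media/video.mp4')   # -> 'mp4'
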
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
index cc3f2719484d7011e5f88a882af90d95dafe905e..d41195a9647a7cdd329009c5ce448388f0ec8f20 100644
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     xpath_text,
     int_or_none,
+    ExtractorError,
 )
 
 
@@ -14,13 +15,14 @@ class MioMioIE(InfoExtractor):
     IE_NAME = 'miomio.tv'
     _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.miomio.tv/watch/cc179734/',
-        'md5': '48de02137d0739c15b440a224ad364b9',
+        # "type=video" in flashvars
+        'url': 'http://www.miomio.tv/watch/cc88912/',
+        'md5': '317a5f7f6b544ce8419b784ca8edae65',
         'info_dict': {
-            'id': '179734',
+            'id': '88912',
             'ext': 'flv',
-            'title': '手绘动漫鬼泣但丁全程画法',
-            'duration': 354,
+            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+            'duration': 5923,
         },
     }, {
         'url': 'http://www.miomio.tv/watch/cc184024/',
@@ -42,7 +44,7 @@ class MioMioIE(InfoExtractor):
             r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
 
         xml_config = self._search_regex(
-            r'flashvars="type=sina&amp;(.+?)&amp;',
+            r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
             webpage, 'xml config')
 
         # skipping the following page causes lags and eventually connection drop-outs
@@ -59,6 +61,9 @@ class MioMioIE(InfoExtractor):
             'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
         }
 
+        if not int_or_none(xpath_text(vid_config, 'timelength')):
+            raise ExtractorError('Unable to load videos!', expected=True)
+
         entries = []
         for f in vid_config.findall('./durl'):
             segment_url = xpath_text(f, 'url', 'video url')
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 84f29155841007f3088a86470040407073726067..425a4ccf16fff96b1bface874748b93762d2194b 100644
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor):
         },
     }]
 
-    def _get_url(self, track_id, template_url, server_number):
-        boundaries = (1, 30)
-        for nr in server_numbers(server_number, boundaries):
-            url = template_url % nr
-            try:
-                # We only want to know if the request succeed
-                # don't download the whole file
-                self._request_webpage(
-                    HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (nr, boundaries[-1]))
-                return url
-            except ExtractorError:
-                pass
-        return None
+    def _check_url(self, url, track_id, ext):
+        try:
+            # We only want to know if the request succeeds
+            # don't download the whole file
+            self._request_webpage(
+                HEADRequest(url), track_id,
+                'Trying %s URL' % ext)
+            return True
+        except ExtractorError:
+            return False
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
-        server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
-        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            self.to_screen('Trying with m4a extension')
-            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            raise ExtractorError('Unable to extract track url')
+        if not self._check_url(song_url, track_id, 'mp3'):
+            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            if not self._check_url(song_url, track_id, 'm4a'):
+                raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor):
         return {
             'id': track_id,
             'title': title,
-            'url': final_song_url,
+            'url': song_url,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
@@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
-
-
-def server_numbers(first, boundaries):
-    """ Server numbers to try in descending order of probable availability.
-    Starting from first (i.e. the number of the server hosting the preview file)
-    and going further and further up to the higher boundary and down to the
-    lower one in an alternating fashion. Namely:
-
-        server_numbers(2, (1, 5))
-
-        # Where the preview server is 2, min number is 1 and max is 5.
-        # Yields: 2, 3, 1, 4, 5
-
-    Why not random numbers or increasing sequences? Since from what I've seen,
-    full length files seem to be hosted on servers whose number is closer to
-    that of the preview; to be confirmed.
-    """
-    zip_longest = getattr(itertools, 'zip_longest', None)
-    if zip_longest is None:
-        # python 2.x
-        zip_longest = itertools.izip_longest
-
-    if len(boundaries) != 2:
-        raise ValueError("boundaries should be a two-element tuple")
-    min, max = boundaries
-    highs = range(first + 1, max + 1)
-    lows = range(first - 1, min - 1, -1)
-    rest = filter(
-        None, itertools.chain.from_iterable(zip_longest(highs, lows)))
-    yield first
-    for n in rest:
-        yield n
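
With server_numbers() removed, the extractor no longer walks stream1..stream30 looking for a mirror; it HEAD-checks the one canonical URL and falls back from .mp3 to .m4a exactly once. Outside the extractor framework, the probe looks roughly like this (a sketch with plain urllib):

    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    def url_exists(url):
        req = Request(url)
        req.get_method = lambda: 'HEAD'  # only the status matters, skip the body
        try:
            urlopen(req).close()
            return True
        except Exception:
            return False
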
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index c11de1cb61b28d03ab2430ff1db3a82d317dc718..4430b3416afc8af1fab70e47fb597710d6c5a75b 100644
@@ -118,6 +118,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')
 
+        item = mediagen_doc.find('./video/item')
+        if item is not None and item.get('type') == 'text':
+            message = '%s returned error: ' % self.IE_NAME
+            if item.get('code') is not None:
+                message += '%s - ' % item.get('code')
+            message += item.text
+            raise ExtractorError(message, expected=True)
+
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text.strip()
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index abde34b94659574041469e8f2f4a8b0e4e903bb1..551c8c9f0fef4566afd5691628b2c216c157fd0c 100644
@@ -30,7 +30,7 @@ class PladformIE(InfoExtractor):
         'info_dict': {
             'id': '100183293',
             'ext': 'mp4',
-            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
             'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 694,
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
new file mode 100644 (file)
index 0000000..174c8e0
--- /dev/null
+++ b/youtube_dl/extractor/qqmusic.py
@@ -0,0 +1,170 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    strip_jsonp,
+    unescapeHTML,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'info_dict': {
+            'id': '004295Et37taLD',
+            'ext': 'm4a',
+            'title': '可惜没如果',
+            'upload_date': '20141227',
+            'creator': '林俊杰',
+            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+        }
+    }]
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        return {
+            'id': mid,
+            'url': song_url,
+            'title': song_name,
+            'upload_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+        }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    @classmethod
+    def get_entries_from_page(cls, page):
+        entries = []
+
+        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+            song_mid = unescapeHTML(item).split('|')[-5]
+            entries.append(cls.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _TEST = {
+        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+        },
+        'playlist_count': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        singer_page = self._download_webpage(
+            self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+        entries = self.get_entries_from_page(singer_page)
+
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+            default=None)
+
+        singer_id = self._html_search_regex(
+            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+            default=None)
+
+        singer_desc = None
+
+        if singer_id:
+            req = compat_urllib_request.Request(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+            req.add_header(
+                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+            singer_desc_page = self._download_xml(
+                req, mid, 'Download singer description XML')
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album_page = self._download_webpage(
+            self.qq_static_url('album', mid), mid, 'Download album page')
+
+        entries = self.get_entries_from_page(album_page)
+
+        album_name = self._html_search_regex(
+            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+            default=None)
+
+        album_detail = self._html_search_regex(
+            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+            album_page, 'album details', default=None)
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
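
The vkey endpoint used by QQMusicIE answers with JSONP, which is why its _download_json call passes transform_source=strip_jsonp. What that transform does, with a made-up payload:

    import json
    from youtube_dl.utils import strip_jsonp

    payload = 'MusicJsonCallback({"key": "abcdef0123456789"});'
    print(json.loads(strip_jsonp(payload))['key'])  # abcdef0123456789
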
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 316b2c90f110770299084889552b8137e072a617..183ff50f4fc2b15a2ca99704e9277b8c68abd576 100644
@@ -221,7 +221,12 @@ class SoundcloudIE(InfoExtractor):
                 info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
@@ -274,9 +279,8 @@ class SoundcloudSetIE(SoundcloudIE):
         info = self._download_json(resolv_url, full_title)
 
         if 'errors' in info:
-            for err in info['errors']:
-                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
-            return
+            msgs = (compat_str(err['error_message']) for err in info['errors'])
+            raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
         return {
             '_type': 'playlist',
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index e529bb55ccccb1beefdf12d2df1ea689dd0d6f2e..182f286dfefc4023483c422fbf6c6a73203b86ff 100644
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
          m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
new file mode 100644 (file)
index 0000000..77eec0b
--- /dev/null
+++ b/youtube_dl/extractor/srf.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': '4cd93523723beff51bb4bee974ee238d',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'display_id': 'snowden-beantragt-asyl-in-russland',
+            'ext': 'm4v',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }, {
+        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+
+        video_data = self._download_xml(
+            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+            display_id)
+
+        title = xpath_text(
+            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+        thumbnails = [{
+            'url': s.text
+        } for s in video_data.findall('.//ImageRepresentation/url')]
+        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+        # The <duration> field in XML is different from the exact duration, skipping
+
+        formats = []
+        for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
+            for url_node in item.findall('url'):
+                quality = url_node.attrib['quality']
+                full_url = url_node.text
+                original_ext = determine_ext(full_url)
+                format_id = '%s-%s' % (quality, item.attrib['protocol'])
+                if original_ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+                elif original_ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        full_url, display_id, 'mp4', m3u8_id=format_id))
+                else:
+                    formats.append({
+                        'url': full_url,
+                        'ext': original_ext,
+                        'format_id': format_id,
+                        'quality': 0 if 'HD' in quality else -1,
+                        'preference': 1,
+                    })
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitles_data = video_data.find('Subtitles')
+        if subtitles_data is not None:
+            subtitles_list = [{
+                'url': sub.text,
+                'ext': determine_ext(sub.text),
+            } for sub in subtitles_data]
+            if subtitles_list:
+                subtitles['de'] = subtitles_list
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 1caf08cb752d201066f0a1d4679efffc26598d1e..2381676b4e8ef9680bf7c54faafef278c3db8142 100644
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import base64
@@ -35,6 +36,17 @@ class TeamcocoIE(InfoExtractor):
                 'duration': 288,
                 'age_limit': 0,
             }
+        }, {
+            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+            'info_dict': {
+                'id': '88748',
+                'ext': 'mp4',
+                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+                'description': 'md5:15501f23f020e793aeca761205e42c24',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
         }
     ]
     _VIDEO_ID_REGEXES = (
@@ -54,10 +66,23 @@ class TeamcocoIE(InfoExtractor):
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')
 
+        preload = None
         preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
-        if not preloads:
-            raise ExtractorError('Preload information could not be extracted')
-        preload = max([(len(p), p) for p in preloads])[1]
+        if preloads:
+            preload = max([(len(p), p) for p in preloads])[1]
+
+        if not preload:
+            preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
+
+        if not preload:
+            preload = self._html_search_regex([
+                r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
+            ], webpage.replace('","', ''), 'preload data', default=None)
+
+        if not preload:
+            raise ExtractorError(
+                'Preload information could not be extracted', expected=True)
+
         data = self._parse_json(
             base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
 
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 2a1ae5a717cf7b2af16bf5a1ce3ef7494e28a7a6..828c808a6456b6b99b134cb7ae9d9017de9ad3aa 100644
@@ -56,6 +56,6 @@ class TumblrIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'title': video_title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
         }
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index bba25bb58041ddca902749d32c72ca3ad3d619a1..c08428acfab446dff6157035ef032ae326199ebf 100644
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
 
 import json
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    js_to_json,
+    ExtractorError,
+)
 from ..compat import compat_urlparse
 
 
 class UDNEmbedIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://video.udn.com/embed/news/300040',
         'md5': 'de06b4c90b042c128395a88f0384817e',
@@ -19,7 +22,11 @@ class UDNEmbedIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }, {
-        'url': '//video.udn.com/embed/news/300040',
+        'url': 'https://video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }, {
+        # From https://video.udn.com/news/303776
+        'url': 'https://video.udn.com/play/news/303776',
         'only_matching': True,
     }]
 
@@ -47,7 +54,10 @@ class UDNEmbedIE(InfoExtractor):
                 'retrieve url for %s video' % video_type),
             'format_id': video_type,
             'preference': 0 if video_type == 'mp4' else -1,
-        } for video_type, api_url in video_urls.items()]
+        } for video_type, api_url in video_urls.items() if api_url]
+
+        if not formats:
+            raise ExtractorError('No videos found', expected=True)
 
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index ee3d86117e625cca66303aeeee229f1a091b4602..aa3d6ddfd2420524fd87f85819d2611225224e79 100644
@@ -1,75 +1,54 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
-import base64
-import re
-import xml.etree.ElementTree
-import zlib
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
 
 class VimpleIE(InfoExtractor):
-    IE_DESC = 'Vimple.ru'
-    _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
+    IE_DESC = 'Vimple - one-click video hosting'
+    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
     _TESTS = [
         {
             'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
             'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
+                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                 'ext': 'mp4',
                 'title': 'Sunset',
                 'duration': 20,
                 'thumbnail': 're:https?://.*?\.jpg',
             },
-        },
+        }, {
+            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
-
-        iframe = self._download_webpage(
-            iframe_url, video_id,
-            note='Downloading iframe', errnote='unable to fetch iframe')
-        player_url = self._html_search_regex(
-            r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url')
+        video_id = self._match_id(url)
 
-        player = self._request_webpage(
-            player_url, video_id, note='Downloading swf player').read()
+        webpage = self._download_webpage(
+            'http://player.vimple.ru/iframe/%s' % video_id, video_id)
 
-        player = zlib.decompress(player[8:])
+        playlist = self._parse_json(
+            self._search_regex(
+                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+            video_id)['playlist'][0]
 
-        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
-        xml_pieces = [piece[1:-1] for piece in xml_pieces]
+        title = playlist['title']
+        video_id = playlist.get('videoId') or video_id
+        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+        duration = int_or_none(playlist.get('duration'))
 
-        xml_data = b''.join(xml_pieces)
-        xml_data = base64.b64decode(xml_data)
-
-        xml_data = xml.etree.ElementTree.fromstring(xml_data)
-
-        video = xml_data.find('Video')
-        quality = video.get('quality')
-        q_tag = video.find(quality.capitalize())
-
-        formats = [
-            {
-                'url': q_tag.get('url'),
-                'tbr': int(q_tag.get('bitrate')),
-                'filesize': int(q_tag.get('filesize')),
-                'format_id': quality,
-            },
-        ]
+        formats = [{
+            'url': f['url'],
+        } for f in playlist['video']]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video.find('Title').text,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
-            'thumbnail': video.find('Poster').get('url'),
-            'duration': int_or_none(video.get('duration')),
-            'webpage_url': video.find('Share').get('videoPageUrl'),
         }
diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py
index 448ccc5f342e42959aae0619854fa80d1e1cd978..a5dfc136afddb983d23cbf6c3f87fedaacbb1999 100644
@@ -50,8 +50,13 @@ class AtomicParsleyPP(PostProcessor):
             msg = stderr.decode('utf-8', 'replace').strip()
             raise AtomicParsleyPPError(msg)
 
-        os.remove(encodeFilename(filename))
         os.remove(encodeFilename(temp_thumbnail))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        # for formats that don't support thumbnails (like 3gp) AtomicParsley
+        # won't create the temporary file
+        if b'No changes' in stdout:
+            self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+        else:
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8e99a3c2c461d300dbf077236907e0f80bd16e9b..4c4a038f9ca181d7d865e9e751345a0b673a1a08 100644
@@ -264,15 +264,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
-        if new_path == path:
-            self._nopostoverwrites = True
+        if (new_path == path or
+                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            return True, information
 
         try:
-            if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
-            else:
-                self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
-                self.run_ffmpeg(path, new_path, acodec, more_opts)
+            self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
+            self.run_ffmpeg(path, new_path, acodec, more_opts)
         except AudioConversionError as e:
             raise PostProcessingError(
                 'audio conversion failed: ' + e.msg)
@@ -286,7 +285,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 errnote='Cannot update utime of audio file')
 
         information['filepath'] = new_path
-        return self._nopostoverwrites, information
+        return False, information
 
 
 class FFmpegVideoConvertorPP(FFmpegPostProcessor):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 52f0dd09aac2ef0103212086a280fc317b36b82d..edeee1853e30c2b409fe53809ae7912e7966446c 100644
@@ -312,17 +312,17 @@ def sanitize_path(s):
     """Sanitizes and normalizes path on Windows"""
     if sys.platform != 'win32':
         return s
-    drive, _ = os.path.splitdrive(s)
-    unc, _ = os.path.splitunc(s)
-    unc_or_drive = unc or drive
-    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
-    if unc_or_drive:
+    drive_or_unc, _ = os.path.splitdrive(s)
+    if sys.version_info < (2, 7) and not drive_or_unc:
+        drive_or_unc, _ = os.path.splitunc(s)
+    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+    if drive_or_unc:
         norm_path.pop(0)
     sanitized_path = [
         path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
-    if unc_or_drive:
-        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    if drive_or_unc:
+        sanitized_path.insert(0, drive_or_unc + os.path.sep)
     return os.path.join(*sanitized_path)
 
 
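Newer interpreters handle UNC prefixes in os.path.splitdrive(), so the deprecated splitunc() is only consulted as a fallback on old Pythons, hence the version guard. Illustrative results of the rewritten function on win32 (on other platforms it returns the path unchanged):

    from youtube_dl.utils import sanitize_path

    sanitize_path('C:\\foo?\\bar:baz.')    # -> 'C:\\foo#\\bar#baz#'
    sanitize_path('\\\\host\\share\\a|b')  # -> '\\\\host\\share\\a#b'
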
@@ -452,6 +452,17 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 
+def bug_reports_message():
+    if ytdl_is_updateable():
+        update_cmd = 'type  youtube-dl -U  to update'
+    else:
+        update_cmd = 'see  https://yt-dl.org/update  on how to update'
+    msg = '; please report this issue on https://yt-dl.org/bug .'
+    msg += ' Make sure you are using the latest version; %s.' % update_cmd
+    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+    return msg
+
+
 class ExtractorError(Exception):
     """Error during info extraction."""
 
@@ -467,13 +478,7 @@ class ExtractorError(Exception):
         if cause:
             msg += ' (caused by %r)' % cause
         if not expected:
-            if ytdl_is_updateable():
-                update_cmd = 'type  youtube-dl -U  to update'
-            else:
-                update_cmd = 'see  https://yt-dl.org/update  on how to update'
-            msg += '; please report this issue on https://yt-dl.org/bug .'
-            msg += ' Make sure you are using the latest version; %s.' % update_cmd
-            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+            msg += bug_reports_message()
         super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1095fea2fe908fe59d28b3489b7916d05b8e6354..3fd0e7e566751ed1b6d39902d9165dfa3c8aef6e 100644
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.04.09'
+__version__ = '2015.04.17'