git.bitcoin.ninja Git - youtube-dl/commitdiff
Merge branch 'gfycat' of https://github.com/julianrichen/youtube-dl into julianrichen...
author    Sergey M․ <dstftw@gmail.com>
          Fri, 17 Apr 2015 21:49:32 +0000 (03:49 +0600)
committer Sergey M․ <dstftw@gmail.com>
          Fri, 17 Apr 2015 21:49:32 +0000 (03:49 +0600)
23 files changed:
docs/supportedsites.md
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/miomio.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/pladform.py
youtube_dl/extractor/qqmusic.py [new file with mode: 0644]
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/srf.py [new file with mode: 0644]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/udn.py
youtube_dl/extractor/vimple.py
youtube_dl/postprocessor/atomicparsley.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c85a3991800e5c86d19bee8ed8fb8d1a9c6d96b9..80e86c1b652448a7c92c8907fe62cef8a86b3dcd 100644
@@ -98,6 +98,7 @@
  - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
  - **Cracked**
  - **Criterion**
+ - **CrooksAndLiars**
  - **Crunchyroll**
  - **crunchyroll:playlist**
  - **CSpan**: C-SPAN
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Puls4**
  - **Pyvideo**
+ - **QQMusic**
+ - **QQMusicAlbum**
+ - **QQMusicSinger**
  - **QuickVid**
  - **R7**
  - **radio.de**
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**
+ - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
  - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a68b24ab485d6ef8e064ee7533a4a15379eb87cf..6ac85f4e7ecd505499d91b9f51759281287c3a1c 100755
@@ -1486,16 +1486,9 @@ class YoutubeDL(object):
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
-            keep_video = None
             old_filename = info['filepath']
             try:
-                keep_video_wish, info = pp.run(info)
-                if keep_video_wish is not None:
-                    if keep_video_wish:
-                        keep_video = keep_video_wish
-                    elif keep_video is None:
-                        # No clear decision yet, let IE decide
-                        keep_video = keep_video_wish
+                keep_video, info = pp.run(info)
             except PostProcessingError as e:
                 self.report_error(e.msg)
             if keep_video is False and not self.params.get('keepvideo', False):
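
With the removed bookkeeping gone, each postprocessor's run() is trusted directly: it returns a (keep_video, info) pair, and a False from any processor lets the original file be deleted unless the keepvideo option is set. A minimal sketch of that contract, using a hypothetical processor class:

    class ExamplePP(object):
        """Hypothetical postprocessor illustrating the simplified protocol."""
        def run(self, info):
            # Return None to leave the keep/delete decision to other
            # processors, False to allow deletion of the original download,
            # True to force keeping it.
            return None, info
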
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d32f1cbd2892901f84e859dc16fd5ba25cbc27f1..9e9e20589f08ad5fd73f13e8c89fa0670eba282a 100644
@@ -397,6 +397,11 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+)
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
@@ -481,6 +486,7 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
 from .srmediathek import SRMediathekIE
 from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0733bece7c45880ab5c20b916d5bd8c9700da548..4f60d53660fa7777b9e1b6152967ce2e7e567ec9 100644
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
             (?:
                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
-            ).+?</object>''',
+            ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
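
The new try/except means a page whose BrightcoveExperience <object> markup is malformed now yields None instead of aborting extraction, and the list(filter(None, ...)) above discards those entries. A standalone illustration of the failure mode (the markup is made up):

    import xml.etree.ElementTree as ET

    broken = '<object><param name="flashVars" value="videoId=1">'  # unclosed tags
    try:
        doc = ET.fromstring(broken.encode('utf-8'))
    except ET.ParseError:
        doc = None  # the URL builder now bails out here instead of raising
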
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index e5edcc84b69ef7bdffdbb7ed158c901c560a7575..91ebb0ce57136dc0076927acdca4e250774746e1 100644
@@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 
         uri = mMovieParams[0][1]
         # Correct cc.com in uri
-        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
 
         index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
         idoc = self._download_xml(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8ed97f8dddfc5d9b51dab2630008c36e054a50bf..7757bf9502169b79e498a7f26ba0b227b44efe91 100644
@@ -23,6 +23,7 @@ from ..compat import (
 )
 from ..utils import (
     age_restricted,
+    bug_reports_message,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -324,7 +325,7 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -334,14 +335,11 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
         return (content, urlh)
 
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
-        content_type = urlh.headers.get('Content-Type', '')
-        webpage_bytes = urlh.read()
-        if prefix is not None:
-            webpage_bytes = prefix + webpage_bytes
+    @staticmethod
+    def _guess_encoding_from_content(content_type, webpage_bytes):
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -354,6 +352,16 @@ class InfoExtractor(object):
                 encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
+
+        return encoding
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+        content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
+        if not encoding:
+            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
@@ -410,13 +418,13 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -431,10 +439,10 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True):
+                      transform_source=None, fatal=True, encoding=None):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -445,9 +453,10 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True):
+                       fatal=True, encoding=None):
         json_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -548,8 +557,7 @@ class InfoExtractor(object):
         elif fatal:
             raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
-            self._downloader.report_warning('unable to extract %s; '
-                                            'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None
 
     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
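
Factoring the charset guess into a static method lets callers pin an encoding explicitly (the new QQMusic extractor passes encoding='gbk') while keeping the heuristics as the default. A simplified standalone sketch of those heuristics; the full method has additional probes between these branches:

    import re

    def guess_encoding(content_type, body):
        # 1) An explicit charset parameter in Content-Type wins.
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            return m.group(1)
        # 2) A UTF-16 byte-order mark at the start of the body.
        if body.startswith(b'\xff\xfe') or body.startswith(b'\xfe\xff'):
            return 'utf-16'
        # 3) Fall back to UTF-8.
        return 'utf-8'

    print(guess_encoding('text/html; charset=gbk', b''))  # gbk
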
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7ad555e9f58bf9434c804dda38cbf8e7b53c9f93..e645d1bb32d26f1e93ba57c93bd4b211d362d2fa 100644
@@ -615,13 +615,24 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '100183293',
                 'ext': 'mp4',
-                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 694,
                 'age_limit': 0,
             },
         },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
         # 5min embed
         {
             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
@@ -681,13 +692,41 @@ class GenericIE(InfoExtractor):
         # UDN embed
         {
             'url': 'http://www.udn.com/news/story/7314/822787',
-            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
             'info_dict': {
-                'id': '300040',
+                'id': '300346',
                 'ext': 'mp4',
-                'title': '生物老師男變女 全校挺"做自己"',
+                'title': '中一中男師變性 全校師生力挺',
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
         }
     ]
 
@@ -1092,7 +1131,8 @@ class GenericIE(InfoExtractor):
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
@@ -1295,6 +1335,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Pladform')
 
+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for 5min embeds
         mobj = re.search(
             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
@@ -1408,13 +1454,22 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
 
-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
 
         if len(entries) == 1:
             return entries[0]
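
The SMIL branch keys purely off the URL's extension, so direct links whose path ends in .smil are expanded into their constituent formats via _extract_smil_formats(). The detection itself is just determine_ext() (URLs below are illustrative):

    from youtube_dl.utils import determine_ext

    determine_ext('http://example.com/media/video.smil')  # -> 'smil'
    determine_ext('http://example.com/media/video.mp4')   # -> 'mp4'
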
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
index cc3f2719484d7011e5f88a882af90d95dafe905e..d41195a9647a7cdd329009c5ce448388f0ec8f20 100644
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     xpath_text,
     int_or_none,
+    ExtractorError,
 )
 
 
@@ -14,13 +15,14 @@ class MioMioIE(InfoExtractor):
     IE_NAME = 'miomio.tv'
     _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.miomio.tv/watch/cc179734/',
-        'md5': '48de02137d0739c15b440a224ad364b9',
+        # "type=video" in flashvars
+        'url': 'http://www.miomio.tv/watch/cc88912/',
+        'md5': '317a5f7f6b544ce8419b784ca8edae65',
         'info_dict': {
-            'id': '179734',
+            'id': '88912',
             'ext': 'flv',
-            'title': '手绘动漫鬼泣但丁全程画法',
-            'duration': 354,
+            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+            'duration': 5923,
         },
     }, {
         'url': 'http://www.miomio.tv/watch/cc184024/',
@@ -42,7 +44,7 @@ class MioMioIE(InfoExtractor):
             r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
 
         xml_config = self._search_regex(
-            r'flashvars="type=sina&amp;(.+?)&amp;',
+            r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
             webpage, 'xml config')
 
         # skipping the following page causes lags and eventually connection drop-outs
@@ -59,6 +61,9 @@ class MioMioIE(InfoExtractor):
             'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
         }
 
+        if not int_or_none(xpath_text(vid_config, 'timelength')):
+            raise ExtractorError('Unable to load videos!', expected=True)
+
         entries = []
         for f in vid_config.findall('./durl'):
             segment_url = xpath_text(f, 'url', 'video url')
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 84f29155841007f3088a86470040407073726067..425a4ccf16fff96b1bface874748b93762d2194b 100644
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor):
         },
     }]
 
-    def _get_url(self, track_id, template_url, server_number):
-        boundaries = (1, 30)
-        for nr in server_numbers(server_number, boundaries):
-            url = template_url % nr
-            try:
-                # We only want to know if the request succeed
-                # don't download the whole file
-                self._request_webpage(
-                    HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (nr, boundaries[-1]))
-                return url
-            except ExtractorError:
-                pass
-        return None
+    def _check_url(self, url, track_id, ext):
+        try:
+            # We only want to know if the request succeeds
+            # don't download the whole file
+            self._request_webpage(
+                HEADRequest(url), track_id,
+                'Trying %s URL' % ext)
+            return True
+        except ExtractorError:
+            return False
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
-        server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
-        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            self.to_screen('Trying with m4a extension')
-            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            raise ExtractorError('Unable to extract track url')
+        if not self._check_url(song_url, track_id, 'mp3'):
+            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            if not self._check_url(song_url, track_id, 'm4a'):
+                raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor):
         return {
             'id': track_id,
             'title': title,
-            'url': final_song_url,
+            'url': song_url,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
@@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
-
-
-def server_numbers(first, boundaries):
-    """ Server numbers to try in descending order of probable availability.
-    Starting from first (i.e. the number of the server hosting the preview file)
-    and going further and further up to the higher boundary and down to the
-    lower one in an alternating fashion. Namely:
-
-        server_numbers(2, (1, 5))
-
-        # Where the preview server is 2, min number is 1 and max is 5.
-        # Yields: 2, 3, 1, 4, 5
-
-    Why not random numbers or increasing sequences? Since from what I've seen,
-    full length files seem to be hosted on servers whose number is closer to
-    that of the preview; to be confirmed.
-    """
-    zip_longest = getattr(itertools, 'zip_longest', None)
-    if zip_longest is None:
-        # python 2.x
-        zip_longest = itertools.izip_longest
-
-    if len(boundaries) != 2:
-        raise ValueError("boundaries should be a two-element tuple")
-    min, max = boundaries
-    highs = range(first + 1, max + 1)
-    lows = range(first - 1, min - 1, -1)
-    rest = filter(
-        None, itertools.chain.from_iterable(zip_longest(highs, lows)))
-    yield first
-    for n in rest:
-        yield n
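
With server_numbers() removed, the extractor no longer walks stream1..stream30 looking for a mirror; it HEAD-checks the one canonical URL and falls back from .mp3 to .m4a exactly once. Outside the extractor framework, the probe looks roughly like this (a sketch with plain urllib):

    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    def url_exists(url):
        req = Request(url)
        req.get_method = lambda: 'HEAD'  # only the status matters, skip the body
        try:
            urlopen(req).close()
            return True
        except Exception:
            return False
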
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index c11de1cb61b28d03ab2430ff1db3a82d317dc718..4430b3416afc8af1fab70e47fb597710d6c5a75b 100644
@@ -118,6 +118,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')
 
+        item = mediagen_doc.find('./video/item')
+        if item is not None and item.get('type') == 'text':
+            message = '%s returned error: ' % self.IE_NAME
+            if item.get('code') is not None:
+                message += '%s - ' % item.get('code')
+            message += item.text
+            raise ExtractorError(message, expected=True)
+
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text.strip()
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index abde34b94659574041469e8f2f4a8b0e4e903bb1..551c8c9f0fef4566afd5691628b2c216c157fd0c 100644
@@ -30,7 +30,7 @@ class PladformIE(InfoExtractor):
         'info_dict': {
             'id': '100183293',
             'ext': 'mp4',
-            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
             'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 694,
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
new file mode 100644 (file)
index 0000000..174c8e0
--- /dev/null
+++ b/youtube_dl/extractor/qqmusic.py
@@ -0,0 +1,170 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    strip_jsonp,
+    unescapeHTML,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'info_dict': {
+            'id': '004295Et37taLD',
+            'ext': 'm4a',
+            'title': '可惜没如果',
+            'upload_date': '20141227',
+            'creator': '林俊杰',
+            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+        }
+    }]
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        return {
+            'id': mid,
+            'url': song_url,
+            'title': song_name,
+            'upload_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+        }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    @classmethod
+    def get_entries_from_page(cls, page):
+        entries = []
+
+        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+            song_mid = unescapeHTML(item).split('|')[-5]
+            entries.append(cls.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _TEST = {
+        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+        },
+        'playlist_count': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        singer_page = self._download_webpage(
+            self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+        entries = self.get_entries_from_page(singer_page)
+
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+            default=None)
+
+        singer_id = self._html_search_regex(
+            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+            default=None)
+
+        singer_desc = None
+
+        if singer_id:
+            req = compat_urllib_request.Request(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+            req.add_header(
+                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+            singer_desc_page = self._download_xml(
+                req, mid, 'Download singer description XML')
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album_page = self._download_webpage(
+            self.qq_static_url('album', mid), mid, 'Download album page')
+
+        entries = self.get_entries_from_page(album_page)
+
+        album_name = self._html_search_regex(
+            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+            default=None)
+
+        album_detail = self._html_search_regex(
+            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+            album_page, 'album details', default=None)
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
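
The vkey endpoint used by QQMusicIE answers with JSONP, which is why its _download_json call passes transform_source=strip_jsonp. What that transform does, with a made-up payload:

    import json
    from youtube_dl.utils import strip_jsonp

    payload = 'MusicJsonCallback({"key": "abcdef0123456789"});'
    print(json.loads(strip_jsonp(payload))['key'])  # abcdef0123456789
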
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 316b2c90f110770299084889552b8137e072a617..183ff50f4fc2b15a2ca99704e9277b8c68abd576 100644
@@ -221,7 +221,12 @@ class SoundcloudIE(InfoExtractor):
                 info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
@@ -274,9 +279,8 @@ class SoundcloudSetIE(SoundcloudIE):
         info = self._download_json(resolv_url, full_title)
 
         if 'errors' in info:
-            for err in info['errors']:
-                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
-            return
+            msgs = (compat_str(err['error_message']) for err in info['errors'])
+            raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
         return {
             '_type': 'playlist',
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index e529bb55ccccb1beefdf12d2df1ea689dd0d6f2e..182f286dfefc4023483c422fbf6c6a73203b86ff 100644
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
          m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
new file mode 100644 (file)
index 0000000..77eec0b
--- /dev/null
+++ b/youtube_dl/extractor/srf.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': '4cd93523723beff51bb4bee974ee238d',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'display_id': 'snowden-beantragt-asyl-in-russland',
+            'ext': 'm4v',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }, {
+        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+
+        video_data = self._download_xml(
+            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+            display_id)
+
+        title = xpath_text(
+            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+        thumbnails = [{
+            'url': s.text
+        } for s in video_data.findall('.//ImageRepresentation/url')]
+        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+        # The <duration> field in XML is different from the exact duration, skipping
+
+        formats = []
+        for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
+            for url_node in item.findall('url'):
+                quality = url_node.attrib['quality']
+                full_url = url_node.text
+                original_ext = determine_ext(full_url)
+                format_id = '%s-%s' % (quality, item.attrib['protocol'])
+                if original_ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+                elif original_ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        full_url, display_id, 'mp4', m3u8_id=format_id))
+                else:
+                    formats.append({
+                        'url': full_url,
+                        'ext': original_ext,
+                        'format_id': format_id,
+                        'quality': 0 if 'HD' in quality else -1,
+                        'preference': 1,
+                    })
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitles_data = video_data.find('Subtitles')
+        if subtitles_data is not None:
+            subtitles_list = [{
+                'url': sub.text,
+                'ext': determine_ext(sub.text),
+            } for sub in subtitles_data]
+            if subtitles_list:
+                subtitles['de'] = subtitles_list
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 1caf08cb752d201066f0a1d4679efffc26598d1e..2381676b4e8ef9680bf7c54faafef278c3db8142 100644
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import base64
@@ -35,6 +36,17 @@ class TeamcocoIE(InfoExtractor):
                 'duration': 288,
                 'age_limit': 0,
             }
+        }, {
+            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+            'info_dict': {
+                'id': '88748',
+                'ext': 'mp4',
+                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+                'description': 'md5:15501f23f020e793aeca761205e42c24',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
         }
     ]
     _VIDEO_ID_REGEXES = (
@@ -54,10 +66,23 @@ class TeamcocoIE(InfoExtractor):
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')
 
+        preload = None
         preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
-        if not preloads:
-            raise ExtractorError('Preload information could not be extracted')
-        preload = max([(len(p), p) for p in preloads])[1]
+        if preloads:
+            preload = max([(len(p), p) for p in preloads])[1]
+
+        if not preload:
+            preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage))
+
+        if not preload:
+            preload = self._html_search_regex([
+                r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)'
+            ], webpage.replace('","', ''), 'preload data', default=None)
+
+        if not preload:
+            raise ExtractorError(
+                'Preload information could not be extracted', expected=True)
+
         data = self._parse_json(
             base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
 
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 2a1ae5a717cf7b2af16bf5a1ce3ef7494e28a7a6..828c808a6456b6b99b134cb7ae9d9017de9ad3aa 100644
@@ -56,6 +56,6 @@ class TumblrIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'title': video_title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
         }
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index bba25bb58041ddca902749d32c72ca3ad3d619a1..c08428acfab446dff6157035ef032ae326199ebf 100644
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
 
 import json
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    js_to_json,
+    ExtractorError,
+)
 from ..compat import compat_urlparse
 
 
 class UDNEmbedIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://video.udn.com/embed/news/300040',
         'md5': 'de06b4c90b042c128395a88f0384817e',
@@ -19,7 +22,11 @@ class UDNEmbedIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }, {
-        'url': '//video.udn.com/embed/news/300040',
+        'url': 'https://video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }, {
+        # From https://video.udn.com/news/303776
+        'url': 'https://video.udn.com/play/news/303776',
         'only_matching': True,
     }]
 
@@ -47,7 +54,10 @@ class UDNEmbedIE(InfoExtractor):
                 'retrieve url for %s video' % video_type),
             'format_id': video_type,
             'preference': 0 if video_type == 'mp4' else -1,
-        } for video_type, api_url in video_urls.items()]
+        } for video_type, api_url in video_urls.items() if api_url]
+
+        if not formats:
+            raise ExtractorError('No videos found', expected=True)
 
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index ee3d86117e625cca66303aeeee229f1a091b4602..aa3d6ddfd2420524fd87f85819d2611225224e79 100644
@@ -1,75 +1,54 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
-import base64
-import re
-import xml.etree.ElementTree
-import zlib
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
 
 class VimpleIE(InfoExtractor):
-    IE_DESC = 'Vimple.ru'
-    _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
+    IE_DESC = 'Vimple - one-click video hosting'
+    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
     _TESTS = [
         {
             'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
             'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
+                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                 'ext': 'mp4',
                 'title': 'Sunset',
                 'duration': 20,
                 'thumbnail': 're:https?://.*?\.jpg',
             },
-        },
+        }, {
+            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
-
-        iframe = self._download_webpage(
-            iframe_url, video_id,
-            note='Downloading iframe', errnote='unable to fetch iframe')
-        player_url = self._html_search_regex(
-            r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url')
+        video_id = self._match_id(url)
 
-        player = self._request_webpage(
-            player_url, video_id, note='Downloading swf player').read()
+        webpage = self._download_webpage(
+            'http://player.vimple.ru/iframe/%s' % video_id, video_id)
 
-        player = zlib.decompress(player[8:])
+        playlist = self._parse_json(
+            self._search_regex(
+                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+            video_id)['playlist'][0]
 
-        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
-        xml_pieces = [piece[1:-1] for piece in xml_pieces]
+        title = playlist['title']
+        video_id = playlist.get('videoId') or video_id
+        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+        duration = int_or_none(playlist.get('duration'))
 
-        xml_data = b''.join(xml_pieces)
-        xml_data = base64.b64decode(xml_data)
-
-        xml_data = xml.etree.ElementTree.fromstring(xml_data)
-
-        video = xml_data.find('Video')
-        quality = video.get('quality')
-        q_tag = video.find(quality.capitalize())
-
-        formats = [
-            {
-                'url': q_tag.get('url'),
-                'tbr': int(q_tag.get('bitrate')),
-                'filesize': int(q_tag.get('filesize')),
-                'format_id': quality,
-            },
-        ]
+        formats = [{
+            'url': f['url'],
+        } for f in playlist['video']]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video.find('Title').text,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
-            'thumbnail': video.find('Poster').get('url'),
-            'duration': int_or_none(video.get('duration')),
-            'webpage_url': video.find('Share').get('videoPageUrl'),
         }
diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py
index 448ccc5f342e42959aae0619854fa80d1e1cd978..a5dfc136afddb983d23cbf6c3f87fedaacbb1999 100644
@@ -50,8 +50,13 @@ class AtomicParsleyPP(PostProcessor):
             msg = stderr.decode('utf-8', 'replace').strip()
             raise AtomicParsleyPPError(msg)
 
-        os.remove(encodeFilename(filename))
         os.remove(encodeFilename(temp_thumbnail))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        # for formats that don't support thumbnails (like 3gp) AtomicParsley
+        # won't create the temporary file
+        if b'No changes' in stdout:
+            self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+        else:
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8e99a3c2c461d300dbf077236907e0f80bd16e9b..4c4a038f9ca181d7d865e9e751345a0b673a1a08 100644
@@ -264,15 +264,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
-        if new_path == path:
-            self._nopostoverwrites = True
+        if (new_path == path or
+                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            return True, information
 
         try:
-            if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
-            else:
-                self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
-                self.run_ffmpeg(path, new_path, acodec, more_opts)
+            self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
+            self.run_ffmpeg(path, new_path, acodec, more_opts)
         except AudioConversionError as e:
             raise PostProcessingError(
                 'audio conversion failed: ' + e.msg)
@@ -286,7 +285,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 errnote='Cannot update utime of audio file')
 
         information['filepath'] = new_path
-        return self._nopostoverwrites, information
+        return False, information
 
 
 class FFmpegVideoConvertorPP(FFmpegPostProcessor):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 52f0dd09aac2ef0103212086a280fc317b36b82d..edeee1853e30c2b409fe53809ae7912e7966446c 100644
@@ -312,17 +312,17 @@ def sanitize_path(s):
     """Sanitizes and normalizes path on Windows"""
     if sys.platform != 'win32':
         return s
-    drive, _ = os.path.splitdrive(s)
-    unc, _ = os.path.splitunc(s)
-    unc_or_drive = unc or drive
-    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
-    if unc_or_drive:
+    drive_or_unc, _ = os.path.splitdrive(s)
+    if sys.version_info < (2, 7) and not drive_or_unc:
+        drive_or_unc, _ = os.path.splitunc(s)
+    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+    if drive_or_unc:
         norm_path.pop(0)
     sanitized_path = [
         path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
-    if unc_or_drive:
-        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    if drive_or_unc:
+        sanitized_path.insert(0, drive_or_unc + os.path.sep)
     return os.path.join(*sanitized_path)
 
 
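Newer interpreters handle UNC prefixes in os.path.splitdrive(), so the deprecated splitunc() is only consulted as a fallback on old Pythons, hence the version guard. Illustrative results of the rewritten function on win32 (on other platforms it returns the path unchanged):

    from youtube_dl.utils import sanitize_path

    sanitize_path('C:\\foo?\\bar:baz.')    # -> 'C:\\foo#\\bar#baz#'
    sanitize_path('\\\\host\\share\\a|b')  # -> '\\\\host\\share\\a#b'
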
@@ -452,6 +452,17 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 
+def bug_reports_message():
+    if ytdl_is_updateable():
+        update_cmd = 'type  youtube-dl -U  to update'
+    else:
+        update_cmd = 'see  https://yt-dl.org/update  on how to update'
+    msg = '; please report this issue on https://yt-dl.org/bug .'
+    msg += ' Make sure you are using the latest version; %s.' % update_cmd
+    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+    return msg
+
+
 class ExtractorError(Exception):
     """Error during info extraction."""
 
@@ -467,13 +478,7 @@ class ExtractorError(Exception):
         if cause:
             msg += ' (caused by %r)' % cause
         if not expected:
-            if ytdl_is_updateable():
-                update_cmd = 'type  youtube-dl -U  to update'
-            else:
-                update_cmd = 'see  https://yt-dl.org/update  on how to update'
-            msg += '; please report this issue on https://yt-dl.org/bug .'
-            msg += ' Make sure you are using the latest version; %s.' % update_cmd
-            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+            msg += bug_reports_message()
         super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1095fea2fe908fe59d28b3489b7916d05b8e6354..3fd0e7e566751ed1b6d39902d9165dfa3c8aef6e 100644
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.04.09'
+__version__ = '2015.04.17'