Merge pull request #5680 from ping/qqmusic-toplist-ie
author Yen Chi Hsuan <yan12125@gmail.com>
Thu, 14 May 2015 15:23:32 +0000 (23:23 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Thu, 14 May 2015 15:23:32 +0000 (23:23 +0800)
[qqmusic] Add support for charts / top lists

youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/screenwavemedia.py
youtube_dl/extractor/worldstarhiphop.py
youtube_dl/postprocessor/embedthumbnail.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/xattrpp.py

index 4cf83c510496f805da8174707d6b3f37d0aed146..691f3e09f807de52c1c19f334befd0ccc0d4f82c 100755 (executable)
@@ -1085,10 +1085,11 @@ class YoutubeDL(object):
         req_format = self.params.get('format')
         if req_format is None:
             req_format_list = []
-            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-'
-                    and info_dict['extractor'] in ['youtube', 'ted']
-                    and FFmpegMergerPP(self).available):
-                req_format_list.append('bestvideo+bestaudio')
+            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+                    info_dict['extractor'] in ['youtube', 'ted']):
+                merger = FFmpegMergerPP(self)
+                if merger.available and merger.can_merge():
+                    req_format_list.append('bestvideo+bestaudio')
             req_format_list.append('best')
             req_format = '/'.join(req_format_list)
         formats_to_download = []
@@ -1848,7 +1849,7 @@ class YoutubeDL(object):
             thumb_ext = determine_ext(t['url'], 'jpg')
             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
 
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
index 9cc9f851f41b0259d99cf1751050ae0ba2e10e3c..ace17857c8cb28320ba1fab2988e56c020583af7 100644 (file)
@@ -240,7 +240,13 @@ def _real_main(argv=None):
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
     if opts.embedthumbnail:
-        postprocessors.append({'key': 'EmbedThumbnail'})
+        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+        postprocessors.append({
+            'key': 'EmbedThumbnail',
+            'already_have_thumbnail': already_have_thumbnail
+        })
+        if not already_have_thumbnail:
+            opts.writethumbnail = True
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
index 6252be05b7f4b57787152b4edae5378675a96847..3b2de517e53da39e06912ce1a97c4aafe7fa250e 100644 (file)
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'md5:5438d33774b6bdc662f9485a340401cc',
             'title': 'Season 5 Episode 5',
-            'thumbnail': 're:^https?://.*promo.*'
+            'thumbnail': 're:^https?://.*\.jpg$'
         },
         'params': {
             'skip_download': True,
index 1b14471e57198c2a04833089c174c0c6c3108ab8..699b4f7d08b1928ffa1799adc755774977a84237 100644 (file)
@@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor):
     }
 
     _TESTS = [{
-        'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
-        'md5': '3db39fb48b9685438ecf33a1078023e4',
+        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
+        'md5': 'b3481d7ca972f61e37420798d0a9d934',
         'info_dict': {
-            'id': '922470',
+            'id': '1263092',
             'ext': 'flv',
-            'title': 'Zapping - 26/08/13',
-            'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
-            'upload_date': '20130826',
+            'title': 'Le Zapping - 13/05/15',
+            'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
+            'upload_date': '20150513',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
@@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor):
         'skip': 'videos get deleted after a while',
     }, {
         'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
-        'md5': '65aa83ad62fe107ce29e564bb8712580',
+        'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
         'info_dict': {
             'id': '1213714',
             'ext': 'flv',
index 155d0ee6a834fa6fb551900e0d0c35dcf0007a5c..fbc521d1aae02077ae62c5cd0a6c5f9cdcff014a 100644 (file)
@@ -6,6 +6,7 @@ from ..utils import (
     unified_strdate,
     int_or_none,
     qualities,
+    unescapeHTML,
 )
 
 
@@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         player = self._parse_json(
-            self._search_regex(
-                r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+            unescapeHTML(self._search_regex(
+                r'data-attributes="([^"]+)"', webpage, 'player')),
             video_id)
 
         metadata = self._parse_json(player['flashvars']['metadata'], video_id)
index 74fb1983ac4cbb60f83299e441ecc7ca802b1687..d1ab66b3216d5153a5480769fb0723919f3fdb37 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
+    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
     _TESTS = [{
         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
@@ -20,7 +20,10 @@ class ScreenwaveMediaIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
+
+        playerdata = self._download_webpage(
+            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
+            video_id, 'Downloading player webpage')
 
         vidtitle = self._search_regex(
             r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
@@ -99,7 +102,7 @@ class TeamFourIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         playerdata_url = self._search_regex(
-            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
             webpage, 'player data URL')
 
         video_title = self._html_search_regex(
index d5c26a032bcf28a9c8ae79e1d083d67ed29b2726..a3ea26feb38257071c8ae5d3c1702cf0fcd2650a 100644 (file)
@@ -6,8 +6,8 @@ from .common import InfoExtractor
 
 
 class WorldStarHipHopIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
+    _TESTS = [{
         "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
         "md5": "9d04de741161603bf7071bbf4e883186",
         "info_dict": {
@@ -15,7 +15,15 @@ class WorldStarHipHopIE(InfoExtractor):
             "ext": "mp4",
             "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
         }
-    }
+    }, {
+        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
+        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
+        'info_dict': {
+            'id': 'wshh6a7q1ny0G34ZwuIO',
+            'ext': 'mp4',
+            "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -26,19 +34,22 @@ class WorldStarHipHopIE(InfoExtractor):
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
 
         video_url = self._search_regex(
-            r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
+            [r'so\.addVariable\("file","(.*?)"\)',
+             r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
+            webpage, 'video URL')
 
         if 'youtube' in video_url:
             return self.url_result(video_url, ie='Youtube')
 
         video_title = self._html_search_regex(
-            r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+            [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+             r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
             webpage, 'title')
 
         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
         thumbnail = self._html_search_regex(
             r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
-            fatal=False)
+            default=None)
         if not thumbnail:
             _title = r'candytitles.*>(.*)</span>'
             mobj = re.search(_title, webpage)
index 4868a42fdca9f486bed5e1def3ffaf08b26fda39..8f825f7859058c9c40cd55e50ec9832a92858c32 100644 (file)
@@ -7,12 +7,9 @@ import subprocess
 
 from .ffmpeg import FFmpegPostProcessor
 
-from ..compat import (
-    compat_urlretrieve,
-)
 from ..utils import (
-    determine_ext,
     check_executable,
+    encodeArgument,
     encodeFilename,
     PostProcessingError,
     prepend_extension,
@@ -25,26 +22,30 @@ class EmbedThumbnailPPError(PostProcessingError):
 
 
 class EmbedThumbnailPP(FFmpegPostProcessor):
+    def __init__(self, downloader=None, already_have_thumbnail=False):
+        super(EmbedThumbnailPP, self).__init__(downloader)
+        self._already_have_thumbnail = already_have_thumbnail  # run() deletes the thumbnail file after embedding only when this is False
+
     def run(self, info):
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
-        temp_thumbnail = filename + '.' + determine_ext(info['thumbnail'])
 
-        if not info.get('thumbnail'):
+        if not info.get('thumbnails'):
             raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
 
-        compat_urlretrieve(info['thumbnail'], temp_thumbnail)
+        thumbnail_filename = info['thumbnails'][-1]['filename']
 
         if info['ext'] == 'mp3':
             options = [
-                '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1',
+                '-c', 'copy', '-map', '0', '-map', '1',
                 '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
 
             self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
 
-            self.run_ffmpeg(filename, temp_filename, options)
+            self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
 
-            os.remove(encodeFilename(temp_thumbnail))
+            if not self._already_have_thumbnail:
+                os.remove(encodeFilename(thumbnail_filename))
             os.remove(encodeFilename(filename))
             os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
@@ -52,7 +53,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
             if not check_executable('AtomicParsley', ['-v']):
                 raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
 
-            cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
+            cmd = [encodeFilename('AtomicParsley', True),
+                   encodeFilename(filename, True),
+                   encodeArgument('--artwork'),
+                   encodeFilename(thumbnail_filename, True),
+                   encodeArgument('-o'),
+                   encodeFilename(temp_filename, True)]
 
             self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
 
@@ -66,7 +72,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                 msg = stderr.decode('utf-8', 'replace').strip()
                 raise EmbedThumbnailPPError(msg)
 
-            os.remove(encodeFilename(temp_thumbnail))
+            if not self._already_have_thumbnail:
+                os.remove(encodeFilename(thumbnail_filename))
             # for formats that don't support thumbnails (like 3gp) AtomicParsley
             # won't create to the temporary file
             if b'No changes' in stdout:
index 214de39f9aa80e6f042b63e2069ded1ab6d123bd..cc65b34e71a28cfb0947b9441d5dcc006baf47ba 100644 (file)
@@ -591,6 +591,23 @@ class FFmpegMergerPP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
         return info['__files_to_merge'], info
 
+    def can_merge(self):  # returns False (after a warning) only for avconv older than 10-0
+        # TODO: figure out merge-capable ffmpeg version
+        if self.basename != 'avconv':
+            return True  # no version check is done for any other basename (see TODO above)
+
+        required_version = '10-0'  # minimum avconv version accepted by the check below
+        if is_outdated_version(
+                self._versions[self.basename], required_version):
+            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+                       'youtube-dl will download single file media. '
+                       'Update %s to version %s or newer to fix this.') % (
+                           self.basename, self.basename, required_version)
+            if self._downloader:  # the warning is skipped when no downloader is attached
+                self._downloader.report_warning(warning)
+            return False
+        return True
+
 
 class FFmpegFixupStretchedPP(FFmpegPostProcessor):
     def run(self, info):
index 93d0abcf6d1b563109c5488d10b67a012f916103..7d88e130820e073af1b6fd527390cb1cb5dc8dec 100644 (file)
@@ -3,18 +3,34 @@ from __future__ import unicode_literals
 import os
 import subprocess
 import sys
+import errno
 
 from .common import PostProcessor
-from ..compat import (
-    subprocess_check_output
-)
 from ..utils import (
     check_executable,
     hyphenate_date,
     version_tuple,
+    PostProcessingError,
+    encodeArgument,
+    encodeFilename,
 )
 
 
+class XAttrMetadataError(PostProcessingError):  # raised by the write_xattr helpers; carries .code and .reason
+    def __init__(self, code=None, msg='Unknown error'):
+        super(XAttrMetadataError, self).__init__(msg)
+        self.code = code  # an errno value, or a subprocess return code when setfattr/xattr failed
+
+        # Classify the failure into .reason from the numeric code or, failing that, the message text
+        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+                'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
+            self.reason = 'NO_SPACE'
+        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+            self.reason = 'VALUE_TOO_LONG'
+        else:
+            self.reason = 'NOT_SUPPORTED'  # fallback: reported as "filesystem doesn't support extended attributes"
+
+
 class XAttrMetadataPP(PostProcessor):
 
     #
@@ -51,7 +67,10 @@ class XAttrMetadataPP(PostProcessor):
                 raise ImportError
 
             def write_xattr(path, key, value):
-                return xattr.setxattr(path, key, value)
+                try:
+                    xattr.set(path, key, value)
+                except EnvironmentError as e:
+                    raise XAttrMetadataError(e.errno, e.strerror)
 
         except ImportError:
             if os.name == 'nt':
@@ -62,8 +81,11 @@ class XAttrMetadataPP(PostProcessor):
                     assert os.path.exists(path)
 
                     ads_fn = path + ":" + key
-                    with open(ads_fn, "wb") as f:
-                        f.write(value)
+                    try:
+                        with open(ads_fn, "wb") as f:
+                            f.write(value)
+                    except EnvironmentError as e:
+                        raise XAttrMetadataError(e.errno, e.strerror)
             else:
                 user_has_setfattr = check_executable("setfattr", ['--version'])
                 user_has_xattr = check_executable("xattr", ['-h'])
@@ -71,12 +93,27 @@ class XAttrMetadataPP(PostProcessor):
                 if user_has_setfattr or user_has_xattr:
 
                     def write_xattr(path, key, value):
+                        value = value.decode('utf-8')
                         if user_has_setfattr:
-                            cmd = ['setfattr', '-n', key, '-v', value, path]
+                            executable = 'setfattr'
+                            opts = ['-n', key, '-v', value]
                         elif user_has_xattr:
-                            cmd = ['xattr', '-w', key, value, path]
-
-                        subprocess_check_output(cmd)
+                            executable = 'xattr'
+                            opts = ['-w', key, value]
+
+                        cmd = ([encodeFilename(executable, True)] +
+                               [encodeArgument(o) for o in opts] +
+                               [encodeFilename(path, True)])
+
+                        try:
+                            p = subprocess.Popen(
+                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+                        except EnvironmentError as e:
+                            raise XAttrMetadataError(e.errno, e.strerror)
+                        stdout, stderr = p.communicate()
+                        stderr = stderr.decode('utf-8', 'replace')
+                        if p.returncode != 0:
+                            raise XAttrMetadataError(p.returncode, stderr)
 
                 else:
                     # On Unix, and can't find pyxattr, setfattr, or xattr.
@@ -121,6 +158,19 @@ class XAttrMetadataPP(PostProcessor):
 
             return [], info
 
-        except (subprocess.CalledProcessError, OSError):
-            self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
+        except XAttrMetadataError as e:
+            if e.reason == 'NO_SPACE':
+                self._downloader.report_warning(
+                    'There\'s no disk space left or disk quota exceeded. ' +
+                    'Extended attributes are not written.')
+            elif e.reason == 'VALUE_TOO_LONG':
+                self._downloader.report_warning(
+                    'Unable to write extended attributes due to too long values.')
+            else:
+                msg = 'This filesystem doesn\'t support extended attributes. '
+                if os.name == 'nt':
+                    msg += 'You need to use NTFS.'
+                else:
+                    msg += '(You may have to enable them in your /etc/fstab)'
+                self._downloader.report_error(msg)
             return [], info