X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=55c345e8a8d4f2c48ff2620fa56df98cecd5db6b;hb=41cc67c542046d300fc0a8a6bfb6c471db2db6b2;hp=7f7508c74f1b98de4d93ee51472d0b32f9184e98;hpb=693b8b2d310e119417787e7b06d1e1832d16f05d;p=youtube-dl

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7f7508c74..55c345e8a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -162,23 +162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                           # Dash audio
                           '141', '172', '140', '171', '139',
                           ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
-                                      # Apple HTTP Live Streaming
-                                      '96', '95', '94', '93', '92', '132', '151',
-                                      # 3D
-                                      '85', '102', '84', '101', '83', '100', '82',
-                                      # Dash video
-                                      '138', '248', '137', '247', '136', '246', '245',
-                                      '244', '135', '243', '134', '242', '133', '160',
-                                      # Dash audio
-                                      '172', '141', '171', '140', '139',
-                                      ]
-    _video_formats_map = {
-        'flv': ['35', '34', '6', '5'],
-        '3gp': ['36', '17', '13'],
-        'mp4': ['38', '37', '22', '18'],
-        'webm': ['46', '45', '44', '43'],
-    }
     _video_extensions = {
         '13': '3gp',
         '17': '3gp',
@@ -236,54 +219,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '248': 'webm',
     }
     _video_dimensions = {
-        '5': '400x240',
-        '6': '???',
-        '13': '???',
-        '17': '176x144',
-        '18': '640x360',
-        '22': '1280x720',
-        '34': '640x360',
-        '35': '854x480',
-        '36': '320x240',
-        '37': '1920x1080',
-        '38': '4096x3072',
-        '43': '640x360',
-        '44': '854x480',
-        '45': '1280x720',
-        '46': '1920x1080',
-        '82': '360p',
-        '83': '480p',
-        '84': '720p',
-        '85': '1080p',
-        '92': '240p',
-        '93': '360p',
-        '94': '480p',
-        '95': '720p',
-        '96': '1080p',
-        '100': '360p',
-        '101': '480p',
-        '102': '720p',
-        '132': '240p',
-        '151': '72p',
-        '133': '240p',
-        '134': '360p',
-        '135': '480p',
-        '136': '720p',
-        '137': '1080p',
-        '138': '>1080p',
-        '139': '48k',
-        '140': '128k',
-        '141': '256k',
-        '160': '192p',
-        '171': '128k',
-        '172': '256k',
-        '242': '240p',
-        '243': '360p',
-        '244': '480p',
-        '245': '480p',
-        '246': '480p',
-        '247': '720p',
-        '248': '1080p',
+        '5': {'width': 400, 'height': 240},
+        '6': {},
+        '13': {},
+        '17': {'width': 176, 'height': 144},
+        '18': {'width': 640, 'height': 360},
+        '22': {'width': 1280, 'height': 720},
+        '34': {'width': 640, 'height': 360},
+        '35': {'width': 854, 'height': 480},
+        '36': {'width': 320, 'height': 240},
+        '37': {'width': 1920, 'height': 1080},
+        '38': {'width': 4096, 'height': 3072},
+        '43': {'width': 640, 'height': 360},
+        '44': {'width': 854, 'height': 480},
+        '45': {'width': 1280, 'height': 720},
+        '46': {'width': 1920, 'height': 1080},
+        '82': {'height': 360, 'display': '360p'},
+        '83': {'height': 480, 'display': '480p'},
+        '84': {'height': 720, 'display': '720p'},
+        '85': {'height': 1080, 'display': '1080p'},
+        '92': {'height': 240, 'display': '240p'},
+        '93': {'height': 360, 'display': '360p'},
+        '94': {'height': 480, 'display': '480p'},
+        '95': {'height': 720, 'display': '720p'},
+        '96': {'height': 1080, 'display': '1080p'},
+        '100': {'height': 360, 'display': '360p'},
+        '101': {'height': 480, 'display': '480p'},
+        '102': {'height': 720, 'display': '720p'},
+        '132': {'height': 240, 'display': '240p'},
+        '151': {'height': 72, 'display': '72p'},
+        '133': {'height': 240, 'display': '240p'},
+        '134': {'height': 360, 'display': '360p'},
+        '135': {'height': 480, 'display': '480p'},
+        '136': {'height': 720, 'display': '720p'},
+        '137': {'height': 1080, 'display': '1080p'},
+        '138': {'height': 1081, 'display': '>1080p'},
+        '139': {'display': '48k'},
+        '140': {'display': '128k'},
+        '141': {'display': '256k'},
+        '160': {'height': 192, 'display': '192p'},
+        '171': {'display': '128k'},
+        '172': {'display': '256k'},
+        '242': {'height': 240, 'display': '240p'},
+        '243': {'height': 360, 'display': '360p'},
+        '244': {'height': 480, 'display': '480p'},
+        '245': {'height': 480, 'display': '480p'},
+        '246': {'height': 480, 'display': '480p'},
+        '247': {'height': 720, 'display': '720p'},
+        '248': {'height': 1080, 'display': '1080p'},
     }
     _special_itags = {
         '82': '3D',
@@ -1153,13 +1136,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             self._downloader.report_warning(err_msg)
             return {}
 
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for x in formats:
-            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
-                                        self._video_dimensions.get(x, '???'),
-                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
-
     def _extract_id(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
         if mobj is None:
@@ -1172,48 +1148,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         Transform a dictionary in the format {itag:url} to a list of (itag, url)
         with the requested formats.
         """
-        req_format = self._downloader.params.get('format', None)
-        format_limit = self._downloader.params.get('format_limit', None)
-        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
-        if format_limit is not None and format_limit in available_formats:
-            format_list = available_formats[available_formats.index(format_limit):]
-        else:
-            format_list = available_formats
-        existing_formats = [x for x in format_list if x in url_map]
+        existing_formats = [x for x in self._available_formats if x in url_map]
         if len(existing_formats) == 0:
             raise ExtractorError(u'no known formats available for video')
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(existing_formats)
-            return
-        if req_format is None or req_format == 'best':
-            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-        elif req_format == 'worst':
-            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
-        elif req_format in ('-1', 'all'):
-            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-        else:
-            # Specific formats. We pick the first in a slash-delimeted sequence.
-            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
-            # available in the specified format. For example,
-            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
-            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
-            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
-            req_formats = req_format.split('/')
-            video_url_list = None
-            for rf in req_formats:
-                if rf in url_map:
-                    video_url_list = [(rf, url_map[rf])]
-                    break
-                if rf in self._video_formats_map:
-                    for srf in self._video_formats_map[rf]:
-                        if srf in url_map:
-                            video_url_list = [(srf, url_map[srf])]
-                            break
-                    else:
-                        continue
-                    break
-            if video_url_list is None:
-                raise ExtractorError(u'requested format not available')
+        video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
+        video_url_list.reverse() # order worst to best
         return video_url_list
 
     def _extract_from_m3u8(self, manifest_url, video_id):
@@ -1361,7 +1300,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 video_description = u''
 
         def _extract_count(klass):
-            count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+            count = self._search_regex(
+                r'class="%s">([\d,]+)</span>' % re.escape(klass),
+                video_webpage, klass, default=None)
             if count is not None:
                 return int(count.replace(',', ''))
             return None
@@ -1377,9 +1318,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         if 'length_seconds' not in video_info:
             self._downloader.report_warning(u'unable to extract video duration')
-            video_duration = ''
+            video_duration = None
         else:
-            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
 
         # annotations
         video_annotations = None
@@ -1460,50 +1401,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         url += '&ratebypass=yes'
                     url_map[url_data['itag'][0]] = url
             video_url_list = self._get_video_url_list(url_map)
-            if not video_url_list:
-                return
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
             url_map = self._extract_from_m3u8(manifest_url, video_id)
             video_url_list = self._get_video_url_list(url_map)
-            if not video_url_list:
-                return
-
         else:
             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
-        results = []
+        formats = []
         for itag, video_real_url in video_url_list:
             # Extension
             video_extension = self._video_extensions.get(itag, 'flv')
+            resolution = self._video_dimensions.get(itag, {}).get('display')
+            width = self._video_dimensions.get(itag, {}).get('width')
+            height = self._video_dimensions.get(itag, {}).get('height')
+            note = self._special_itags.get(itag)
 
             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
-                                              self._video_dimensions.get(itag, '???'),
+                                              '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
 
-            results.append({
-                'id':       video_id,
-                'url':      video_real_url,
-                'uploader': video_uploader,
-                'uploader_id': video_uploader_id,
-                'upload_date':  upload_date,
-                'title':    video_title,
-                'ext':      video_extension,
-                'format':   video_format,
-                'format_id': itag,
-                'thumbnail':    video_thumbnail,
-                'description':  video_description,
-                'player_url':   player_url,
-                'subtitles':    video_subtitles,
-                'duration':     video_duration,
-                'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations,
-                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
-                'view_count': view_count,
-                'like_count': like_count,
-                'dislike_count': dislike_count,
+            formats.append({
+                'url':         video_real_url,
+                'ext':         video_extension,
+                'format':      video_format,
+                'format_id':   itag,
+                'player_url':  player_url,
+                '_resolution': resolution,
+                'width':       width,
+                'height':      height,
+                'format_note': note,
             })
-        return results
+
+        def _formats_key(f):
+            note = f.get('format_note')
+            if note is None:
+                note = u''
+            is_dash = u'DASH' in note
+            return (
+                0 if is_dash else 1,
+                f.get('height') if f.get('height') is not None else -1,
+                f.get('width') if f.get('width') is not None else -1)
+        formats.sort(key=_formats_key)
+
+        return {
+            'id':           video_id,
+            'uploader':     video_uploader,
+            'uploader_id':  video_uploader_id,
+            'upload_date':  upload_date,
+            'title':        video_title,
+            'thumbnail':    video_thumbnail,
+            'description':  video_description,
+            'subtitles':    video_subtitles,
+            'duration':     video_duration,
+            'age_limit':    18 if age_gate else 0,
+            'annotations':  video_annotations,
+            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+            'view_count':   view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats':      formats,
+        }
 
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = u'YouTube.com playlists'
@@ -1572,6 +1530,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
+        if playlist_id.startswith('TL'):
+            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         # Extract the video ids from the playlist pages
         ids = []
@@ -1594,6 +1555,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
+class YoutubeTopListIE(YoutubePlaylistIE):
+    IE_NAME = u'youtube:toplist'
+    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+        u' (Example: "yttoplist:music:Top Tracks")')
+    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+    def _real_extract(self, url):
+        """Find the top list's link on the channel page and return its videos as a playlist."""
+        mobj = re.match(self._VALID_URL, url)
+        channel = mobj.group('chann')
+        title = mobj.group('title')
+        query = compat_urllib_parse.urlencode({'title': title})
+        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+        # sometimes the webpage doesn't contain the videos;
+        # retry (without an upper bound) until we get them
+        for i in itertools.count(0):
+            msg = u'Downloading Youtube top list'
+            if i > 0:
+                msg += ', retry #%d' % i
+            webpage = self._download_webpage(url, title, msg)
+            ids = orderedSet(re.findall(video_re, webpage))
+            if ids:
+                break
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_title=title)
+
+
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
@@ -1680,7 +1673,7 @@ class YoutubeUserIE(InfoExtractor):
         # page by page until there are no video ids - it means we got
         # all of them.
 
-        video_ids = []
+        url_results = []
 
         for pagenum in itertools.count(0):
             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@@ -1698,10 +1691,17 @@ class YoutubeUserIE(InfoExtractor):
                 break
 
             # Extract video identifiers
-            ids_in_page = []
-            for entry in response['feed']['entry']:
-                ids_in_page.append(entry['id']['$t'].split('/')[-1])
-            video_ids.extend(ids_in_page)
+            entries = response['feed']['entry']
+            for entry in entries:
+                title = entry['title']['$t']
+                video_id = entry['id']['$t'].split('/')[-1]
+                url_results.append({
+                    '_type': 'url',
+                    'url': video_id,
+                    'ie_key': 'Youtube',
+                    'id': video_id,
+                    'title': title,
+                })
 
             # A little optimization - if current page is not
             # "full", ie. does not contain PAGE_SIZE video ids then
@@ -1709,12 +1709,9 @@ class YoutubeUserIE(InfoExtractor):
             # are no more ids on further pages - no need to query
             # again.
 
-            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
+            if len(entries) < self._GDATA_PAGE_SIZE:
                 break
 
-        url_results = [
-            self.url_result(video_id, 'Youtube', video_id=video_id)
-            for video_id in video_ids]
         return self.playlist_result(url_results, playlist_title=username)