Merge remote-tracking branch 'rzhxeo/youtube'

author Philipp Hagemeister <phihag@phihag.de>
Mon, 23 Dec 2013 03:37:40 +0000 (04:37 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 23 Dec 2013 03:37:40 +0000 (04:37 +0100)
diff --combined youtube_dl/YoutubeDL.py

index 766d6e5fe0ba9ed918c9d36161e1b131842bca53,a93dd41a315ce49f3d7325249fa1b267d8565ee7..e705c410b6451c983cfc43928af584025581dfa3
--- 1/youtube_dl/YoutubeDL.py
--- 2/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@@ -183,18 -183,12 +183,18 @@@ class YoutubeDL(object)
                       width_args = []
                   else:
                       width_args = ['-w', str(width)]
- -                self._fribidi = subprocess.Popen(
- -                    ['fribidi', '-c', 'UTF-8'] + width_args,
+ +                sp_kwargs = dict(
                       stdin=subprocess.PIPE,
                       stdout=slave,
                       stderr=self._err_file)
- -                self._fribidi_channel = os.fdopen(master, 'rb')
+ +                try:
+ +                    self._output_process = subprocess.Popen(
+ +                        ['bidiv'] + width_args, **sp_kwargs
+ +                    )
+ +                except OSError:
+ +                    self._output_process = subprocess.Popen(
+ +                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ +                self._output_channel = os.fdopen(master, 'rb')
               except OSError as ose:
                   if ose.errno == 2:
                       self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
@@@ -249,15 -243,14 +249,15 @@@
           pp.set_downloader(self)
   
       def _bidi_workaround(self, message):
- -        if not hasattr(self, '_fribidi_channel'):
+ +        if not hasattr(self, '_output_channel'):
               return message
   
+ +        assert hasattr(self, '_output_process')
           assert type(message) == type(u'')
           line_count = message.count(u'\n') + 1
- -        self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
- -        self._fribidi.stdin.flush()
- -        res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+ +        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
+ +        self._output_process.stdin.flush()
+ +        res = u''.join(self._output_channel.readline().decode('utf-8')
                          for _ in range(line_count))
           return res[:-len(u'\n')]
   
@@@ -643,7 -636,7 +643,7 @@@
               info_dict['playlist_index'] = None
   
          # These extractors handle format selection themselves
-         if info_dict['extractor'] in [u'youtube', u'Youku']:
+         if info_dict['extractor'] in [u'Youku']:
               if download:
                   self.process_info(info_dict)
               return info_dict
@@@ -669,10 -662,6 +669,6 @@@
               if 'ext' not in format:
                   format['ext'] = determine_ext(format['url'])
   
-         if self.params.get('listformats', None):
-             self.list_formats(info_dict)
-             return
- 
           format_limit = self.params.get('format_limit', None)
           if format_limit:
               formats = list(takewhile_inclusive(
@@@ -685,9 -674,16 +681,16 @@@
                   except ValueError:
                       ext_ord = -1
                   # We only compare the extension if they have the same height and width
-                 return (f.get('height'), f.get('width'), ext_ord)
+                 return (f.get('height') if f.get('height') is not None else -1,
+                         f.get('width') if f.get('width') is not None else -1,
+                         ext_ord)
               formats = sorted(formats, key=_free_formats_key)
   
+         info_dict['formats'] = formats
+         if self.params.get('listformats', None):
+             self.list_formats(info_dict)
+             return
+ 
           req_format = self.params.get('format', 'best')
           if req_format is None:
               req_format = 'best'
diff --combined youtube_dl/extractor/youtube.py

index a68576547e85f344d7ccaa78092fc0146b2e935e,58d27497002de5467ab08ebb9a2b0188177943e9..9fb07b3664dd72bfc8ebbfc6dae2a11374483051
--- 1/youtube_dl/extractor/youtube.py
--- 2/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@@ -162,23 -162,6 +162,6 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
                             # Dash audio
                             '141', '172', '140', '171', '139',
                             ]
-     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
-                                       # Apple HTTP Live Streaming
-                                       '96', '95', '94', '93', '92', '132', '151',
-                                       # 3D
-                                       '85', '102', '84', '101', '83', '100', '82',
-                                       # Dash video
-                                       '138', '248', '137', '247', '136', '246', '245',
-                                       '244', '135', '243', '134', '242', '133', '160',
-                                       # Dash audio
-                                       '172', '141', '171', '140', '139',
-                                       ]
-     _video_formats_map = {
-         'flv': ['35', '34', '6', '5'],
-         '3gp': ['36', '17', '13'],
-         'mp4': ['38', '37', '22', '18'],
-         'webm': ['46', '45', '44', '43'],
-     }
       _video_extensions = {
           '13': '3gp',
           '17': '3gp',
@@@ -236,54 -219,54 +219,54 @@@
           '248': 'webm',
       }
       _video_dimensions = {
-         '5': '400x240',
-         '6': '???',
-         '13': '???',
-         '17': '176x144',
-         '18': '640x360',
-         '22': '1280x720',
-         '34': '640x360',
-         '35': '854x480',
-         '36': '320x240',
-         '37': '1920x1080',
-         '38': '4096x3072',
-         '43': '640x360',
-         '44': '854x480',
-         '45': '1280x720',
-         '46': '1920x1080',
-         '82': '360p',
-         '83': '480p',
-         '84': '720p',
-         '85': '1080p',
-         '92': '240p',
-         '93': '360p',
-         '94': '480p',
-         '95': '720p',
-         '96': '1080p',
-         '100': '360p',
-         '101': '480p',
-         '102': '720p',
-         '132': '240p',
-         '151': '72p',
-         '133': '240p',
-         '134': '360p',
-         '135': '480p',
-         '136': '720p',
-         '137': '1080p',
-         '138': '>1080p',
-         '139': '48k',
-         '140': '128k',
-         '141': '256k',
-         '160': '192p',
-         '171': '128k',
-         '172': '256k',
-         '242': '240p',
-         '243': '360p',
-         '244': '480p',
-         '245': '480p',
-         '246': '480p',
-         '247': '720p',
-         '248': '1080p',
+         '5': {'width': 400, 'height': 240},
+         '6': {},
+         '13': {},
+         '17': {'width': 176, 'height': 144},
+         '18': {'width': 640, 'height': 360},
+         '22': {'width': 1280, 'height': 720},
+         '34': {'width': 640, 'height': 360},
+         '35': {'width': 854, 'height': 480},
+         '36': {'width': 320, 'height': 240},
+         '37': {'width': 1920, 'height': 1080},
+         '38': {'width': 4096, 'height': 3072},
+         '43': {'width': 640, 'height': 360},
+         '44': {'width': 854, 'height': 480},
+         '45': {'width': 1280, 'height': 720},
+         '46': {'width': 1920, 'height': 1080},
+         '82': {'height': 360, 'display': '360p'},
+         '83': {'height': 480, 'display': '480p'},
+         '84': {'height': 720, 'display': '720p'},
+         '85': {'height': 1080, 'display': '1080p'},
+         '92': {'height': 240, 'display': '240p'},
+         '93': {'height': 360, 'display': '360p'},
+         '94': {'height': 480, 'display': '480p'},
+         '95': {'height': 720, 'display': '720p'},
+         '96': {'height': 1080, 'display': '1080p'},
+         '100': {'height': 360, 'display': '360p'},
+         '101': {'height': 480, 'display': '480p'},
+         '102': {'height': 720, 'display': '720p'},
+         '132': {'height': 240, 'display': '240p'},
+         '151': {'height': 72, 'display': '72p'},
+         '133': {'height': 240, 'display': '240p'},
+         '134': {'height': 360, 'display': '360p'},
+         '135': {'height': 480, 'display': '480p'},
+         '136': {'height': 720, 'display': '720p'},
+         '137': {'height': 1080, 'display': '1080p'},
+         '138': {'height': 1081, 'display': '>1080p'},
+         '139': {'display': '48k'},
+         '140': {'display': '128k'},
+         '141': {'display': '256k'},
+         '160': {'height': 192, 'display': '192p'},
+         '171': {'display': '128k'},
+         '172': {'display': '256k'},
+         '242': {'height': 240, 'display': '240p'},
+         '243': {'height': 360, 'display': '360p'},
+         '244': {'height': 480, 'display': '480p'},
+         '245': {'height': 480, 'display': '480p'},
+         '246': {'height': 480, 'display': '480p'},
+         '247': {'height': 720, 'display': '720p'},
+         '248': {'height': 1080, 'display': '1080p'},
       }
       _special_itags = {
           '82': '3D',
@@@ -1153,13 -1136,6 +1136,6 @@@
               self._downloader.report_warning(err_msg)
               return {}
   
-     def _print_formats(self, formats):
-         print('Available formats:')
-         for x in formats:
-             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
-                                         self._video_dimensions.get(x, '???'),
-                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
- 
       def _extract_id(self, url):
           mobj = re.match(self._VALID_URL, url, re.VERBOSE)
           if mobj is None:
@@@ -1172,48 -1148,11 +1148,11 @@@
           Transform a dictionary in the format {itag:url} to a list of (itag, url)
           with the requested formats.
           """
-         req_format = self._downloader.params.get('format', None)
-         format_limit = self._downloader.params.get('format_limit', None)
-         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
-         if format_limit is not None and format_limit in available_formats:
-             format_list = available_formats[available_formats.index(format_limit):]
-         else:
-             format_list = available_formats
-         existing_formats = [x for x in format_list if x in url_map]
+         existing_formats = [x for x in self._available_formats if x in url_map]
           if len(existing_formats) == 0:
               raise ExtractorError(u'no known formats available for video')
-         if self._downloader.params.get('listformats', None):
-             self._print_formats(existing_formats)
-             return
-         if req_format is None or req_format == 'best':
-             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-         elif req_format == 'worst':
-             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
-         elif req_format in ('-1', 'all'):
-             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-         else:
-             # Specific formats. We pick the first in a slash-delimeted sequence.
-             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
-             # available in the specified format. For example,
-             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
-             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
-             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
-             req_formats = req_format.split('/')
-             video_url_list = None
-             for rf in req_formats:
-                 if rf in url_map:
-                     video_url_list = [(rf, url_map[rf])]
-                     break
-                 if rf in self._video_formats_map:
-                     for srf in self._video_formats_map[rf]:
-                         if srf in url_map:
-                             video_url_list = [(srf, url_map[srf])]
-                             break
-                     else:
-                         continue
-                     break
-             if video_url_list is None:
-                 raise ExtractorError(u'requested format not available')
+         video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
+         video_url_list.reverse() # order worst to best
           return video_url_list
   
       def _extract_from_m3u8(self, manifest_url, video_id):
@@@ -1462,50 -1401,60 +1401,60 @@@
                           url += '&ratebypass=yes'
                       url_map[url_data['itag'][0]] = url
               video_url_list = self._get_video_url_list(url_map)
-             if not video_url_list:
-                 return
           elif video_info.get('hlsvp'):
               manifest_url = video_info['hlsvp'][0]
               url_map = self._extract_from_m3u8(manifest_url, video_id)
               video_url_list = self._get_video_url_list(url_map)
-             if not video_url_list:
-                 return
- 
           else:
               raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
   
-         results = []
+         formats = []
           for itag, video_real_url in video_url_list:
               # Extension
               video_extension = self._video_extensions.get(itag, 'flv')
+             resolution = self._video_dimensions.get(itag, {}).get('display')
+             width = self._video_dimensions.get(itag, {}).get('width')
+             height = self._video_dimensions.get(itag, {}).get('height')
+             note = self._special_itags.get(itag)
   
               video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
-                                               self._video_dimensions.get(itag, '???'),
+                                               '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
                                                 ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
   
-             results.append({
-                 'id':       video_id,
-                 'url':      video_real_url,
-                 'uploader': video_uploader,
-                 'uploader_id': video_uploader_id,
-                 'upload_date':  upload_date,
-                 'title':    video_title,
-                 'ext':      video_extension,
-                 'format':   video_format,
-                 'format_id': itag,
-                 'thumbnail':    video_thumbnail,
-                 'description':  video_description,
-                 'player_url':   player_url,
-                 'subtitles':    video_subtitles,
-                 'duration':     video_duration,
-                 'age_limit':    18 if age_gate else 0,
-                 'annotations':  video_annotations,
-                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
-                 'view_count': view_count,
-                 'like_count': like_count,
-                 'dislike_count': dislike_count,
+             formats.append({
+                 'url':         video_real_url,
+                 'ext':         video_extension,
+                 'format':      video_format,
+                 'format_id':   itag,
+                 'player_url':  player_url,
+                 '_resolution': resolution,
+                 'width':       width,
+                 'height':      height,
+                 'format_note': note,
               })
-         return results
+         def _formats_key(f):
+             return (f.get('height') if f.get('height') is not None else -1,
+                     f.get('width') if f.get('width') is not None else -1)
+         formats = sorted(formats, key=_formats_key)
+ 
+         return {
+             'id':           video_id,
+             'uploader':     video_uploader,
+             'uploader_id':  video_uploader_id,
+             'upload_date':  upload_date,
+             'title':        video_title,
+             'thumbnail':    video_thumbnail,
+             'description':  video_description,
+             'subtitles':    video_subtitles,
+             'duration':     video_duration,
+             'age_limit':    18 if age_gate else 0,
+             'annotations':  video_annotations,
+             'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+             'view_count':   view_count,
+             'like_count': like_count,
+             'dislike_count': dislike_count,
+             'formats':      formats,
+         }
   
   class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
       IE_DESC = u'YouTube.com playlists'
@@@ -1717,7 -1666,7 +1666,7 @@@ class YoutubeUserIE(InfoExtractor)
           # page by page until there are no video ids - it means we got
           # all of them.
   
- -        video_ids = []
+ +        url_results = []
   
           for pagenum in itertools.count(0):
               start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@@@ -1735,17 -1684,10 +1684,17 @@@
                   break
   
               # Extract video identifiers
- -            ids_in_page = []
- -            for entry in response['feed']['entry']:
- -                ids_in_page.append(entry['id']['$t'].split('/')[-1])
- -            video_ids.extend(ids_in_page)
+ +            entries = response['feed']['entry']
+ +            for entry in entries:
+ +                title = entry['title']['$t']
+ +                video_id = entry['id']['$t'].split('/')[-1]
+ +                url_results.append({
+ +                    '_type': 'url',
+ +                    'url': video_id,
+ +                    'ie_key': 'Youtube',
+ +                    'id': 'video_id',
+ +                    'title': title,
+ +                })
   
               # A little optimization - if current page is not
               # "full", ie. does not contain PAGE_SIZE video ids then
@@@ -1753,9 -1695,12 +1702,9 @@@
               # are no more ids on further pages - no need to query
               # again.
   
- -            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
+ +            if len(entries) < self._GDATA_PAGE_SIZE:
                   break
   
- -        url_results = [
- -            self.url_result(video_id, 'Youtube', video_id=video_id)
- -            for video_id in video_ids]
           return self.playlist_result(url_results, playlist_title=username)
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 23 Dec 2013 03:37:40 +0000 (04:37 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 23 Dec 2013 03:37:40 +0000 (04:37 +0100)
Changed files (diff1 / diff2 compare against merge parents 1 and 2):
youtube_dl/YoutubeDL.py	patch | diff1 | diff2 | blob | history
youtube_dl/extractor/youtube.py	patch | diff1 | diff2 | blob | history