Merge remote-tracking branch 'alab1001101/master'

author Philipp Hagemeister <phihag@phihag.de>

Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)
diff --combined youtube_dl/InfoExtractors.py

index 13b04ab5bcce4ee1e57e46afab0b198f1a477991,82459e7a80868467eb15d3a2f840d297666495c0..cea30dad81fa4224a848732159aa19684c7d5dbc
--- 1/youtube_dl/InfoExtractors.py
--- 2/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -13,8 -13,6 +13,8 @@@ import urlli
   import urllib2
   import email.utils
   import xml.etree.ElementTree
+ +import random
+ +import math
   from urlparse import parse_qs
   
   try:
@@@ -97,26 -95,7 +97,26 @@@ class InfoExtractor(object)
   class YoutubeIE(InfoExtractor):
         """Information extractor for youtube.com."""
   
- -      _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
+ +      _VALID_URL = r"""^
+ +                       (
+ +                           (?:https?://)?                                       # http(s):// (optional)
+ +                           (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+ +                              tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+ +                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
+ +                           (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs
+ +                           (?:                                                  # the various things that can precede the ID:
+ +                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
+ +                               |(?:                                             # or the v= param in all its forms
+ +                                   (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ +                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
+ +                                   (?:.+&)?                                     # any other preceding param (like /?s=tuff&v=xxxx)
+ +                                   v=
+ +                               )
+ +                           )?                                                   # optional -> youtube.com/xxxx is OK
+ +                       )?                                                       # all until now is optional -> you can pass the naked ID
+ +                       ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
+ +                       (?(1).+)?                                                # if we found the ID, everything can follow
+ +                       $"""
         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@@@ -155,10 -134,6 +155,10 @@@
         }       
         IE_NAME = u'youtube'
   
+ +      def suitable(self, url):
+ +              """Receives a URL and returns True if suitable for this IE."""
+ +              return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+ +
         def report_lang(self):
                 """Report attempt to set language."""
                 self._downloader.to_screen(u'[youtube] Setting language')
@@@ -213,9 -188,9 +213,9 @@@
                 return srt
   
         def _print_formats(self, formats):
- -              print 'Available formats:'
+ +              print('Available formats:')
                 for x in formats:
- -                      print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
+ +                      print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
   
         def _real_initialize(self):
                 if self._downloader is None:
@@@ -238,7 -213,7 +238,7 @@@
                                 else:
                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                         except (IOError, netrc.NetrcParseError), err:
- -                              self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+ +                              self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
                                 return
   
                 # Set language
@@@ -247,7 -222,7 +247,7 @@@
                         self.report_lang()
                         urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+ +                      self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
                         return
   
                 # No authentication to be performed
@@@ -270,7 -245,7 +270,7 @@@
                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                                 return
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+ +                      self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
                         return
   
                 # Confirm age
@@@ -283,7 -258,7 +283,7 @@@
                         self.report_age_confirmation()
                         age_results = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
                         return
   
         def _real_extract(self, url):
@@@ -293,7 -268,7 +293,7 @@@
                         url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
   
                 # Extract video id from URL
- -              mobj = re.match(self._VALID_URL, url)
+ +              mobj = re.match(self._VALID_URL, url, re.VERBOSE)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
                         return
@@@ -305,7 -280,7 +305,7 @@@
                 try:
                         video_webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 # Attempt to extract SWF player URL
@@@ -327,7 -302,7 +327,7 @@@
                                 if 'token' in video_info:
                                         break
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
                                 return
                 if 'token' not in video_info:
                         if 'reason' in video_info:
@@@ -390,7 -365,7 +390,7 @@@
                                 try:
                                         srt_list = urllib2.urlopen(request).read()
                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ +                                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                                 srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
                                 srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
                                 if not srt_lang_list:
@@@ -407,19 -382,13 +407,19 @@@
                                 try:
                                         srt_xml = urllib2.urlopen(request).read()
                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ +                                      raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                                 if not srt_xml:
                                         raise Trouble(u'WARNING: unable to download video subtitles')
                                 video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
                         except Trouble as trouble:
                                 self._downloader.trouble(trouble[0])
   
+ +              if 'length_seconds' not in video_info:
+ +                      self._downloader.trouble(u'WARNING: unable to extract video duration')
+ +                      video_duration = ''
+ +              else:
+ +                      video_duration = urllib.unquote_plus(video_info['length_seconds'][0])
+ +
                 # token
                 video_token = urllib.unquote_plus(video_info['token'][0])
   
@@@ -433,7 -402,7 +433,7 @@@
                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
                         url_data = [parse_qs(uds) for uds in url_data_strs]
                         url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
- -                      url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
+ +                      url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
   
                         format_limit = self._downloader.params.get('format_limit', None)
                         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
@@@ -486,8 -455,7 +486,8 @@@
                                 'thumbnail':    video_thumbnail.decode('utf-8'),
                                 'description':  video_description,
                                 'player_url':   player_url,
- -                              'subtitles':    video_subtitles
+ +                              'subtitles':    video_subtitles,
+ +                              'duration':             video_duration
                         })
                 return results
   
@@@ -526,7 -494,7 +526,7 @@@ class MetacafeIE(InfoExtractor)
                         self.report_disclaimer()
                         disclaimer = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
                         return
   
                 # Confirm age
@@@ -539,7 -507,7 +539,7 @@@
                         self.report_age_confirmation()
                         disclaimer = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
                         return
   
         def _real_extract(self, url):
@@@ -563,7 -531,7 +563,7 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract URL, uploader and title from webpage
@@@ -603,7 -571,7 +603,7 @@@
                         return
                 video_title = mobj.group(1).decode('utf-8')
   
- -              mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
+ +              mobj = re.search(r'submitter=(.*?);', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                         return
@@@ -624,7 -592,7 +624,7 @@@
   class DailymotionIE(InfoExtractor):
         """Information Extractor for Dailymotion"""
   
- -      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
+ +      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
         IE_NAME = u'dailymotion'
   
         def __init__(self, downloader=None):
@@@ -645,9 -613,9 +645,9 @@@
                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
                         return
   
- -              video_id = mobj.group(1)
+ +              video_id = mobj.group(1).split('_')[0].split('?')[0]
   
- -              video_extension = 'flv'
+ +              video_extension = 'mp4'
   
                 # Retrieve video webpage to extract further information
                 request = urllib2.Request(url)
@@@ -656,34 -624,25 +656,34 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract URL, uploader and title from webpage
                 self.report_extraction(video_id)
- -              mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
+ +              mobj = re.search(r'\s*var flashvars = (.*)', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract media URL')
                         return
- -              sequence = urllib.unquote(mobj.group(1))
- -              mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
+ +              flashvars = urllib.unquote(mobj.group(1))
+ +
+ +              for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
+ +                      if key in flashvars:
+ +                              max_quality = key
+ +                              self._downloader.to_screen(u'[dailymotion] Using %s' % key)
+ +                              break
+ +              else:
+ +                      self._downloader.trouble(u'ERROR: unable to extract video URL')
+ +                      return
+ +
+ +              mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
                 if mobj is None:
- -                      self._downloader.trouble(u'ERROR: unable to extract media URL')
+ +                      self._downloader.trouble(u'ERROR: unable to extract video URL')
                         return
- -              mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
   
- -              # if needed add http://www.dailymotion.com/ if relative URL
+ +              video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/')
   
- -              video_url = mediaURL
+ +              # TODO: support choosing qualities
   
                 mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
                 if mobj is None:
@@@ -691,28 -650,17 +691,28 @@@
                         return
                 video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
   
- -              mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
+ +              video_uploader = u'NA'
+ +              mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
                 if mobj is None:
- -                      self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
- -                      return
- -              video_uploader = mobj.group(1)
+ +                      # lookin for official user
+ +                      mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
+ +                      if mobj_official is None:
+ +                              self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+ +                      else:
+ +                              video_uploader = mobj_official.group(1)
+ +              else:
+ +                      video_uploader = mobj.group(1)
+ +
+ +              video_upload_date = u'NA'
+ +              mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
+ +              if mobj is not None:
+ +                      video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
   
                 return [{
                         'id':           video_id.decode('utf-8'),
                         'url':          video_url.decode('utf-8'),
                         'uploader':     video_uploader.decode('utf-8'),
- -                      'upload_date':  u'NA',
+ +                      'upload_date':  video_upload_date,
                         'title':        video_title,
                         'ext':          video_extension.decode('utf-8'),
                         'format':       u'NA',
@@@ -754,7 -702,7 +754,7 @@@ class GoogleIE(InfoExtractor)
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract URL, uploader, and title from webpage
@@@ -793,7 -741,7 +793,7 @@@
                         try:
                                 webpage = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                                 return
                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
                         if mobj is None:
@@@ -849,7 -797,7 +849,7 @@@ class PhotobucketIE(InfoExtractor)
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract URL, uploader, and title from webpage
@@@ -919,7 -867,7 +919,7 @@@ class YahooIE(InfoExtractor)
                         try:
                                 webpage = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                                 return
   
                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@@ -943,7 -891,7 +943,7 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract uploader and title from webpage
@@@ -1001,7 -949,7 +1001,7 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Extract media URL from playlist XML
@@@ -1030,7 -978,7 +1030,7 @@@ class VimeoIE(InfoExtractor)
         """Information extractor for vimeo.com."""
   
         # _VALID_URL matches Vimeo URLs
- -      _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
+ +      _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
         IE_NAME = u'vimeo'
   
         def __init__(self, downloader=None):
@@@ -1059,7 -1007,7 +1059,7 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 # Now we begin extracting as much information as we can from what we
@@@ -1100,32 -1048,21 +1100,32 @@@
                 timestamp = config['request']['timestamp']
   
                 # Vimeo specific: extract video codec and quality information
+ +              # First consider quality, then codecs, then take everything
                 # TODO bind to format param
                 codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
- -              for codec in codecs:
- -                      if codec[0] in config["video"]["files"]:
- -                              video_codec = codec[0]
- -                              video_extension = codec[1]
- -                              if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
- -                              else: quality = 'sd'
+ +              files = { 'hd': [], 'sd': [], 'other': []}
+ +              for codec_name, codec_extension in codecs:
+ +                      if codec_name in config["video"]["files"]:
+ +                              if 'hd' in config["video"]["files"][codec_name]:
+ +                                      files['hd'].append((codec_name, codec_extension, 'hd'))
+ +                              elif 'sd' in config["video"]["files"][codec_name]:
+ +                                      files['sd'].append((codec_name, codec_extension, 'sd'))
+ +                              else:
+ +                                      files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+ +
+ +              for quality in ('hd', 'sd', 'other'):
+ +                      if len(files[quality]) > 0:
+ +                              video_quality = files[quality][0][2]
+ +                              video_codec = files[quality][0][0]
+ +                              video_extension = files[quality][0][1]
+ +                              self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
                                 break
                 else:
                         self._downloader.trouble(u'ERROR: no known codec found')
                         return
   
                 video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- -                                      %(video_id, sig, timestamp, quality, video_codec.upper())
+ +                                      %(video_id, sig, timestamp, video_quality, video_codec.upper())
   
                 return [{
                         'id':           video_id,
@@@ -1140,6 -1077,161 +1140,143 @@@
                 }]
   
   
- -        """arte.tv information extractor."""
- -
- -        _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
- -        _LIVE_URL = r'index-[0-9]+\.html$'
- -
- -        IE_NAME = u'arte.tv'
- -
- -        def __init__(self, downloader=None):
- -                InfoExtractor.__init__(self, downloader)
- -
- -        def report_download_webpage(self, video_id):
- -                """Report webpage download."""
- -                self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
- -
- -        def report_extraction(self, video_id):
- -                """Report information extraction."""
- -                self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
- -
- -        def fetch_webpage(self, url):
- -                self._downloader.increment_downloads()
- -                request = urllib2.Request(url)
- -                try:
- -                        self.report_download_webpage(url)
- -                        webpage = urllib2.urlopen(request).read()
- -                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                        self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
- -                        return
- -                except ValueError, err:
- -                        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
- -                        return
- -                return webpage
- -
- -        def grep_webpage(self, url, regex, regexFlags, matchTuples):
- -                page = self.fetch_webpage(url)
- -                mobj = re.search(regex, page, regexFlags)
- -                info = {}
- -
- -                if mobj is None:
- -                    self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
- -                    return
- -
- -                for (i, key, err) in matchTuples:
- -                    if mobj.group(i) is None:
- -                        self._downloader.trouble(err)
- -                        return
- -                    else:
- -                        info[key] = mobj.group(i)
- -
- -                return info
- -
- -        def extractLiveStream(self, url):
- -
- -                video_lang = url.split('/')[-4]
- -
- -                info = self.grep_webpage(
- -                    url,
- -                    r'src="(.*?/videothek_js.*?\.js)',
- -                    0,
- -                    [
- -                        (1, 'url', u'ERROR: Invalid URL: %s' % url)
- -                    ]
- -                )
- -
- -                http_host = url.split('/')[2]
- -                next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
- -
- -                info = self.grep_webpage(
- -                    next_url,
- -                    r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
- -                     '(http://.*?\.swf).*?' +
- -                     '(rtmp://.*?)\'',
- -                    re.DOTALL,
- -                    [
- -                        (1, 'path',   u'ERROR: could not extract video path: %s' % url),
- -                        (2, 'player', u'ERROR: could not extract video player: %s' % url),
- -                        (3, 'url',    u'ERROR: could not extract video url: %s' % url)
- -                    ]
- -                )
- -
- -                video_url = u'%s/%s' % (info.get('url'), info.get('path'))
- -
- -                print u'rtmpdump --swfVfy \'%s\' --rtmp \'%s\' --live -o arte-live.mp4' % (info.get('player'), video_url)
- -
- -        def extractPlus7Stream(self, url):
- -
- -                video_lang = url.split('/')[-3]
- -
- -                info = self.grep_webpage(
- -                    url,
- -                    r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
- -                    0,
- -                    [
- -                        (1, 'url', u'ERROR: Invalid URL: %s' % url)
- -                    ]
- -                )
- -
- -                next_url = urllib.unquote(info.get('url'))
- -
- -                info = self.grep_webpage(
- -                    next_url,
- -                    r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
- -                    0,
- -                    [
- -                        (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
- -                    ]
- -                )
- -
- -                next_url = urllib.unquote(info.get('url'))
- -
- -                info = self.grep_webpage(
- -                    next_url,
- -                    r'<video id="(.*?)".*?>.*?' +
- -                     '<name>(.*?)</name>.*?' +
- -                     '<dateVideo>(.*?)</dateVideo>.*?' +
- -                     '<url quality="hd">(.*?)</url>',
- -                    re.DOTALL,
- -                    [
- -                        (1, 'id',    u'ERROR: could not extract video id: %s' % url),
- -                        (2, 'title', u'ERROR: could not extract video title: %s' % url),
- -                        (3, 'date',  u'ERROR: could not extract video date: %s' % url),
- -                        (4, 'url',   u'ERROR: could not extract video url: %s' % url)
- -                    ]
- -                )
- -
- -                return {
- -                    'id':           info.get('id'),
- -                    'url':          urllib.unquote(info.get('url')),
- -                    'uploader':     u'arte.tv',
- -                    'upload_date':  info.get('date'),
- -                    'title':        info.get('title'),
- -                    'ext':          u'mp4',
- -                    'format':       u'NA',
- -                    'player_url':   None,
- -                }
- -
- -        def _real_extract(self, url):
- -
- -                video_id = url.split('/')[-1]
- -
- -                self.report_extraction(video_id)
- -
- -                if re.search(self._LIVE_URL, video_id) is not None:
- -                    self.extractLiveStream(url)
- -                    return
- -                else:
- -                    info = self.extractPlus7Stream(url)
- -
- -                try:
- -                        # Process video information
- -                        self._downloader.process_info(info)
- -                except UnavailableVideoError, err:
- -                        self._downloader.trouble(u'\nERROR: unable to download video')
+ class ArteTvIE(InfoExtractor):
++      """arte.tv information extractor."""
++
++      _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
++      _LIVE_URL = r'index-[0-9]+\.html$'  # searched in the last URL path segment by _real_extract to detect live pages
++
++      IE_NAME = u'arte.tv'
++
++      def __init__(self, downloader=None):
++              InfoExtractor.__init__(self, downloader)
++
++      def report_download_webpage(self, video_id):
++              """Report webpage download."""
++              self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
++
++      def report_extraction(self, video_id):
++              """Report information extraction."""
++              self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
++
++      def fetch_webpage(self, url):  # Download url and return its body; on failure report via trouble() and return None
++              self._downloader.increment_downloads()
++              request = urllib2.Request(url)
++              try:
++                      self.report_download_webpage(url)
++                      webpage = urllib2.urlopen(request).read()
++              except (urllib2.URLError, httplib.HTTPException, socket.error), err:
++                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
++                      return
++              except ValueError, err:
++                      self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
++                      return
++              return webpage
++
++      def grep_webpage(self, url, regex, regexFlags, matchTuples):  # Fetch url, search it with regex/regexFlags, and return {key: group text}; matchTuples holds (group index, key, error message) entries
++              page = self.fetch_webpage(url)  # NOTE(review): fetch_webpage returns None on failure and that case is not checked before re.search
++              mobj = re.search(regex, page, regexFlags)
++              info = {}
++
++              if mobj is None:
++                      self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
++                      return
++
++              for (i, key, err) in matchTuples:
++                      if mobj.group(i) is None:  # group did not participate in the match: report its message and give up
++                              self._downloader.trouble(err)
++                              return
++                      else:
++                              info[key] = mobj.group(i)
++
++              return info
++
++      def extractLiveStream(self, url):  # Find the videothek_js script referenced by a live page and pull path, player and rtmp URL out of it
++              video_lang = url.split('/')[-4]  # presumably the fr/de language segment of the live URL -- TODO confirm
++              info = self.grep_webpage(
++                      url,
++                      r'src="(.*?/videothek_js.*?\.js)',
++                      0,
++                      [
++                              (1, 'url', u'ERROR: Invalid URL: %s' % url)
++                      ]
++              )
++              http_host = url.split('/')[2]  # host part; assumes url starts with a scheme such as http://
++              next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
++              info = self.grep_webpage(
++                      next_url,
++                      r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
++                              '(http://.*?\.swf).*?' +
++                              '(rtmp://.*?)\'',
++                      re.DOTALL,
++                      [
++                              (1, 'path',   u'ERROR: could not extract video path: %s' % url),
++                              (2, 'player', u'ERROR: could not extract video player: %s' % url),
++                              (3, 'url',    u'ERROR: could not extract video url: %s' % url)
++                      ]
++              )
++              video_url = u'%s/%s' % (info.get('url'), info.get('path'))  # NOTE(review): built but neither returned nor stored, so this method yields None
++
++      def extractPlus7Stream(self, url):  # Follow videorefFileUrl -> <video ref=...> -> video XML and return the info dict for the "hd" quality URL
++              video_lang = url.split('/')[-3]  # presumably the fr/de segment allowed by _VALID_URL -- TODO confirm for deeper paths
++              info = self.grep_webpage(
++                      url,
++                      r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
++                      0,
++                      [
++                              (1, 'url', u'ERROR: Invalid URL: %s' % url)
++                      ]
++              )
++              next_url = urllib.unquote(info.get('url'))  # NOTE(review): info is None when grep_webpage reported an error; not checked here
++              info = self.grep_webpage(
++                      next_url,
++                      r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
++                      0,
++                      [
++                              (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
++                      ]
++              )
++              next_url = urllib.unquote(info.get('url'))
++
++              info = self.grep_webpage(
++                      next_url,
++                      r'<video id="(.*?)".*?>.*?' +
++                              '<name>(.*?)</name>.*?' +
++                              '<dateVideo>(.*?)</dateVideo>.*?' +
++                              '<url quality="hd">(.*?)</url>',
++                      re.DOTALL,
++                      [
++                              (1, 'id',    u'ERROR: could not extract video id: %s' % url),
++                              (2, 'title', u'ERROR: could not extract video title: %s' % url),
++                              (3, 'date',  u'ERROR: could not extract video date: %s' % url),
++                              (4, 'url',   u'ERROR: could not extract video url: %s' % url)
++                      ]
++              )
++
++              return {
++                      'id':           info.get('id'),
++                      'url':          urllib.unquote(info.get('url')),
++                      'uploader':     u'arte.tv',
++                      'upload_date':  info.get('date'),
++                      'title':        info.get('title'),
++                      'ext':          u'mp4',
++                      'format':       u'NA',
++                      'player_url':   None,
++              }
++
++      def _real_extract(self, url):  # Dispatch to live or Plus7 extraction based on the last URL path segment
++              video_id = url.split('/')[-1]
++              self.report_extraction(video_id)
++
++              if re.search(self._LIVE_URL, video_id) is not None:
++                      self.extractLiveStream(url)
++                      return  # live pages produce no result: extractLiveStream returns nothing
++              else:
++                      info = self.extractPlus7Stream(url)
++
++              return [info]
+ 
+ 
   class GenericIE(InfoExtractor):
         """Generic last-resort information extractor."""
   
@@@ -1225,7 -1317,7 +1362,7 @@@
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
                 except ValueError, err:
                         # since this is the last-resort InfoExtractor, if
@@@ -1346,7 -1438,7 +1483,7 @@@ class YoutubeSearchIE(InfoExtractor)
                         try:
                                 data = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
                                 return
                         api_response = json.loads(data)['data']
   
@@@ -1423,7 -1515,7 +1560,7 @@@ class GoogleSearchIE(InfoExtractor)
                         try:
                                 page = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                 return
   
                         # Extract video identifiers
@@@ -1506,7 -1598,7 +1643,7 @@@ class YahooSearchIE(InfoExtractor)
                         try:
                                 page = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                 return
   
                         # Extract video identifiers
@@@ -1532,9 -1624,9 +1669,9 @@@
   class YoutubePlaylistIE(InfoExtractor):
         """Information Extractor for YouTube playlists."""
   
- -      _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
+ +      _VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
- -      _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;list=(PL)?%s&'
+ +      _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;([^&"]+&amp;)*list=.*?%s'
         _MORE_PAGES_INDICATOR = r'yt-uix-pager-next'
         IE_NAME = u'youtube:playlist'
   
@@@ -1576,7 -1668,7 +1713,7 @@@
                         try:
                                 page = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                 return
   
                         # Extract video identifiers
@@@ -1602,56 -1694,6 +1739,56 @@@
                 return
   
   
+ +class YoutubeChannelIE(InfoExtractor):
+ +      """Information Extractor for YouTube channels."""
+ +
+ +      _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)(?:/.*)?$"  # group 1 is the channel id
+ +      _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'  # filled with (channel_id, pagenum)
+ +      _MORE_PAGES_INDICATOR = r'yt-uix-button-content">Next' # TODO
+ +      IE_NAME = u'youtube:channel'
+ +
+ +      def report_download_page(self, channel_id, pagenum):
+ +              """Report attempt to download channel page with given number."""
+ +              self._downloader.to_screen(u'[youtube] Channel %s: Downloading page #%s' % (channel_id, pagenum))
+ +
+ +      def _real_extract(self, url):  # Collect the video ids from every channel page, then pass each watch URL to the downloader
+ +              # Extract channel id
+ +              mobj = re.match(self._VALID_URL, url)
+ +              if mobj is None:
+ +                      self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+ +                      return
+ +
+ +              # Download channel pages
+ +              channel_id = mobj.group(1)
+ +              video_ids = []
+ +              pagenum = 1
+ +
+ +              while True:  # keep paging until _MORE_PAGES_INDICATOR is absent from the page
+ +                      self.report_download_page(channel_id, pagenum)
+ +                      url = self._TEMPLATE_URL % (channel_id, pagenum)  # rebinds the url parameter to the page URL
+ +                      request = urllib2.Request(url)
+ +                      try:
+ +                              page = urllib2.urlopen(request).read()
+ +                      except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ +                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+ +                              return
+ +
+ +                      # Extract video identifiers
+ +                      ids_in_page = []
+ +                      for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&', page):
+ +                              if mobj.group(1) not in ids_in_page:  # de-duplicate within this page only, keeping first-seen order
+ +                                      ids_in_page.append(mobj.group(1))
+ +                      video_ids.extend(ids_in_page)
+ +
+ +                      if re.search(self._MORE_PAGES_INDICATOR, page) is None:
+ +                              break
+ +                      pagenum = pagenum + 1
+ +
+ +              for id in video_ids:  # each id is downloaded through its regular watch URL
+ +                      self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
+ +              return
+ +
+ +
   class YoutubeUserIE(InfoExtractor):
         """Information Extractor for YouTube users."""
   
@@@ -1696,7 -1738,7 +1833,7 @@@
                         try:
                                 page = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                 return
   
                         # Extract video identifiers
@@@ -1768,7 -1810,7 +1905,7 @@@ class BlipTVUserIE(InfoExtractor)
                         mobj = re.search(r'data-users-id="([^"]+)"', page)
                         page_base = page_base % mobj.group(1)
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                         return
   
   
@@@ -1856,7 -1898,7 +1993,7 @@@ class DepositFilesIE(InfoExtractor)
                         self.report_download_webpage(file_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
                         return
   
                 # Search for the real file URL
@@@ -1973,7 -2015,7 +2110,7 @@@ class FacebookIE(InfoExtractor)
                                 else:
                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                         except (IOError, netrc.NetrcParseError), err:
- -                              self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+ +                              self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
                                 return
   
                 if useremail is None:
@@@ -1993,7 -2035,7 +2130,7 @@@
                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                                 return
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+ +                      self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
                         return
   
         def _real_extract(self, url):
@@@ -2010,7 -2052,7 +2147,7 @@@
                         page = urllib2.urlopen(request)
                         video_webpage = page.read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 # Start extracting information
@@@ -2144,13 -2186,13 +2281,13 @@@ class BlipTVIE(InfoExtractor)
                                         'urlhandle': urlh
                                 }
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
                         return
                 if info is None: # Regular URL
                         try:
                                 json_code = urlh.read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                              self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
+ +                              self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
                                 return
   
                         try:
@@@ -2218,7 -2260,7 +2355,7 @@@ class MyVideoIE(InfoExtractor)
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                         return
   
                 self.report_extraction(video_id)
@@@ -2253,25 -2295,6 +2390,25 @@@ class ComedyCentralIE(InfoExtractor)
         _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
         IE_NAME = u'comedycentral'
   
+ +      _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
+ +
+ +      _video_extensions = {
+ +              '3500': 'mp4',
+ +              '2200': 'mp4',
+ +              '1700': 'mp4',
+ +              '1200': 'mp4',
+ +              '750': 'mp4',
+ +              '400': 'mp4',
+ +      }
+ +      _video_dimensions = {
+ +              '3500': '1280x720',
+ +              '2200': '960x540',
+ +              '1700': '768x432',
+ +              '1200': '640x360',
+ +              '750': '512x288',
+ +              '400': '384x216',
+ +      }
+ +
         def report_extraction(self, episode_id):
                 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
   
@@@ -2284,13 -2307,6 +2421,13 @@@
         def report_player_url(self, episode_id):
                 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
   
+ +
+ +      def _print_formats(self, formats):  # Print a header, then one line per format id with its extension and dimensions
+ +              print('Available formats:')
+ +              for x in formats:
+ +                      print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))  # ids missing from the tables fall back to 'mp4' and '???'
+ +
+ +
         def _real_extract(self, url):
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@@ -2331,19 -2347,10 +2468,19 @@@
                         epTitle = mobj.group('episode')
   
                 mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+ +
                 if len(mMovieParams) == 0:
- -                      self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
- -                      return
+ +                      # The Colbert Report embeds the information in a data-mgid
+ +                      # attribute without a URL prefix, so extract that alternate
+ +                      # reference and then add the URL prefix manually.
   
+ +                      altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+ +                      if len(altMovieParams) == 0:
+ +                              self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+ +                              return
+ +                      else:
+ +                              mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+ +              
                 playerUrl_raw = mMovieParams[0][0]
                 self.report_player_url(epTitle)
                 try:
@@@ -2392,31 -2399,10 +2529,31 @@@
                         if len(turls) == 0:
                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                                 continue
+ +                      
+ +                      if self._downloader.params.get('listformats', None):
+ +                              self._print_formats([i[0] for i in turls])
+ +                              return
   
                         # For now, just pick the highest bitrate
                         format,video_url = turls[-1]
   
+ +                      # Get the format arg from the arg stream
+ +                      req_format = self._downloader.params.get('format', None)
+ +
+ +                      # Select format if we can find one
+ +                      for f,v in turls:
+ +                              if f == req_format:
+ +                                      format, video_url = f, v
+ +                                      break
+ +
+ +                      # Patch to download from alternative CDN, which does not
+ +                      # break on current RTMPDump builds
+ +                      broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
+ +                      better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
+ +
+ +                      if video_url.startswith(broken_cdn):
+ +                              video_url = video_url.replace(broken_cdn, better_cdn)
+ +
                         effTitle = showId + u'-' + epTitle
                         info = {
                                 'id': shortMediaId,
@@@ -2428,7 -2414,7 +2565,7 @@@
                                 'format': format,
                                 'thumbnail': None,
                                 'description': officialTitle,
- -                              'player_url': playerUrl
+ +                              'player_url': None #playerUrl
                         }
   
                         results.append(info)
@@@ -2536,7 -2522,7 +2673,7 @@@ class CollegeHumorIE(InfoExtractor)
                 try:
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@@ -2555,7 -2541,7 +2692,7 @@@
                 try:
                         metaXml = urllib2.urlopen(xmlUrl).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
                         return
   
                 mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@@ -2601,7 -2587,7 +2738,7 @@@ class XVideosIE(InfoExtractor)
                 try:
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 self.report_extraction(video_id)
@@@ -2687,7 -2673,7 +2824,7 @@@ class SoundcloudIE(InfoExtractor)
                 try:
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 self.report_extraction('%s/%s' % (uploader, slug_title))
@@@ -2714,7 -2700,7 +2851,7 @@@
                 mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
                 if mobj:
                         description = mobj.group(1)
- -              
+ +
                 # upload date
                 upload_date = None
                 mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
@@@ -2722,7 -2708,7 +2859,7 @@@
                         try:
                                 upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
                         except Exception, e:
- -                              self._downloader.to_stderr(str(e))
+ +                              self._downloader.to_stderr(compat_str(e))
   
                 # for soundcloud, a request to a cross domain is required for cookies
                 request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@@ -2766,7 -2752,7 +2903,7 @@@ class InfoQIE(InfoExtractor)
                 try:
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 self.report_extraction(url)
@@@ -2852,15 -2838,15 +2989,15 @@@ class MixcloudIE(InfoExtractor)
                 return None
   
         def _print_formats(self, formats):  # Print a header, then one line per format/bitrate pair with the file extension
- -              print 'Available formats:'
+ +              print('Available formats:')
                 for fmt in formats.keys():
                         for b in formats[fmt]:
                                 try:
                                         ext = formats[fmt][b][0]
- -                                      print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
+ +                                      print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                                 except TypeError: # we have no bitrate info
                                         ext = formats[fmt][0]
- -                                      print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
+ +                                      print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                                         break
   
         def _real_extract(self, url):
@@@ -2880,7 -2866,7 +3017,7 @@@
                         self.report_download_json(file_url)
                         jsonData = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
                         return
   
                 # parse JSON
@@@ -3064,7 -3050,7 +3201,7 @@@ class MTVIE(InfoExtractor)
                 try:
                         webpage = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                         return
   
                 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@@ -3097,7 -3083,7 +3234,7 @@@
                 try:
                         metadataXml = urllib2.urlopen(request).read()
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- -                      self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
+ +                      self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
                         return
   
                 mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@@ -3124,314 -3110,3 +3261,314 @@@
                 }
   
                 return [info]
+ +
+ +
+ +class YoukuIE(InfoExtractor):
+ +      """Information extractor for v.youku.com."""
+ +
+ +      _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+ +      IE_NAME = u'Youku'
+ +
+ +      def __init__(self, downloader=None):
+ +              InfoExtractor.__init__(self, downloader)
+ +
+ +      def report_download_webpage(self, file_id):
+ +              """Report webpage download."""
+ +              self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id)
+ +
+ +      def report_extraction(self, file_id):
+ +              """Report information extraction."""
+ +              self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id)
+ +
+ +      def _gen_sid(self):
+ +              """Return a session id: the current time in milliseconds followed by two random numbers."""
+ +              now_ms = int(time.time() * 1000)
+ +              random1 = random.randint(1000, 1998)
+ +              random2 = random.randint(1000, 9999)
+ +
+ +              return "%d%d%d" % (now_ms, random1, random2)
+ +
+ +      def _get_file_ID_mix_string(self, seed):
+ +              """Return the file id alphabet shuffled according to seed.
+ +
+ +              The result is a list of single characters that _get_file_id()
+ +              uses as a lookup table.
+ +              """
+ +              mixed = []
+ +              source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
+ +              seed = float(seed)
+ +              # Every round moves exactly one character from source to mixed
+ +              while source:
+ +                      seed = (seed * 211 + 30031) % 65536
+ +                      index = int(math.floor(seed / 65536 * len(source)))
+ +                      # The characters are all distinct, so popping by position
+ +                      # is the same as removing by value
+ +                      mixed.append(source.pop(index))
+ +              return mixed
+ +
+ +      def _get_file_id(self, fileId, seed):
+ +              """Decode an obfuscated file id.
+ +
+ +              fileId is a '*'-separated list of indexes into the alphabet
+ +              shuffled with seed; empty fields are skipped.
+ +              """
+ +              mixed = self._get_file_ID_mix_string(seed)
+ +              return ''.join(mixed[int(ch)] for ch in fileId.split('*') if ch)
+ +
+ +      def _real_extract(self, url):
+ +              """Return a list with one info dictionary per video segment.
+ +
+ +              Problems are reported through self._downloader.trouble() and
+ +              make the method return None.
+ +              """
+ +              mobj = re.match(self._VALID_URL, url)
+ +              if mobj is None:
+ +                      self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+ +                      return
+ +              video_id = mobj.group('ID')
+ +
+ +              info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
+ +
+ +              request = urllib2.Request(info_url, None, std_headers)
+ +              try:
+ +                      self.report_download_webpage(video_id)
+ +                      jsondata = urllib2.urlopen(request).read()
+ +              except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ +                      return
+ +
+ +              self.report_extraction(video_id)
+ +              try:
+ +                      data = json.loads(jsondata)['data'][0]
+ +
+ +                      video_title = data['title']
+ +                      seed = data['seed']
+ +
+ +                      video_format = self._downloader.params.get('format', None)
+ +                      supported_format = data['streamfileids'].keys()
+ +
+ +                      if video_format is None or video_format == 'best':
+ +                              if 'hd2' in supported_format:
+ +                                      video_format = 'hd2'
+ +                              else:
+ +                                      video_format = 'flv'
+ +                              ext = u'flv'
+ +                      elif video_format == 'worst':
+ +                              video_format = 'mp4'
+ +                              ext = u'mp4'
+ +                      else:
+ +                              video_format = 'flv'
+ +                              ext = u'flv'
+ +
+ +                      # Decode the file id here so that malformed data is
+ +                      # reported below instead of raising a traceback
+ +                      fileid = self._get_file_id(data['streamfileids'][video_format], seed)
+ +                      keys = [seg['k'] for seg in data['segs'][video_format]]
+ +
+ +                      # TODO: check for an error reply; Youku videos can only
+ +                      # be viewed from mainland China
+ +              except (ValueError, KeyError, IndexError, TypeError, AttributeError):
+ +                      # Only data errors are caught, so that KeyboardInterrupt
+ +                      # and SystemExit still propagate
+ +                      self._downloader.trouble(u'ERROR: unable to extract info section')
+ +                      return
+ +
+ +              files_info = []
+ +              sid = self._gen_sid()
+ +
+ +              # Characters 8 and 9 (counting from 0) of the file id hold the
+ +              # segment number as two hex digits: fileid[8:10] is replaced
+ +              for index, key in enumerate(keys):
+ +                      temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
+ +                      download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
+ +
+ +                      files_info.append({
+ +                              'id': '%s_part%02d' % (video_id, index),
+ +                              'url': download_url,
+ +                              'uploader': None,
+ +                              'title': video_title,
+ +                              'ext': ext,
+ +                              'format': u'NA'
+ +                      })
+ +
+ +              return files_info
+ +
+ +
+ +class XNXXIE(InfoExtractor):
+ +      """Information extractor for xnxx.com"""
+ +
+ +      _VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
+ +      IE_NAME = u'xnxx'
+ +      VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
+ +      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
+ +      VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
+ +
+ +      def report_webpage(self, video_id):
+ +              """Report webpage download"""
+ +              self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
+ +
+ +      def report_extraction(self, video_id):
+ +              """Report information extraction"""
+ +              self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+ +
+ +      def _search_field(self, pattern, webpage, description):
+ +              """Return the first group of pattern in webpage, decoded as UTF-8.
+ +
+ +              If the pattern does not match, report that description could not
+ +              be extracted and return None.
+ +              """
+ +              result = re.search(pattern, webpage)
+ +              if result is None:
+ +                      self._downloader.trouble(u'ERROR: unable to extract %s' % description)
+ +                      return None
+ +              return result.group(1).decode('utf-8')
+ +
+ +      def _real_extract(self, url):
+ +              """Return a list with the info dictionary of the video at url.
+ +
+ +              Problems are reported through self._downloader.trouble() and
+ +              make the method return None.
+ +              """
+ +              mobj = re.match(self._VALID_URL, url)
+ +              if mobj is None:
+ +                      self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+ +                      return
+ +              video_id = mobj.group(1).decode('utf-8')
+ +
+ +              self.report_webpage(video_id)
+ +
+ +              # Get webpage content
+ +              try:
+ +                      webpage = urllib2.urlopen(url).read()
+ +              except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+ +                      # compat_str() as in the other extractors of this file,
+ +                      # instead of interpolating the exception object itself
+ +                      self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+ +                      return
+ +
+ +              video_url = self._search_field(self.VIDEO_URL_RE, webpage, u'video url')
+ +              if video_url is None:
+ +                      return
+ +              video_url = urllib.unquote(video_url)
+ +
+ +              video_title = self._search_field(self.VIDEO_TITLE_RE, webpage, u'video title')
+ +              if video_title is None:
+ +                      return
+ +
+ +              video_thumbnail = self._search_field(self.VIDEO_THUMB_RE, webpage, u'video thumbnail')
+ +              if video_thumbnail is None:
+ +                      return
+ +
+ +              info = {
+ +                      'id': video_id,
+ +                      'url': video_url,
+ +                      'uploader': None,
+ +                      'upload_date': None,
+ +                      'title': video_title,
+ +                      'ext': 'flv',
+ +                      'format': 'flv',
+ +                      'thumbnail': video_thumbnail,
+ +                      'description': None,
+ +                      'player_url': None,
+ +              }
+ +
+ +              return [info]
+ +
+ +
+ +class GooglePlusIE(InfoExtractor):
+ +      """Information extractor for plus.google.com."""
+ +
+ +      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:\w+/)*?(\d+)/posts/(\w+)'
+ +      IE_NAME = u'plus.google'
+ +
+ +      def __init__(self, downloader=None):
+ +              InfoExtractor.__init__(self, downloader)
+ +
+ +      def report_extract_entry(self, url):
+ +              """Report downloading entry"""
+ +              self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url.decode('utf-8'))
+ +
+ +      def report_date(self, upload_date):
+ +              """Report entry date"""
+ +              self._downloader.to_screen(u'[plus.google] Entry date: %s' % upload_date)
+ +
+ +      def report_uploader(self, uploader):
+ +              """Report uploader"""
+ +              self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader.decode('utf-8'))
+ +
+ +      def report_title(self, video_title):
+ +              """Report title"""
+ +              self._downloader.to_screen(u'[plus.google] Title: %s' % video_title.decode('utf-8'))
+ +
+ +      def report_extract_vid_page(self, video_page):
+ +              """Report information extraction."""
+ +              self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page.decode('utf-8'))
+ +
+ +      def _real_extract(self, url):
+ +              """Return a list with the info dictionary of the video in a post.
+ +
+ +              Problems are reported through self._downloader.trouble() and
+ +              make the method return None.
+ +              """
+ +              # Extract id from URL
+ +              mobj = re.match(self._VALID_URL, url)
+ +              if mobj is None:
+ +                      self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+ +                      return
+ +
+ +              post_url = mobj.group(0)
+ +              # _VALID_URL makes the scheme optional, but urllib2 cannot open
+ +              # a URL without one
+ +              if not post_url.startswith('https://'):
+ +                      post_url = 'https://' + post_url
+ +              video_id = mobj.group(2)
+ +
+ +              video_extension = 'flv'
+ +
+ +              # Step 1, Retrieve post webpage to extract further information
+ +              self.report_extract_entry(post_url)
+ +              request = urllib2.Request(post_url)
+ +              try:
+ +                      webpage = urllib2.urlopen(request).read()
+ +              except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
+ +                      return
+ +
+ +              # Extract upload date
+ +              upload_date = u'NA'
+ +              pattern = 'title="Timestamp">(.*?)</a>'
+ +              mobj = re.search(pattern, webpage)
+ +              if mobj:
+ +                      try:
+ +                              # Convert timestring to a format suitable for filename
+ +                              timestamp = datetime.datetime.strptime(mobj.group(1), "%Y-%m-%d")
+ +                              upload_date = timestamp.strftime('%Y%m%d')
+ +                      except ValueError:
+ +                              # The text is not in YYYY-MM-DD form; the date is
+ +                              # optional, so keep the placeholder
+ +                              pass
+ +              self.report_date(upload_date)
+ +
+ +              # Extract uploader
+ +              uploader = u'NA'
+ +              pattern = r'rel\="author".*?>(.*?)</a>'
+ +              mobj = re.search(pattern, webpage)
+ +              if mobj:
+ +                      uploader = mobj.group(1)
+ +              self.report_uploader(uploader)
+ +
+ +              # Extract title
+ +              # Get the first line for title
+ +              video_title = u'NA'
+ +              pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]'
+ +              mobj = re.search(pattern, webpage)
+ +              if mobj:
+ +                      video_title = mobj.group(1)
+ +              self.report_title(video_title)
+ +
+ +              # Step 2, Simulate clicking the image box to launch video
+ +              pattern = r'"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
+ +              mobj = re.search(pattern, webpage)
+ +              if mobj is None:
+ +                      self._downloader.trouble(u'ERROR: unable to extract video page URL')
+ +                      # Without this return, mobj.group() below fails on None
+ +                      return
+ +
+ +              video_page = mobj.group(1)
+ +              request = urllib2.Request(video_page)
+ +              try:
+ +                      webpage = urllib2.urlopen(request).read()
+ +              except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+ +                      self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ +                      return
+ +              self.report_extract_vid_page(video_page)
+ +
+ +              # Extract video links of all sizes on video page
+ +              pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
+ +              links = re.findall(pattern, webpage)
+ +              if not links:
+ +                      self._downloader.trouble(u'ERROR: unable to extract video links')
+ +                      # Without this return, picking a link below fails
+ +                      return
+ +
+ +              # Choose the link with the highest resolution. The captured
+ +              # resolution is a string, so compare it as a number: as text
+ +              # "1080" would sort before "720". Only the URL is kept.
+ +              video_url = max(links, key=lambda link: (int(link[0]), link[1]))[1]
+ +              # Treat escaped \u0026 style hex
+ +              video_url = unicode(video_url, "unicode_escape")
+ +
+ +              return [{
+ +                      'id':           video_id.decode('utf-8'),
+ +                      'url':          video_url,
+ +                      'uploader':     uploader.decode('utf-8'),
+ +                      'upload_date':  upload_date.decode('utf-8'),
+ +                      'title':        video_title.decode('utf-8'),
+ +                      'ext':          video_extension.decode('utf-8'),
+ +                      'format':       u'NA',
+ +                      'player_url':   None,
+ +              }]
diff --combined youtube_dl/__init__.py

index 92478aa6bb71d2b3a92127eb9e1877ec4eb813dd,13cf77896e52cf0a546d3663b44fb5d635923997..f7a49e13a8f85a1c4fcb5030078e554416f1df85
--- 1/youtube_dl/__init__.py
--- 2/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@@ -1,8 -1,6 +1,8 @@@
   #!/usr/bin/env python
   # -*- coding: utf-8 -*-
   
+ +from __future__ import with_statement
+ +
   __authors__  = (
         'Ricardo Garcia Gonzalez',
         'Danny Colligan',
@@@ -21,7 -19,7 +21,7 @@@
         )
   
   __license__ = 'Public Domain'
- -__version__ = '2012.02.27'
+ +__version__ = '2012.11.28'
   
   UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
   UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@@ -48,7 -46,7 +48,7 @@@ from PostProcessor import 
   def updateSelf(downloader, filename):
         ''' Update the program file with the latest version from the repository '''
         # Note: downloader only used for options
- -      
+ +
         if not os.access(filename, os.W_OK):
                 sys.exit('ERROR: no write permissions on %s' % filename)
   
@@@ -66,7 -64,7 +66,7 @@@
                 directory = os.path.dirname(exe)
                 if not os.access(directory, os.W_OK):
                         sys.exit('ERROR: no write permissions on %s' % directory)
- -                      
+ +
                 try:
                         urlh = urllib2.urlopen(UPDATE_URL_EXE)
                         newcontent = urlh.read()
@@@ -75,18 -73,20 +75,18 @@@
                                 outf.write(newcontent)
                 except (IOError, OSError), err:
                         sys.exit('ERROR: unable to download latest version')
- -                      
+ +
                 try:
                         bat = os.path.join(directory, 'youtube-dl-updater.bat')
                         b = open(bat, 'w')
- -                      
- -                      print >> b, """
+ +                      b.write("""
   echo Updating youtube-dl...
   ping 127.0.0.1 -n 5 -w 1000 > NUL
   move /Y "%s.new" "%s"
   del "%s"
- -                      """ %(exe, exe, bat)
- -                      
+ +                      \n""" %(exe, exe, bat))
                         b.close()
- -                      
+ +
                         os.startfile(bat)
                 except (IOError, OSError), err:
                         sys.exit('ERROR: unable to overwrite current version')
@@@ -186,23 -186,16 +186,23 @@@ def parseOpts()
         general.add_option('-r', '--rate-limit',
                         dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
         general.add_option('-R', '--retries',
- -                      dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
+ +                      dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+ +      general.add_option('--buffer-size',
+ +                      dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+ +      general.add_option('--no-resize-buffer',
+ +                      action='store_true', dest='noresizebuffer',
+ +                      help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
         general.add_option('--dump-user-agent',
                         action='store_true', dest='dump_user_agent',
                         help='display the current browser identification', default=False)
+ +      general.add_option('--user-agent',
+ +                      dest='user_agent', help='specify a custom user agent', metavar='UA')
         general.add_option('--list-extractors',
                         action='store_true', dest='list_extractors',
                         help='List all supported extractors and the URLs they would handle', default=False)
   
         selection.add_option('--playlist-start',
- -                      dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
+ +                      dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
         selection.add_option('--playlist-end',
                         dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
         selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
@@@ -268,18 -261,13 +268,18 @@@
   
         filesystem.add_option('-t', '--title',
                         action='store_true', dest='usetitle', help='use title in file name', default=False)
+ +      filesystem.add_option('--id',
+ +                      action='store_true', dest='useid', help='use video ID in file name', default=False)
         filesystem.add_option('-l', '--literal',
- -                      action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+ +                      action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
         filesystem.add_option('-A', '--auto-number',
                         action='store_true', dest='autonumber',
                         help='number downloaded files starting from 00000', default=False)
         filesystem.add_option('-o', '--output',
- -                      dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
+ +                      dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
+ +      filesystem.add_option('--restrict-filenames',
+ +                      action='store_true', dest='restrictfilenames',
+ +                      help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
         filesystem.add_option('-a', '--batch-file',
                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
         filesystem.add_option('-w', '--no-overwrites',
@@@ -304,12 -292,12 +304,12 @@@
                         help='write video metadata to a .info.json file', default=False)
   
   
- -      postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
+ +      postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
                         help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
                         help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
- -      postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
- -                      help='ffmpeg/avconv audio bitrate specification, 128k by default')
+ +      postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
+ +                      help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
         postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
                         help='keeps the video file on disk after the post-processing; the video is erased by default')
   
@@@ -338,7 -326,6 +338,7 @@@ def gen_extractors()
         """
         return [
                 YoutubePlaylistIE(),
+ +              YoutubeChannelIE(),
                 YoutubeUserIE(),
                 YoutubeSearchIE(),
                 YoutubeIE(),
@@@ -364,10 -351,8 +364,10 @@@
                 MixcloudIE(),
                 StanfordOpenClassroomIE(),
                 MTVIE(),
- -                ArteTvIE(),
- -
+ +              YoukuIE(),
+ +              XNXXIE(),
+ +              GooglePlusIE(),
- 
++              ArteTvIE(),
                 GenericIE()
         ]
   
@@@ -384,9 -369,6 +384,9 @@@ def _real_main()
                                 jar.load()
                 except (IOError, OSError), err:
                         sys.exit(u'ERROR: unable to open cookie file')
+ +      # Set user agent
+ +      if opts.user_agent is not None:
+ +              std_headers['User-Agent'] = opts.user_agent
   
         # Dump user agent
         if opts.dump_user_agent:
@@@ -432,10 -414,10 +432,10 @@@
                 parser.error(u'using .netrc conflicts with giving username/password')
         if opts.password is not None and opts.username is None:
                 parser.error(u'account username missing')
- -      if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
- -              parser.error(u'using output template conflicts with using title, literal title or auto number')
- -      if opts.usetitle and opts.useliteral:
- -              parser.error(u'using title conflicts with using literal title')
+ +      if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
+ +              parser.error(u'using output template conflicts with using title, video ID or auto number')
+ +      if opts.usetitle and opts.useid:
+ +              parser.error(u'using title conflicts with using video ID')
         if opts.username is not None and opts.password is None:
                 opts.password = getpass.getpass(u'Type account password and press return:')
         if opts.ratelimit is not None:
@@@ -448,11 -430,6 +448,11 @@@
                         opts.retries = long(opts.retries)
                 except (TypeError, ValueError), err:
                         parser.error(u'invalid retry count specified')
+ +      if opts.buffersize is not None:
+ +              numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
+ +              if numeric_buffersize is None:
+ +                      parser.error(u'invalid buffer size specified')
+ +              opts.buffersize = numeric_buffersize
         try:
                 opts.playliststart = int(opts.playliststart)
                 if opts.playliststart <= 0:
@@@ -468,10 -445,6 +468,10 @@@
         if opts.extractaudio:
                 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
                         parser.error(u'invalid audio format specified')
+ +      if opts.audioquality:
+ +              opts.audioquality = opts.audioquality.strip('k').strip('K')
+ +              if not opts.audioquality.isdigit():
+ +                      parser.error(u'invalid audio quality specified')
   
         # File downloader
         fd = FileDownloader({
@@@ -491,20 -464,19 +491,20 @@@
                 'format_limit': opts.format_limit,
                 'listformats': opts.listformats,
                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
- -                      or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
- -                      or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
+ +                      or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                         or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
- -                      or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
- -                      or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- -                      or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
- -                      or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
+ +                      or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ +                      or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
+ +                      or (opts.useid and u'%(id)s.%(ext)s')
                         or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
                         or u'%(id)s.%(ext)s'),
+ +              'restrictfilenames': opts.restrictfilenames,
                 'ignoreerrors': opts.ignoreerrors,
                 'ratelimit': opts.ratelimit,
                 'nooverwrites': opts.nooverwrites,
                 'retries': opts.retries,
+ +              'buffersize': opts.buffersize,
+ +              'noresizebuffer': opts.noresizebuffer,
                 'continuedl': opts.continue_dl,
                 'noprogress': opts.noprogress,
                 'playliststart': opts.playliststart,
@@@ -544,7 -516,7 +544,7 @@@
                         parser.error(u'you must provide at least one URL')
                 else:
                         sys.exit()
- -      
+ +
         try:
                 retcode = fd.download(all_urls)
         except MaxDownloadsReached:
author	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 16:14:29 +0000 (17:14 +0100)
		1	2
youtube_dl/InfoExtractors.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/__init__.py	patch \|	diff1 \|	diff2 \|	blob \| history