Merge branch 'next-url'

[youtube-dl] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index b22f1cac274fa165563f1f7e32b5a5ee474644f6..8d0d1cc3381afab236486af52f9712c110cfa311 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,15 +15,18 @@ __authors__  = (
         'Kevin Ngo',
         'Ori Avtalion',
         'shizeeg',
+       'Filippo Valsorda',
         )
  
  __license__ = 'Public Domain'
-__version__ = '2012.01.08'
+__version__ = '2012.02.27'
  
  UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  
+
  import cookielib
  import datetime
+import getpass
  import gzip
  import htmlentitydefs
  import HTMLParser
@@ -31,9 +34,11 @@ import httplib
  import locale
  import math
  import netrc
+import optparse
  import os
  import os.path
  import re
+import shlex
  import socket
  import string
  import subprocess
@@ -305,7 +310,14 @@ def _encodeFilename(s):
         """
  
         assert type(s) == type(u'')
-       return s.encode(sys.getfilesystemencoding(), 'ignore')
+
+       if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
+               # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+               # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+               # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+               return s
+       else:
+               return s.encode(sys.getfilesystemencoding(), 'ignore')
  
  class DownloadError(Exception):
         """Download Error exception.
@@ -479,6 +491,8 @@ class FileDownloader(object):
         updatetime:       Use the Last-modified header to set output file timestamps.
         writedescription: Write the video description to a .description file
         writeinfojson:    Write the video description to a .info.json file
+       writesubtitles:   Write the video subtitles to a .srt file
+       subtitleslang:    Language of the subtitles to download
         """
  
         params = None
@@ -670,6 +684,10 @@ class FileDownloader(object):
                 """ Report that the description file is being written """
                 self.to_screen(u'[info] Writing video description to: ' + descfn)
  
+       def report_writesubtitles(self, srtfn):
+               """ Report that the video subtitles are being written to the file srtfn """
+               self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+
         def report_writeinfojson(self, infofn):
                 """ Report that the metadata file has been written """
                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
@@ -797,6 +815,21 @@ class FileDownloader(object):
                         except (OSError, IOError):
                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
                                 return
+                               
+               if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+                       # Subtitle download errors are already reported as troubles by the relevant IE,
+                       # so an IE that provides no subtitles simply skips this step without complaint.
+                       try:
+                               srtfn = filename.rsplit('.', 1)[0] + u'.srt'
+                               self.report_writesubtitles(srtfn)
+                               srtfile = open(_encodeFilename(srtfn), 'wb')
+                               try:
+                                       srtfile.write(info_dict['subtitles'].encode('utf-8'))
+                               finally:
+                                       srtfile.close()
+                       except (OSError, IOError):
+                               self.trouble(u'ERROR: Cannot write subtitles file ' + srtfn)
+                               return
  
                 if self.params.get('writeinfojson', False):
                         infofn = filename + u'.info.json'
@@ -889,7 +922,15 @@ class FileDownloader(object):
                 # the connection was interrumpted and resuming appears to be
                 # possible. This is part of rtmpdump's normal usage, AFAIK.
                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
-               retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
+               args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+               if self.params.get('verbose', False):
+                       try:
+                               import pipes
+                               shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+                       except ImportError:
+                               shell_quote = repr
+                       self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
+               retval = subprocess.call(args)
                 while retval == 2 or retval == 1:
                         prevsize = os.path.getsize(_encodeFilename(tmpfilename))
                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
@@ -1135,6 +1176,7 @@ class YoutubeIE(InfoExtractor):
         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
+       _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
         _NETRC_MACHINE = 'youtube'
         # Listed in order of quality
         _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
@@ -1187,6 +1229,10 @@ class YoutubeIE(InfoExtractor):
                 """Report attempt to download video info webpage."""
                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
  
+       def report_video_subtitles_download(self, video_id):
+               """Report attempt to download video subtitles."""
+               self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
+
         def report_information_extraction(self, video_id):
                 """Report attempt to extract video information."""
                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
@@ -1199,6 +1245,23 @@ class YoutubeIE(InfoExtractor):
                 """Indicate the download will use the RTMP protocol."""
                 self._downloader.to_screen(u'[youtube] RTMP download detected')
  
+       def _closed_captions_xml_to_srt(self, xml_string):
+               """Convert closed-captions XML (<text start=... dur=...> elements) to SRT text; a cue without dur lasts 4 seconds."""
+               srt = ''
+               texts = re.findall(r'<text start="([\d\.]+)"( dur="([\d\.]+)")?>([^<]+)</text>', xml_string, re.MULTILINE)
+               # TODO parse xml instead of regex
+               for n, (start, dur_tag, dur, caption) in enumerate(texts):
+                       start = float(start)
+                       end = start + float(dur or '4') # dur is '' when the optional attribute is absent
+                       start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
+                       end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
+                       caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
+                       caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, intentional
+                       srt += str(n + 1) + '\n' # SRT cue numbers are 1-based
+                       srt += start + ' --> ' + end + '\n'
+                       srt += caption + '\n\n'
+               return srt
+
         def _print_formats(self, formats):
                 print 'Available formats:'
                 for x in formats:
@@ -1274,6 +1337,11 @@ class YoutubeIE(InfoExtractor):
                         return
  
         def _real_extract(self, url):
+               # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+               mobj = re.search(self._NEXT_URL_RE, url)
+               if mobj:
+                       url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+
                 # Extract video id from URL
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -1362,15 +1430,45 @@ class YoutubeIE(InfoExtractor):
                         lxml.etree
                 except NameError:
                         video_description = u'No description available.'
-                       if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
-                               mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
-                               if mobj is not None:
-                                       video_description = mobj.group(1).decode('utf-8')
+                       mobj = re.search(r'<meta name="description" content="(.*?)">', video_webpage)
+                       if mobj is not None:
+                               video_description = mobj.group(1).decode('utf-8')
                 else:
                         html_parser = lxml.etree.HTMLParser(encoding='utf-8')
                         vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
                         video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
                         # TODO use another parser
+                       
+               # closed captions
+               video_subtitles = None
+               if self._downloader.params.get('writesubtitles', False):
+                       self.report_video_subtitles_download(video_id)
+                       request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+                       try:
+                               srt_list = urllib2.urlopen(request).read()
+                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                       else:
+                               srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
+                               if srt_lang_list:
+                                       if self._downloader.params.get('subtitleslang', False):
+                                               srt_lang = self._downloader.params.get('subtitleslang')
+                                       elif 'en' in srt_lang_list:
+                                               srt_lang = 'en'
+                                       else:
+                                               srt_lang = srt_lang_list[0]
+                                       if not srt_lang in srt_lang_list:
+                                               self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
+                                       else:
+                                               request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
+                                               try:
+                                                       srt_xml = urllib2.urlopen(request).read()
+                                               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                                       self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                                               else:
+                                                       video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
+                               else:
+                                       self._downloader.trouble(u'WARNING: video has no closed captions')
  
                 # token
                 video_token = urllib.unquote_plus(video_info['token'][0])
@@ -1443,6 +1541,7 @@ class YoutubeIE(InfoExtractor):
                                         'thumbnail':    video_thumbnail.decode('utf-8'),
                                         'description':  video_description,
                                         'player_url':   player_url,
+                                       'subtitles':    video_subtitles
                                 })
                         except UnavailableVideoError, err:
                                 self._downloader.trouble(u'\nERROR: unable to download video')
@@ -2040,7 +2139,7 @@ class VimeoIE(InfoExtractor):
                 video_id = mobj.group(1)
  
                 # Retrieve video webpage to extract further information
-               request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
+               request = urllib2.Request(url, None, std_headers)
                 try:
                         self.report_download_webpage(video_id)
                         webpage = urllib2.urlopen(request).read()
@@ -2053,77 +2152,75 @@ class VimeoIE(InfoExtractor):
                 # and latter we extract those that are Vimeo specific.
                 self.report_extraction(video_id)
  
-               # Extract title
-               mobj = re.search(r'<caption>(.*?)</caption>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video title')
+               # Extract the config JSON
+               try:
+                       # A page without the embedded config makes split(...)[1] raise IndexError; report it like bad JSON
+                       config = json.loads(webpage.split(' = {config:')[1].split(',assets:')[0])
+               except (IndexError, ValueError):
+                       self._downloader.trouble(u'ERROR: unable to extract info section')
                         return
-               video_title = mobj.group(1).decode('utf-8')
+               
+               # Extract title
+               video_title = config["video"]["title"]
                 simple_title = _simplify_title(video_title)
  
                 # Extract uploader
-               mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video uploader')
-                       return
-               video_uploader = mobj.group(1).decode('utf-8')
+               video_uploader = config["video"]["owner"]["name"]
  
                 # Extract video thumbnail
-               mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
-                       return
-               video_thumbnail = mobj.group(1).decode('utf-8')
+               video_thumbnail = config["video"]["thumbnail"]
  
-               # # Extract video description
-               # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
-               # if mobj is None:
-               #       self._downloader.trouble(u'ERROR: unable to extract video description')
-               #       return
-               # video_description = mobj.group(1).decode('utf-8')
-               # if not video_description: video_description = 'No description available.'
-               video_description = 'Foo.'
-
-               # Vimeo specific: extract request signature
-               mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract request signature')
-                       return
-               sig = mobj.group(1).decode('utf-8')
-
-               # Vimeo specific: extract video quality information
-               mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video quality information')
-                       return
-               quality = mobj.group(1).decode('utf-8')
-
-               if int(quality) == 1:
-                       quality = 'hd'
+               # Extract video description
+               try:
+                       lxml.etree
+               except NameError:
+                       video_description = u'No description available.'
+                       mobj = re.search(r'<meta name="description" content="(.*?)" />', webpage, re.MULTILINE)
+                       if mobj is not None:
+                               video_description = mobj.group(1)
                 else:
-                       quality = 'sd'
+                       html_parser = lxml.etree.HTMLParser()
+                       vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser)
+                       video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip()
+                       # TODO use another parser
  
-               # Vimeo specific: Extract request signature expiration
-               mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
+               # Extract upload date
+               video_upload_date = u'NA'
+               mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
+               if mobj is not None:
+                       video_upload_date = mobj.group(1)
+
+               # Vimeo specific: extract request signature and timestamp
+               sig = config['request']['signature']
+               timestamp = config['request']['timestamp']
+
+               # Vimeo specific: extract video codec and quality information
+               # TODO bind to format param
+               codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
+               for codec in codecs:
+                       if codec[0] in config["video"]["files"]:
+                               video_codec = codec[0]
+                               video_extension = codec[1]
+                               if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
+                               else: quality = 'sd'
+                               break
+               else:
+                       self._downloader.trouble(u'ERROR: no known codec found')
                         return
-               sig_exp = mobj.group(1).decode('utf-8')
  
-               video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
+               video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+                                       %(video_id, sig, timestamp, quality, video_codec.upper())
  
                 try:
                         # Process video information
                         self._downloader.process_info({
-                               'id':           video_id.decode('utf-8'),
+                               'id':           video_id,
                                 'url':          video_url,
                                 'uploader':     video_uploader,
-                               'upload_date':  u'NA',
+                               'upload_date':  video_upload_date,
                                 'title':        video_title,
                                 'stitle':       simple_title,
-                               'ext':          u'mp4',
-                               'thumbnail':    video_thumbnail.decode('utf-8'),
-                               'description':  video_description,
+                               'ext':          video_extension,
                                 'thumbnail':    video_thumbnail,
                                 'description':  video_description,
                                 'player_url':   None,
@@ -2150,7 +2247,67 @@ class GenericIE(InfoExtractor):
                 """Report information extraction."""
                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
  
+       def report_following_redirect(self, new_url):
+               """Report that a redirect to new_url is being followed."""
+               self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
+               
+       def _test_redirect(self, url):
+               """Resolve url with HEAD requests (e.g. URL shorteners); if it ends elsewhere, restart the download chain on the final URL and return True, else return False."""
+               class HeadRequest(urllib2.Request):
+                       def get_method(self):
+                               return "HEAD"
+
+               class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+                       """
+                       Subclass the HTTPRedirectHandler to make it use our 
+                       HeadRequest also on the redirected URL
+                       """
+                       def redirect_request(self, req, fp, code, msg, headers, newurl): 
+                               if code in (301, 302, 303, 307):
+                                   newurl = newurl.replace(' ', '%20') 
+                                   newheaders = dict((k,v) for k,v in req.headers.items()
+                                                     if k.lower() not in ("content-length", "content-type"))
+                                   return HeadRequest(newurl, 
+                                                      headers=newheaders,
+                                                      origin_req_host=req.get_origin_req_host(), 
+                                                      unverifiable=True) 
+                               else: 
+                                   raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) 
+                                   
+               class HTTPMethodFallback(urllib2.BaseHandler):
+                       """
+                       Fallback to GET if HEAD is not allowed (405 HTTP error)
+                       """
+                       def http_error_405(self, req, fp, code, msg, headers): 
+                               fp.read()
+                               fp.close()
+
+                               newheaders = dict((k,v) for k,v in req.headers.items()
+                                                 if k.lower() not in ("content-length", "content-type"))
+                               return self.parent.open(urllib2.Request(req.get_full_url(), 
+                                                                headers=newheaders, 
+                                                                origin_req_host=req.get_origin_req_host(), 
+                                                                unverifiable=True))
+
+               # Build a bare opener holding only the handlers listed below
+               opener = urllib2.OpenerDirector() 
+               for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+                                       HTTPMethodFallback, HEADRedirectHandler,
+                                       urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+                       opener.add_handler(handler())
+
+               response = opener.open(HeadRequest(url))
+               new_url = response.geturl()
+               
+               if url == new_url: return False
+               
+               self.report_following_redirect(new_url)
+               self._downloader.download([new_url])
+               return True
+
         def _real_extract(self, url):
+               if self._test_redirect(url): return
+               
                 # At this point we have a new video
                 self._downloader.increment_downloads()
  
@@ -2232,9 +2389,7 @@ class GenericIE(InfoExtractor):
  class YoutubeSearchIE(InfoExtractor):
         """Information Extractor for YouTube search queries."""
         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
-       _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
-       _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
-       _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
+       _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
         _youtube_ie = None
         _max_youtube_results = 1000
         IE_NAME = u'youtube:search'
@@ -2285,45 +2440,39 @@ class YoutubeSearchIE(InfoExtractor):
                 """Downloads a specified number of results for a query"""
  
                 video_ids = []
-               already_seen = set()
-               pagenum = 1
+               pagenum = 0
+               limit = n
  
-               while True:
-                       self.report_download_page(query, pagenum)
-                       result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
+               while (50 * pagenum) < limit:
+                       self.report_download_page(query, pagenum+1)
+                       result_url = self._API_URL % (urllib.quote_plus(query), (50*pagenum)+1)
                         request = urllib2.Request(result_url)
                         try:
-                               page = urllib2.urlopen(request).read()
+                               data = urllib2.urlopen(request).read()
                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
                                 return
+                       api_response = json.loads(data)['data']
  
-                       # Extract video identifiers
-                       for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-                               video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
-                               if video_id not in already_seen:
-                                       video_ids.append(video_id)
-                                       already_seen.add(video_id)
-                                       if len(video_ids) == n:
-                                               # Specified n videos reached
-                                               for id in video_ids:
-                                                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
-                                               return
+                       new_ids = list(video['id'] for video in api_response['items'])
+                       video_ids += new_ids
  
-                       if re.search(self._MORE_PAGES_INDICATOR, page) is None:
-                               for id in video_ids:
-                                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
-                               return
+                       limit = min(n, api_response['totalItems'])
+                       pagenum += 1
  
-                       pagenum = pagenum + 1
+               if len(video_ids) > n:
+                       video_ids = video_ids[:n]
+               for id in video_ids:
+                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+               return
  
  
  class GoogleSearchIE(InfoExtractor):
         """Information Extractor for Google Video search queries."""
         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
-       _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
-       _MORE_PAGES_INDICATOR = r'<span>Next</span>'
+       _VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
+       _MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
         _google_ie = None
         _max_google_results = 1000
         IE_NAME = u'video.google:search'
@@ -2374,12 +2523,11 @@ class GoogleSearchIE(InfoExtractor):
                 """Downloads a specified number of results for a query"""
  
                 video_ids = []
-               already_seen = set()
-               pagenum = 1
+               pagenum = 0
  
                 while True:
                         self.report_download_page(query, pagenum)
-                       result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
+                       result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum*10)
                         request = urllib2.Request(result_url)
                         try:
                                 page = urllib2.urlopen(request).read()
@@ -2390,9 +2538,8 @@ class GoogleSearchIE(InfoExtractor):
                         # Extract video identifiers
                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                                 video_id = mobj.group(1)
-                               if video_id not in already_seen:
+                               if video_id not in video_ids:
                                         video_ids.append(video_id)
-                                       already_seen.add(video_id)
                                         if len(video_ids) == n:
                                                 # Specified n videos reached
                                                 for id in video_ids:
@@ -2501,7 +2648,7 @@ class YoutubePlaylistIE(InfoExtractor):
  
         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
-       _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
+       _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;list=PL%s&'
         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
         _youtube_ie = None
         IE_NAME = u'youtube:playlist'
@@ -2553,7 +2700,7 @@ class YoutubePlaylistIE(InfoExtractor):
  
                         # Extract video identifiers
                         ids_in_page = []
-                       for mobj in re.finditer(self._VIDEO_INDICATOR, page):
+                       for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page):
                                 if mobj.group(1) not in ids_in_page:
                                         ids_in_page.append(mobj.group(1))
                         video_ids.extend(ids_in_page)
@@ -2564,7 +2711,10 @@ class YoutubePlaylistIE(InfoExtractor):
  
                 playliststart = self._downloader.params.get('playliststart', 1) - 1
                 playlistend = self._downloader.params.get('playlistend', -1)
-               video_ids = video_ids[playliststart:playlistend]
+               if playlistend == -1:
+                       video_ids = video_ids[playliststart:]
+               else:
+                       video_ids = video_ids[playliststart:playlistend]
  
                 for id in video_ids:
                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
@@ -2652,7 +2802,7 @@ class YoutubeUserIE(InfoExtractor):
                 else:
                         video_ids = video_ids[playliststart:playlistend]
  
-               self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
+               self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
                                 (username, all_ids_count, len(video_ids)))
  
                 for video_id in video_ids:
@@ -4170,7 +4320,7 @@ def updateSelf(downloader, filename):
         if not os.access(filename, os.W_OK):
                 sys.exit('ERROR: no write permissions on %s' % filename)
  
-       downloader.to_screen('Updating to latest version...')
+       downloader.to_screen(u'Updating to latest version...')
  
         try:
                 try:
@@ -4179,7 +4329,7 @@ def updateSelf(downloader, filename):
                         
                         vmatch = re.search("__version__ = '([^']+)'", newcontent)
                         if vmatch is not None and vmatch.group(1) == __version__:
-                               downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
+                               downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
                                 return
                 finally:
                         urlh.close()
@@ -4195,14 +4345,9 @@ def updateSelf(downloader, filename):
         except (IOError, OSError), err:
                 sys.exit('ERROR: unable to overwrite current version')
  
-       downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+       downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
  
  def parseOpts():
-       # Deferred imports
-       import getpass
-       import optparse
-       import shlex
-
         def _readOptions(filename_bytes):
                 try:
                         optionf = open(filename_bytes)
@@ -4315,6 +4460,12 @@ def parseOpts():
                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
         video_format.add_option('-F', '--list-formats',
                         action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
+       video_format.add_option('--write-srt',
+                       action='store_true', dest='writesubtitles',
+                       help='write video closed captions to a .srt file (currently youtube only)', default=False)
+       video_format.add_option('--srt-lang',
+                       action='store', dest='subtitleslang', metavar='LANG',
+                       help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
  
  
         verbosity.add_option('-q', '--quiet',
@@ -4344,6 +4495,8 @@ def parseOpts():
         verbosity.add_option('--console-title',
                         action='store_true', dest='consoletitle',
                         help='display progress in console titlebar', default=False)
+       verbosity.add_option('-v', '--verbose',
+                       action='store_true', dest='verbose', help='print various debugging information', default=False)
  
  
         filesystem.add_option('-t', '--title',
@@ -4360,7 +4513,7 @@ def parseOpts():
         filesystem.add_option('-w', '--no-overwrites',
                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
         filesystem.add_option('-c', '--continue',
-                       action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
+                       action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
         filesystem.add_option('--no-continue',
                         action='store_false', dest='continue_dl',
                         help='do not resume partially downloaded files (restart from beginning)')
@@ -4477,13 +4630,18 @@ def _real_main():
                 except IOError:
                         sys.exit(u'ERROR: batch file could not be read')
         all_urls = batchurls + args
+       all_urls = map(lambda url: url.strip(), all_urls)
  
         # General configuration
         cookie_processor = urllib2.HTTPCookieProcessor(jar)
-       opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
+       proxy_handler = urllib2.ProxyHandler()
+       opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
         urllib2.install_opener(opener)
         socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
  
+       if opts.verbose:
+               print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
+
         extractors = gen_extractors()
  
         if opts.list_extractors:
@@ -4573,10 +4731,13 @@ def _real_main():
                 'updatetime': opts.updatetime,
                 'writedescription': opts.writedescription,
                 'writeinfojson': opts.writeinfojson,
+               'writesubtitles': opts.writesubtitles,
+               'subtitleslang': opts.subtitleslang,
                 'matchtitle': opts.matchtitle,
                 'rejecttitle': opts.rejecttitle,
                 'max_downloads': opts.max_downloads,
                 'prefer_free_formats': opts.prefer_free_formats,
+               'verbose': opts.verbose,
                 })
         for extractor in extractors:
                 fd.add_info_extractor(extractor)