Merge branch 'master' into extract_info_rewrite
authorJaime Marquínez Ferrándiz <jaimemf93@gmail.com>
Thu, 28 Mar 2013 12:02:04 +0000 (13:02 +0100)
committerJaime Marquínez Ferrándiz <jaimemf93@gmail.com>
Thu, 28 Mar 2013 12:20:33 +0000 (13:20 +0100)
1  2 
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py

index 68fad11bc72bf4f84b5d36266afbef403facb30e,725d4a0160388b3faa8c7a5b09cc83a8726170f8..6af2acbeee73b1258c32134100eb3a6e4abac38a
@@@ -78,7 -78,11 +78,11 @@@ class FileDownloader(object)
      updatetime:        Use the Last-modified header to set output file timestamps.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
-     writesubtitles:    Write the video subtitles to a .srt file
+     writesubtitles:    Write the video subtitles to a file
+     onlysubtitles:     Downloads only the subtitles of the video
+     allsubtitles:      Downloads all the subtitles of the video
+     listsubtitles:     Lists all available subtitles for the video
+     subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
      keepvideo:         Keep the video file after post-processing
          """ Report that the description file is being written """
          self.to_screen(u'[info] Writing video description to: ' + descfn)
  
-     def report_writesubtitles(self, srtfn):
+     def report_writesubtitles(self, sub_filename):
          """ Report that the subtitles file is being written """
-         self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
  
      def report_writeinfojson(self, infofn):
          """ Report that the metadata file has been written """
  
              filename = self.params['outtmpl'] % template_dict
              return filename
-         except (ValueError, KeyError) as err:
-             self.trouble(u'ERROR: invalid system charset or erroneous output template')
+         except KeyError as err:
+             self.trouble(u'ERROR: Erroneous output template')
+             return None
+         except ValueError as err:
+             self.trouble(u'ERROR: Insufficient system charset ' + repr(preferredencoding()))
              return None
  
      def _match_entry(self, info_dict):
              if re.search(rejecttitle, title, re.IGNORECASE):
                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
          return None
 +        
 +    def extract_info(self, url):
 +        '''
 +        Returns a list with a dictionary for each video we find.
 +         '''
 +        suitable_found = False
 +        for ie in self._ies:
 +            # Go to next InfoExtractor if not suitable
 +            if not ie.suitable(url):
 +                continue
 +
 +            # Warn if the _WORKING attribute is False
 +            if not ie.working():
 +                self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
 +                               u'and will probably not work. If you want to go on, use the -i option.')
 +
 +            # Suitable InfoExtractor found
 +            suitable_found = True
 +
 +            # Extract information from URL and process it
 +            try:
 +                ie_results = ie.extract(url)
 +                results = self.process_ie_results(ie_results, ie)
 +                return results
 +            except ExtractorError as de: # An error we somewhat expected
 +                self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
 +                break
 +            except Exception as e:
 +                if self.params.get('ignoreerrors', False):
 +                    self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
 +                    break
 +                else:
 +                    raise
 +        if not suitable_found:
 +                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 +    def extract_info_iterable(self, urls):
 +        '''
 +            Return the videos found for the urls
 +        '''
 +        results = []
 +        for url in urls:
 +            results.extend(self.extract_info(url))
 +        return results
 +        
 +    def process_ie_results(self, ie_results, ie):
 +        """
 +        Take the results of the ie and return a list of videos.
 +        For url elements it will search the suitable ie and get the videos
 +        For playlist elements it will process each of the elements of the 'entries' key
 +        """
 +        results = [] 
 +        for result in ie_results or []:
 +            result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the default old system
 +            if result_type == 'video':
 +                if not 'extractor' in result:
 +                    #Set the extractor only if it hasn't already been set somewhere else
 +                    result['extractor'] = ie.IE_NAME
 +                results.append(result)
 +            elif result_type == 'url':
 +                #We get the videos pointed by the url
 +                results.extend(self.extract_info(result['url']))
 +            elif result_type == 'playlist':
 +                #We process each entry in the playlist
 +                entries_result = self.process_ie_results(result['entries'], ie)
 +                results.extend(entries_result)
 +        return results
  
      def process_info(self, info_dict):
          """Process a single dictionary returned by an InfoExtractor."""
          if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
+             subtitle = info_dict['subtitles'][0]
+             (sub_error, sub_lang, sub) = subtitle
+             sub_format = self.params.get('subtitlesformat')
              try:
-                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
-                 self.report_writesubtitles(srtfn)
-                 with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
-                     srtfile.write(info_dict['subtitles'])
+                 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                 self.report_writesubtitles(sub_filename)
+                 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                     subfile.write(sub)
              except (OSError, IOError):
                  self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
                  return
+             if self.params.get('onlysubtitles', False):
+                 return 
+         if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+             subtitles = info_dict['subtitles']
+             sub_format = self.params.get('subtitlesformat')
+             for subtitle in subtitles:
+                 (sub_error, sub_lang, sub) = subtitle
+                 try:
+                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                     self.report_writesubtitles(sub_filename)
+                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                             subfile.write(sub)
+                 except (OSError, IOError):
+                     self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
+                     return
+             if self.params.get('onlysubtitles', False):
+                 return 
  
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              raise SameFileError(self.params['outtmpl'])
  
          for url in url_list:
 -            suitable_found = False
 -            for ie in self._ies:
 -                # Go to next InfoExtractor if not suitable
 -                if not ie.suitable(url):
 -                    continue
 -
 -                # Warn if the _WORKING attribute is False
 -                if not ie.working():
 -                    self.report_warning(u'the program functionality for this site has been marked as broken, '
 -                                        u'and will probably not work. If you want to go on, use the -i option.')
 +            videos = self.extract_info(url)
  
 -                # Suitable InfoExtractor found
 -                suitable_found = True
 -
 -                # Extract information from URL and process it
 +            for video in videos or []:
                  try:
 -                    videos = ie.extract(url)
 -                except ExtractorError as de: # An error we somewhat expected
 -                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
 -                    break
 -                except Exception as e:
 -                    if self.params.get('ignoreerrors', False):
 -                        self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
 -                        break
 -                    else:
 -                        raise
 -
 -                if len(videos or []) > 1 and self.fixed_template():
 -                    raise SameFileError(self.params['outtmpl'])
 -
 -                for video in videos or []:
 -                    video['extractor'] = ie.IE_NAME
 -                    try:
 -                        self.increment_downloads()
 -                        self.process_info(video)
 -                    except UnavailableVideoError:
 -                        self.trouble(u'\nERROR: unable to download video')
 -
 -                # Suitable InfoExtractor had been found; go to next URL
 -                break
 -
 -            if not suitable_found:
 -                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 +                    self.increment_downloads()
 +                    self.process_info(video)
 +                except UnavailableVideoError:
 +                    self.trouble(u'\nERROR: unable to download video')
  
          return self._download_retcode
  
index e714fa6b078a87f0520c661d9a73db71bf78df6b,835428f3232afb8a6aaeca5c72b64bf1c3cd11b7..dd4a776e4a50adf7e71ab4580bbbc2fcac81e65d
@@@ -48,7 -48,7 +48,7 @@@ class InfoExtractor(object)
      uploader_id:    Nickname or id of the video uploader.
      location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
-     subtitles:      The .srt file contents.
+     subtitles:      The subtitle file contents.
      urlhandle:      [internal] The urlHandle to be used to download the file,
                      like returned by urllib.request.urlopen
  
      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the data of the page as a string """
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+         content_type = urlh.headers.get('Content-Type', '')
+         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+         if m:
+             encoding = m.group(1)
+         else:
+             encoding = 'utf-8'
          webpage_bytes = urlh.read()
-         return webpage_bytes.decode('utf-8', 'replace')
+         return webpage_bytes.decode(encoding, 'replace')
 +        
 +    #Methods for following #608
 +    #They set the correct value of the '_type' key
 +    def video_result(self, video_info):
 +        """Returns a video"""
 +        video_info['_type'] = 'video'
 +        return video_info
 +    def url_result(self, url, ie=None):
 +        """Returns a url that points to a page that should be processed"""
 +        #TODO: ie should be the class used for getting the info
 +        video_info = {'_type': 'url',
 +                      'url': url}
 +        return video_info
 +    def playlist_result(self, entries):
 +        """Returns a playlist"""
 +        video_info = {'_type': 'playlist',
 +                      'entries': entries}
 +        return video_info
  
  
  class YoutubeIE(InfoExtractor):
  
      def report_video_subtitles_download(self, video_id):
          """Report attempt to download video info webpage."""
-         self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
+         self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id)
+     def report_video_subtitles_request(self, video_id, sub_lang, format):
+         """Report attempt to download video info webpage."""
+         self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
+     def report_video_subtitles_available(self, video_id, sub_lang_list):
+         """Report available subtitles."""
+         sub_lang = ",".join(list(sub_lang_list.keys()))
+         self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, sub_lang))
  
      def report_information_extraction(self, video_id):
          """Report attempt to extract video information."""
          """Indicate the download will use the RTMP protocol."""
          self._downloader.to_screen(u'[youtube] RTMP download detected')
  
-     def _closed_captions_xml_to_srt(self, xml_string):
-         srt = ''
-         texts = re.findall(r'<text start="([\d\.]+)"( dur="([\d\.]+)")?>([^<]+)</text>', xml_string, re.MULTILINE)
-         # TODO parse xml instead of regex
-         for n, (start, dur_tag, dur, caption) in enumerate(texts):
-             if not dur: dur = '4'
-             start = float(start)
-             end = start + float(dur)
-             start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
-             end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
-             caption = unescapeHTML(caption)
-             caption = unescapeHTML(caption) # double cycle, intentional
-             srt += str(n+1) + '\n'
-             srt += start + ' --> ' + end + '\n'
-             srt += caption + '\n\n'
-         return srt
-     def _extract_subtitles(self, video_id):
+     def _get_available_subtitles(self, video_id):
          self.report_video_subtitles_download(video_id)
          request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
          try:
-             srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
-         srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
-         srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
-         if not srt_lang_list:
-             return (u'WARNING: video has no closed captions', None)
-         if self._downloader.params.get('subtitleslang', False):
-             srt_lang = self._downloader.params.get('subtitleslang')
-         elif 'en' in srt_lang_list:
-             srt_lang = 'en'
-         else:
-             srt_lang = list(srt_lang_list.keys())[0]
-         if not srt_lang in srt_lang_list:
-             return (u'WARNING: no closed captions found in the specified language', None)
+         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
+         if not sub_lang_list:
+             return (u'WARNING: video doesn\'t have subtitles', None)
+         return sub_lang_list
+     def _list_available_subtitles(self, video_id):
+         sub_lang_list = self._get_available_subtitles(video_id)
+         self.report_video_subtitles_available(video_id, sub_lang_list)
+     def _request_subtitle(self, sub_lang, sub_name, video_id, format):
+         self.report_video_subtitles_request(video_id, sub_lang, format)
          params = compat_urllib_parse.urlencode({
-             'lang': srt_lang,
-             'name': srt_lang_list[srt_lang].encode('utf-8'),
+             'lang': sub_lang,
+             'name': sub_name,
              'v': video_id,
+             'fmt': format,
          })
          url = 'http://www.youtube.com/api/timedtext?' + params
          try:
-             srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
+             sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
-         if not srt_xml:
+         if not sub:
              return (u'WARNING: Did not fetch video subtitles', None)
-         return (None, self._closed_captions_xml_to_srt(srt_xml))
+         return (None, sub_lang, sub)
+     def _extract_subtitle(self, video_id):
+         sub_lang_list = self._get_available_subtitles(video_id)
+         sub_format = self._downloader.params.get('subtitlesformat')
+         if self._downloader.params.get('subtitleslang', False):
+             sub_lang = self._downloader.params.get('subtitleslang')
+         elif 'en' in sub_lang_list:
+             sub_lang = 'en'
+         else:
+             sub_lang = list(sub_lang_list.keys())[0]
+         if not sub_lang in sub_lang_list:
+             return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None)
+         subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
+         return [subtitle]
+     def _extract_all_subtitles(self, video_id):
+         sub_lang_list = self._get_available_subtitles(video_id)
+         sub_format = self._downloader.params.get('subtitlesformat')
+         subtitles = []
+         for sub_lang in sub_lang_list:
+             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
+             subtitles.append(subtitle)
+         return subtitles
  
      def _print_formats(self, formats):
          print('Available formats:')
          else:
              video_description = ''
  
-         # closed captions
+         # subtitles
          video_subtitles = None
          if self._downloader.params.get('writesubtitles', False):
-             (srt_error, video_subtitles) = self._extract_subtitles(video_id)
-             if srt_error:
-                 self._downloader.trouble(srt_error)
+             video_subtitles = self._extract_subtitle(video_id)
+             if video_subtitles:
+                 (sub_error, sub_lang, sub) = video_subtitles[0]
+                 if sub_error:
+                     self._downloader.trouble(sub_error)
+         if self._downloader.params.get('allsubtitles', False):
+             video_subtitles = self._extract_all_subtitles(video_id)
+             for video_subtitle in video_subtitles:
+                 (sub_error, sub_lang, sub) = video_subtitle
+                 if sub_error:
+                     self._downloader.trouble(sub_error)
+         if self._downloader.params.get('listsubtitles', False):
+             sub_lang_list = self._list_available_subtitles(video_id)
+             return
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
@@@ -1299,7 -1318,8 +1336,8 @@@ class GenericIE(InfoExtractor)
  
      def report_download_webpage(self, video_id):
          """Report webpage download."""
-         self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
+         if not self._downloader.params.get('test', False):
+             self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
          self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
  
      def report_extraction(self, video_id):
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
  
      def _test_redirect(self, url):
 -        """Check if it is a redirect, like url shorteners, in case restart chain."""
 +        """Check if it is a redirect, like url shorteners, in case return the new url."""
          class HeadRequest(compat_urllib_request.Request):
              def get_method(self):
                  return "HEAD"
              return False
  
          self.report_following_redirect(new_url)
 -        self._downloader.download([new_url])
 -        return True
 +        return new_url
  
      def _real_extract(self, url):
 -        if self._test_redirect(url): return
 +        new_url = self._test_redirect(url)
 +        if new_url: return [self.url_result(new_url)]
  
          video_id = url.split('/')[-1]
-         request = compat_urllib_request.Request(url)
          try:
-             self.report_download_webpage(video_id)
-             webpage = compat_urllib_request.urlopen(request).read()
-         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-             return
+             webpage = self._download_webpage(url, video_id)
          except ValueError as err:
              # since this is the last-resort InfoExtractor, if
              # this error is thrown, it'll be thrown here
@@@ -1774,8 -1789,9 +1807,8 @@@ class YoutubePlaylistIE(InfoExtractor)
          else:
              self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos)))
  
 -        for video in videos:
 -            self._downloader.download([video])
 -        return
 +        url_results = [self.url_result(url) for url in videos]
 +        return [self.playlist_result(url_results)]
  
  
  class YoutubeChannelIE(InfoExtractor):
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
 -        for id in video_ids:
 -            self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
 -        return
 +        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
 +        url_entries = [self.url_result(url) for url in urls]
 +        return [self.playlist_result(url_entries)]
  
  
  class YoutubeUserIE(InfoExtractor):
          self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
                  (username, all_ids_count, len(video_ids)))
  
 -        for video_id in video_ids:
 -            self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id])
 +        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
 +        url_results = [self.url_result(url) for url in urls]
 +        return [self.playlist_result(url_results)]
  
  
  class BlipTVUserIE(InfoExtractor):
          self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" %
                  (self.IE_NAME, username, all_ids_count, len(video_ids)))
  
 -        for video_id in video_ids:
 -            self._downloader.download([u'http://blip.tv/'+video_id])
 +        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
 +        url_entries = [self.url_result(url) for url in urls]
 +        return [self.playlist_result(url_entries)]
  
  
  class DepositFilesIE(InfoExtractor):
@@@ -2576,7 -2590,7 +2609,7 @@@ class EscapistIE(InfoExtractor)
              'uploader': showName,
              'upload_date': None,
              'title': showName,
-             'ext': 'flv',
+             'ext': 'mp4',
              'thumbnail': imgUrl,
              'description': description,
              'player_url': playerUrl,
@@@ -3972,11 -3986,11 +4005,11 @@@ class KeekIE(InfoExtractor)
          webpage = self._download_webpage(url, video_id)
          m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
          title = unescapeHTML(m.group('title'))
-         m = re.search(r'<div class="bio-names-and-report">[\s\n]+<h4>(?P<uploader>\w+)</h4>', webpage)
-         uploader = unescapeHTML(m.group('uploader'))
+         m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
+         uploader = clean_html(m.group('uploader'))
          info = {
-                 'id':video_id,
-                 'url':video_url,
+                 'id': video_id,
+                 'url': video_url,
                  'ext': 'mp4',
                  'title': title,
                  'thumbnail': thumbnail,
@@@ -4113,6 -4127,40 +4146,40 @@@ class MySpassIE(InfoExtractor)
          }
          return [info]
  
+ class SpiegelIE(InfoExtractor):
+     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?$'
+     def _real_extract(self, url):
+         m = re.match(self._VALID_URL, url)
+         video_id = m.group('videoID')
+         webpage = self._download_webpage(url, video_id)
+         m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
+         if not m:
+             raise ExtractorError(u'Cannot find title')
+         video_title = unescapeHTML(m.group(1))
+         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+         xml_code = self._download_webpage(xml_url, video_id,
+                     note=u'Downloading XML', errnote=u'Failed to download XML')
+         idoc = xml.etree.ElementTree.fromstring(xml_code)
+         last_type = idoc[-1]
+         filename = last_type.findall('./filename')[0].text
+         duration = float(last_type.findall('./duration')[0].text)
+         video_url = 'http://video2.spiegel.de/flash/' + filename
+         video_ext = filename.rpartition('.')[2]
+         info = {
+             'id': video_id,
+             'url': video_url,
+             'ext': video_ext,
+             'title': video_title,
+             'duration': duration,
+         }
+         return [info]
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
          KeekIE(),
          TEDIE(),
          MySpassIE(),
+         SpiegelIE(),
          GenericIE()
      ]