]> git.bitcoin.ninja Git - youtube-dl/blobdiff - youtube_dl/InfoExtractors.py
Removed conversion from youtube closed caption format to srt since youtube api suppor...
[youtube-dl] / youtube_dl / InfoExtractors.py
index 53fab690a41c55730b6339fd213b72345e3cd692..e3998fbe88173ae1cb949d2f6256fa0fc06fb530 100755 (executable)
@@ -228,23 +228,6 @@ class YoutubeIE(InfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self._downloader.to_screen(u'[youtube] RTMP download detected')
 
-    def _closed_captions_xml_to_srt(self, xml_string):
-        srt = ''
-        texts = re.findall(r'<text start="([\d\.]+)"( dur="([\d\.]+)")?>([^<]+)</text>', xml_string, re.MULTILINE)
-        # TODO parse xml instead of regex
-        for n, (start, dur_tag, dur, caption) in enumerate(texts):
-            if not dur: dur = '4'
-            start = float(start)
-            end = start + float(dur)
-            start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
-            end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
-            caption = unescapeHTML(caption)
-            caption = unescapeHTML(caption) # double cycle, intentional
-            srt += str(n+1) + '\n'
-            srt += start + ' --> ' + end + '\n'
-            srt += caption + '\n\n'
-        return srt
-
     def _extract_subtitles(self, video_id):
         self.report_video_subtitles_download(video_id)
         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
@@ -268,15 +251,16 @@ class YoutubeIE(InfoExtractor):
             'lang': srt_lang,
             'name': srt_lang_list[srt_lang].encode('utf-8'),
             'v': video_id,
+            'fmt': 'srt',
         })
         url = 'http://www.youtube.com/api/timedtext?' + params
         try:
-            srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
+            srt = compat_urllib_request.urlopen(url).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
-        if not srt_xml:
+        if not srt:
             return (u'WARNING: Did not fetch video subtitles', None)
-        return (None, self._closed_captions_xml_to_srt(srt_xml))
+        return (None, srt)
 
     def _print_formats(self, formats):
         print('Available formats:')
@@ -1330,7 +1314,7 @@ class GenericIE(InfoExtractor):
         opener = compat_urllib_request.OpenerDirector()
         for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                         HTTPMethodFallback, HEADRedirectHandler,
-                        compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
+                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
             opener.add_handler(handler())
 
         response = opener.open(HeadRequest(url))
@@ -1366,6 +1350,9 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # Broaden the search a little bit
             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+        if mobj is None:
+            # Broaden the search a little bit: JWPlayer JS loader
+            mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
         if mobj is None:
             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
             return
@@ -2098,6 +2085,10 @@ class FacebookIE(InfoExtractor):
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
         video_url = params['hd_src']
+        if not video_url:
+            video_url = params['sd_src']
+        if not video_url:
+            raise ExtractorError(u'Cannot find video URL')
         video_duration = int(params['video_duration'])
 
         m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
@@ -2233,7 +2224,7 @@ class MyVideoIE(InfoExtractor):
         webpage = self._download_webpage(webpage_url, video_id)
 
         self.report_extraction(video_id)
-        mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
+        mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\' />',
                  webpage)
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract media URL')
@@ -3725,13 +3716,13 @@ class YouPornIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         # Get the video title
-        result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
+        result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
         if result is None:
-            raise ExtractorError(u'ERROR: unable to extract video title')
+            raise ExtractorError(u'Unable to extract video title')
         video_title = result.group('title').strip()
 
         # Get the video date
-        result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
+        result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
         if result is None:
             self._downloader.to_stderr(u'WARNING: unable to extract video date')
             upload_date = None
@@ -3739,9 +3730,9 @@ class YouPornIE(InfoExtractor):
             upload_date = result.group('date').strip()
 
         # Get the video uploader
-        result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
+        result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
         if result is None:
-            self._downloader.to_stderr(u'ERROR: unable to extract uploader')
+            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
             video_uploader = None
         else:
             video_uploader = result.group('uploader').strip()