Merge remote-tracking branch 'sagittarian/vimeo-no-desc'
[youtube-dl] / youtube_dl / InfoExtractors.py
index 71f57b7c900924c309ecdcbdd134c320e67b63cd..0807306609bcde085046b68b8486b5b8f40a1d11 100755 (executable)
@@ -253,11 +253,11 @@ class YoutubeIE(InfoExtractor):
         try:
             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+            return (u'unable to download video subtitles: %s' % compat_str(err), None)
         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
         if not sub_lang_list:
-            return (u'WARNING: video doesn\'t have subtitles', None)
+            return (u'video doesn\'t have subtitles', None)
         return sub_lang_list
 
     def _list_available_subtitles(self, video_id):
@@ -265,6 +265,10 @@ class YoutubeIE(InfoExtractor):
         self.report_video_subtitles_available(video_id, sub_lang_list)
 
     def _request_subtitle(self, sub_lang, sub_name, video_id, format):
+        """
+        Return tuple:
+        (error_message, sub_lang, sub)
+        """
         self.report_video_subtitles_request(video_id, sub_lang, format)
         params = compat_urllib_parse.urlencode({
             'lang': sub_lang,
@@ -276,9 +280,9 @@ class YoutubeIE(InfoExtractor):
         try:
             sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
         if not sub:
-            return (u'WARNING: Did not fetch video subtitles', None)
+            return (u'Did not fetch video subtitles', None, None)
         return (None, sub_lang, sub)
 
     def _extract_subtitle(self, video_id):
@@ -297,7 +301,7 @@ class YoutubeIE(InfoExtractor):
         else:
             sub_lang = list(sub_lang_list.keys())[0]
         if not sub_lang in sub_lang_list:
-            return [(u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None, None)]
+            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
 
         subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
         return [subtitle]
@@ -305,6 +309,8 @@ class YoutubeIE(InfoExtractor):
     def _extract_all_subtitles(self, video_id):
         sub_lang_list = self._get_available_subtitles(video_id)
         sub_format = self._downloader.params.get('subtitlesformat')
+        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
+            return [(sub_lang_list[0], None, None)]
         subtitles = []
         for sub_lang in sub_lang_list:
             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
@@ -538,14 +544,14 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    self._downloader.trouble(sub_error)
+                    self._downloader.report_error(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
             for video_subtitle in video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitle
                 if sub_error:
-                    self._downloader.trouble(sub_error)
+                    self._downloader.report_error(sub_error)
 
         if self._downloader.params.get('listsubtitles', False):
             sub_lang_list = self._list_available_subtitles(video_id)
@@ -1124,7 +1130,7 @@ class VimeoIE(InfoExtractor):
         # Extract video description
         video_description = get_element_by_attribute("itemprop", "description", webpage)
         if video_description: video_description = clean_html(video_description)
-        else: video_description = ''
+        else: video_description = u''
 
         # Extract upload date
         video_upload_date = None
@@ -1716,9 +1722,7 @@ class YoutubePlaylistIE(InfoExtractor):
                         (?:
                            (?:course|view_play_list|my_playlists|artist|playlist|watch)
                            \? (?:.*?&)*? (?:p|a|list)=
-                        |  user/.*?/user/
                         |  p/
-                        |  user/.*?#[pg]/c/
                         )
                         ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
                         .*
@@ -3802,7 +3806,7 @@ class WorldStarHipHopIE(InfoExtractor):
         _title = r"""<title>(.*)</title>"""
 
         mobj = re.search(_title, webpage_src)
-        
+
         if mobj is not None:
             title = mobj.group(1)
         else:
@@ -3820,7 +3824,7 @@ class WorldStarHipHopIE(InfoExtractor):
             if mobj is not None:
                 title = mobj.group(1)
             thumbnail = None
-        
+
         results = [{
                     'id': video_id,
                     'url' : video_url,
@@ -4352,6 +4356,46 @@ class LiveLeakIE(InfoExtractor):
 
         return [info]
 
+class ARDIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+    def _real_extract(self, url):
+        # determine video id from url
+        m = re.match(self._VALID_URL, url)
+
+        numid = re.search(r'documentId=([0-9]+)', url)
+        if numid:
+            video_id = numid.group(1)
+        else:
+            video_id = m.group('video_id')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+        title = re.search(self._TITLE, html).group('title')
+        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+        if not streams:
+            assert '"fsk"' in html
+            self._downloader.report_error(u'this video is only available after 8:00 pm')
+            return
+
+        # choose default media type and highest quality for now
+        stream = max([s for s in streams if int(s["media_type"]) == 0],
+                     key=lambda s: int(s["quality"]))
+
+        # there's two possibilities: RTMP stream or HTTP download
+        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+        if stream['rtmp_url']:
+            self._downloader.to_screen(u'[%s] RTMP download detected' % self.IE_NAME)
+            assert stream['video_url'].startswith('mp4:')
+            info["url"] = stream["rtmp_url"]
+            info["play_path"] = stream['video_url']
+        else:
+            assert stream["video_url"].endswith('.mp4')
+            info["url"] = stream["video_url"]
+        return [info]
+
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4405,5 +4449,6 @@ def gen_extractors():
         MySpassIE(),
         SpiegelIE(),
         LiveLeakIE(),
+        ARDIE(),
         GenericIE()
     ]