[ard] Remove deleted video test case
[youtube-dl] / youtube_dl / extractor / common.py
index 03f3f18c83012cdced0e305fe1cc02d69a85bb7c..8b4ef3f09a6b0d00d223153e333d6459ff932e6a 100644 (file)
@@ -14,6 +14,7 @@ import xml.etree.ElementTree
 
 from ..compat import (
     compat_cookiejar,
+    compat_HTTPError,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -26,6 +27,7 @@ from ..utils import (
     compiled_regex_type,
     ExtractorError,
     float_or_none,
+    HEADRequest,
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
@@ -87,7 +89,8 @@ class InfoExtractor(object):
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
-                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
+                                 "http", "https", "rtsp", "rtmp", "rtmpe",
+                                 "m3u8", or "m3u8_native".
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -108,15 +111,17 @@ class InfoExtractor(object):
                                   (quality takes higher priority)
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
-                    * http_referer  HTTP Referer header value to set.
                     * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
                                  to add to the request.
                     * http_post_data  Additional data to send with a POST
                                  request.
                     * stretched_ratio  If given and not 1, indicates that the
-                                       video's pixels are not square.
-                                       width : height ratio as float.
+                                 video's pixels are not square.
+                                 width : height ratio as float.
+                    * no_resume  The server does not support resuming the
+                                 (HTTP or RTMP) download. Boolean.
+
     url:            Final video URL.
     ext:            Video filename extension.
     format:         The video format, defaults to ext (used for --get-format)
@@ -130,7 +135,9 @@ class InfoExtractor(object):
                     something like "4234987", title "Dancing naked mole rats",
                     and display_id "dancing-naked-mole-rats"
     thumbnails:     A list of dictionaries, with the following entries:
+                        * "id" (optional, string) - Thumbnail format ID
                         * "url"
+                        * "preference" (optional, int) - quality of the image
                         * "width" (optional, int)
                         * "height" (optional, int)
                         * "resolution" (optional, string "{width}x{height"},
@@ -712,6 +719,27 @@ class InfoExtractor(object):
             )
         formats.sort(key=_formats_key)
 
+    def _check_formats(self, formats, video_id):
+        if formats:
+            formats[:] = filter(
+                lambda f: self._is_valid_url(
+                    f['url'], video_id,
+                    item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+                formats)
+
+    def _is_valid_url(self, url, video_id, item='video'):
+        try:
+            self._request_webpage(
+                HEADRequest(url), video_id,
+                'Checking %s URL' % item)
+            return True
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self.report_warning(
+                    '%s URL is invalid, skipping' % item, video_id)
+                return False
+            raise
+
     def http_scheme(self):
         """ Either "http:" or "https:", depending on the user's preferences """
         return (