[extractor/common] Check validity of direct URLs

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index d694e818e98dc29939118a6d9fccb8b942b03128..b928e24beed9cd161b2fa84c9b410980268c0061 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -39,6 +39,7 @@ from ..utils import (
      RegexNotFoundError,
      sanitize_filename,
      unescapeHTML,
+    unified_strdate,
      url_basename,
      xpath_text,
      xpath_with_ns,
@@ -152,6 +153,7 @@ class InfoExtractor(object):
      description:    Full video description.
      uploader:       Full name of the video uploader.
      creator:        The main artist who created the video.
+    release_date:   The date (YYYYMMDD) when the video was released.
      timestamp:      UNIX timestamp of the moment the video became available.
      upload_date:    Video upload date (YYYYMMDD).
                      If not explicitly set, calculated from timestamp.
@@ -516,6 +518,12 @@ class InfoExtractor(object):
              '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
              expected=True)
  
+    @staticmethod
+    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
+        raise ExtractorError(  # always raises; this helper never returns normally
+            '%s. You might want to use --proxy to workaround.' % msg,  # msg comes first, then the --proxy hint
+            expected=True)  # NOTE(review): same expected=True as the raise just above; presumably marks a user-facing, non-bug error - confirm
+
      # Methods for following #608
      @staticmethod
      def url_result(url, ie=None, video_id=None, video_title=None):
@@ -1037,6 +1045,7 @@ class InfoExtractor(object):
          video_id = os.path.splitext(url_basename(smil_url))[0]
          title = None
          description = None
+        upload_date = None
          for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
              name = meta.attrib.get('name')
              content = meta.attrib.get('content')
@@ -1046,11 +1055,22 @@ class InfoExtractor(object):
                  title = content
              elif not description and name in ('description', 'abstract'):
                  description = content
+            elif not upload_date and name == 'date':
+                upload_date = unified_strdate(content)
+
+        thumbnails = [{
+            'id': image.get('type'),
+            'url': image.get('src'),
+            'width': int_or_none(image.get('width')),
+            'height': int_or_none(image.get('height')),
+        } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
  
          return {
              'id': video_id,
              'title': title or video_id,
              'description': description,
+            'upload_date': upload_date,
+            'thumbnails': thumbnails,
              'formats': formats,
              'subtitles': subtitles,
          }
@@ -1077,7 +1097,7 @@ class InfoExtractor(object):
              if not src:
                  continue
  
-            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
              filesize = int_or_none(video.get('size') or video.get('fileSize'))
              width = int_or_none(video.get('width'))
              height = int_or_none(video.get('height'))
@@ -1125,7 +1145,7 @@ class InfoExtractor(object):
                  formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
                  continue
  
-            if src_url.startswith('http'):
+            if src_url.startswith('http') and self._is_valid_url(src, video_id):
                  http_count += 1
                  formats.append({
                      'url': src_url,