[tumblr] Add support for pornhub embeds (Closes #5963)

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 28f672e42d130e616a0763fc8fb8d63b3b6df74b..49e4dc7109e151ae124ed1aac15a9762d00eac21 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -23,6 +23,7 @@ from ..compat import (
  )
  from ..utils import (
      age_restricted,
+    bug_reports_message,
      clean_html,
      compiled_regex_type,
      ExtractorError,
@@ -46,7 +47,7 @@ class InfoExtractor(object):
      information possibly downloading the video to the file system, among
      other possible outcomes.
  
-    The type field determines the the type of the result.
+    The type field determines the type of the result.
      By far the most common value (and the default if _type is missing) is
      "video", which indicates a single video.
  
@@ -110,11 +111,8 @@ class InfoExtractor(object):
                                    (quality takes higher priority)
                                   -1 for default (order by other properties),
                                   -2 or smaller for less than default.
-                    * http_method  HTTP method to use for the download.
                      * http_headers  A dictionary of additional HTTP headers
                                   to add to the request.
-                    * http_post_data  Additional data to send with a POST
-                                 request.
                      * stretched_ratio  If given and not 1, indicates that the
                                   video's pixels are not square.
                                   width : height ratio as float.
@@ -556,8 +554,7 @@ class InfoExtractor(object):
          elif fatal:
              raise RegexNotFoundError('Unable to extract %s' % _name)
          else:
-            self._downloader.report_warning('unable to extract %s; '
-                                            'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
              return None
  
      def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -572,7 +569,7 @@ class InfoExtractor(object):
  
      def _get_login_info(self):
          """
-        Get the the login info as (username, password)
+        Get the login info as (username, password)
          It will look in the netrc file using the _NETRC_MACHINE value
          If there's no info available, return (None, None)
          """
@@ -708,7 +705,7 @@ class InfoExtractor(object):
          return self._html_search_meta('twitter:player', html,
                                        'twitter card player')
  
-    def _sort_formats(self, formats):
+    def _sort_formats(self, formats, field_preference=None):
          if not formats:
              raise ExtractorError('No video formats found')
  
@@ -718,6 +715,9 @@ class InfoExtractor(object):
              if not f.get('ext') and 'url' in f:
                  f['ext'] = determine_ext(f['url'])
  
+            if isinstance(field_preference, (list, tuple)):
+                return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
+
              preference = f.get('preference')
              if preference is None:
                  proto = f.get('protocol')
@@ -764,7 +764,7 @@ class InfoExtractor(object):
                  f.get('fps') if f.get('fps') is not None else -1,
                  f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                  f.get('source_preference') if f.get('source_preference') is not None else -1,
-                f.get('format_id'),
+                f.get('format_id') if f.get('format_id') is not None else '',
              )
          formats.sort(key=_formats_key)
  
@@ -786,8 +786,8 @@ class InfoExtractor(object):
              return True
          except ExtractorError as e:
              if isinstance(e.cause, compat_HTTPError):
-                self.report_warning(
-                    '%s URL is invalid, skipping' % item, video_id)
+                self.to_screen(
+                    '%s: %s URL is invalid, skipping' % (video_id, item))
                  return False
              raise
  
@@ -846,7 +846,7 @@ class InfoExtractor(object):
  
      def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                                entry_protocol='m3u8', preference=None,
-                              m3u8_id=None):
+                              m3u8_id=None, note=None, errnote=None):
  
          formats = [{
              'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -865,8 +865,8 @@ class InfoExtractor(object):
  
          m3u8_doc = self._download_webpage(
              m3u8_url, video_id,
-            note='Downloading m3u8 information',
-            errnote='Failed to download m3u8 information')
+            note=note or 'Downloading m3u8 information',
+            errnote=errnote or 'Failed to download m3u8 information')
          last_info = None
          last_media = None
          kv_rex = re.compile(
@@ -896,7 +896,7 @@ class InfoExtractor(object):
                  format_id = []
                  if m3u8_id:
                      format_id.append(m3u8_id)
-                last_media_name = last_media.get('NAME') if last_media else None
+                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
                  format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                  f = {
                      'format_id': '-'.join(format_id),
@@ -1072,9 +1072,6 @@ class InfoExtractor(object):
      def _get_automatic_captions(self, *args, **kwargs):
          raise NotImplementedError("This method must be implemented by subclasses")
  
-    def _subtitles_timecode(self, seconds):
-        return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
-
  
  class SearchInfoExtractor(InfoExtractor):
      """