[extractor/common] use compat_parse_qs in update_url_params

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 14f57563529ad5782b3c70c24d4760c78c3b334d..a95387cee4d2e601a1c42d4c784c5cce96735026 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -20,6 +20,7 @@ from ..compat import (
      compat_urllib_error,
      compat_urllib_parse,
      compat_urlparse,
+    compat_parse_qs,
      compat_str,
      compat_etree_fromstring,
  )
@@ -157,12 +158,14 @@ class InfoExtractor(object):
      thumbnail:      Full URL to a video thumbnail image.
      description:    Full video description.
      uploader:       Full name of the video uploader.
+    license:        License name the video is licensed under.
      creator:        The main artist who created the video.
      release_date:   The date (YYYYMMDD) when the video was released.
      timestamp:      UNIX timestamp of the moment the video became available.
      upload_date:    Video upload date (YYYYMMDD).
                      If not explicitly set, calculated from timestamp.
      uploader_id:    Nickname or id of the video uploader.
+    uploader_url:   Full URL to a personal webpage of the video uploader.
      location:       Physical location where the video was filmed.
      subtitles:      The available subtitles as a dictionary in the format
                      {language: subformats}. "subformats" is a list sorted from
@@ -515,6 +518,13 @@ class InfoExtractor(object):
              else:
                  self.report_warning(errmsg + str(ve))
  
+    def update_url_params(self, url, params):
+        parsed_url = compat_urlparse.urlparse(url)
+        qs = compat_parse_qs(parsed_url.query)
+        qs.update(params)
+        return compat_urlparse.urlunparse(
+            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+
      def report_warning(self, msg, video_id=None):
          idstr = '' if video_id is None else '%s: ' % video_id
          self._downloader.report_warning(
@@ -1033,11 +1043,21 @@ class InfoExtractor(object):
              return []
          m3u8_doc, urlh = res
          m3u8_url = urlh.geturl()
-        # A Media Playlist Tag MUST NOT appear in a Master Playlist
-        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
-        # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
-        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
-        if '#EXT-X-TARGETDURATION' in m3u8_doc:
+
+        # We should try extracting formats only from master playlists [1], i.e.
+        # playlists that describe available qualities. On the other hand, media
+        # playlists [2] should be returned as is since they contain just the
+        # media without quality renditions.
+        # Fortunately, a master playlist can be easily distinguished from a
+        # media playlist based on the presence of particular tags. As per [1, 2],
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
+        # As per [3], the #EXT-X-TARGETDURATION tag is REQUIRED for every media
+        # playlist and MUST NOT appear in a master playlist, thus we can clearly
+        # detect a media playlist with this criterion.
+        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
+        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
+        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+        if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
              return [{
                  'url': m3u8_url,
                  'format_id': m3u8_id,
@@ -1084,19 +1104,29 @@ class InfoExtractor(object):
                      'protocol': entry_protocol,
                      'preference': preference,
                  }
-                codecs = last_info.get('CODECS')
-                if codecs:
-                    # TODO: looks like video codec is not always necessarily goes first
-                    va_codecs = codecs.split(',')
-                    if va_codecs[0]:
-                        f['vcodec'] = va_codecs[0]
-                    if len(va_codecs) > 1 and va_codecs[1]:
-                        f['acodec'] = va_codecs[1]
                  resolution = last_info.get('RESOLUTION')
                  if resolution:
                      width_str, height_str = resolution.split('x')
                      f['width'] = int(width_str)
                      f['height'] = int(height_str)
+                codecs = last_info.get('CODECS')
+                if codecs:
+                    vcodec, acodec = [None] * 2
+                    va_codecs = codecs.split(',')
+                    if len(va_codecs) == 1:
+                        # Audio-only entries usually come with a single codec
+                        # and no resolution. For more robustness, we also check
+                        # that the codec is mp4 audio.
+                        if not resolution and va_codecs[0].startswith('mp4a'):
+                            vcodec, acodec = 'none', va_codecs[0]
+                        else:
+                            vcodec = va_codecs[0]
+                    else:
+                        vcodec, acodec = va_codecs[:2]
+                    f.update({
+                        'acodec': acodec,
+                        'vcodec': vcodec,
+                    })
                  if last_media is not None:
                      f['m3u8_media'] = last_media
                      last_media = None
@@ -1600,6 +1630,15 @@ class InfoExtractor(object):
      def _get_automatic_captions(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
+    def mark_watched(self, *args, **kwargs):
+        if (self._downloader.params.get('mark_watched', False) and
+                (self._get_login_info()[0] is not None or
+                    self._downloader.params.get('cookiefile') is not None)):
+            self._mark_watched(*args, **kwargs)
+
+    def _mark_watched(self, *args, **kwargs):
+        raise NotImplementedError('This method must be implemented by subclasses')
+
  
  class SearchInfoExtractor(InfoExtractor):
      """