[extractor/common] Respect URL query in _extract_wowza_formats (closes #14645)

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index b4af3f987a4fadd0085a18e50b7909ec4eb5e6e5..a67ac441191ae0d2342f3bb5979589a0e5004240 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1401,7 +1401,7 @@ class InfoExtractor(object):
              media_url = media.get('URI')
              if media_url:
                  format_id = []
-                for v in (group_id, name):
+                for v in (m3u8_id, group_id, name):
                      if v:
                          format_id.append(v)
                  f = {
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
                              # can't be used at the same time
                              if '%(Number' in media_template and 's' not in representation_ms_info:
                                  segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration':
+                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
                                      segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                      representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                  representation_ms_info['fragments'] = [{
@@ -2186,7 +2186,10 @@ class InfoExtractor(object):
                      if is_plain_url:
                          # res attribute is not standard but seen several times
                          # in the wild
-                        f['height'] = int_or_none(source_attributes.get('res'))
+                        f.update({
+                            'height': int_or_none(source_attributes.get('res')),
+                            'format_id': source_attributes.get('label'),
+                        })
                          f.update(formats[0])
                          media_info['formats'].append(f)
                      else:
@@ -2230,27 +2233,35 @@ class InfoExtractor(object):
          return formats
  
      def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+        query = compat_urlparse.urlparse(url).query
          url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
          url_base = self._search_regex(
              r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
          http_base_url = '%s:%s' % ('http', url_base)
          formats = []
+
+        def manifest_url(manifest):
+            m_url = '%s/%s' % (http_base_url, manifest)
+            if query:
+                m_url += '?%s' % query
+            return m_url
+
          if 'm3u8' not in skip_protocols:
              formats.extend(self._extract_m3u8_formats(
-                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                manifest_url('playlist.m3u8'), video_id, 'mp4',
                  m3u8_entry_protocol, m3u8_id='hls', fatal=False))
          if 'f4m' not in skip_protocols:
              formats.extend(self._extract_f4m_formats(
-                http_base_url + '/manifest.f4m',
+                manifest_url('manifest.f4m'),
                  video_id, f4m_id='hds', fatal=False))
          if 'dash' not in skip_protocols:
              formats.extend(self._extract_mpd_formats(
-                http_base_url + '/manifest.mpd',
+                manifest_url('manifest.mpd'),
                  video_id, mpd_id='dash', fatal=False))
          if re.search(r'(?:/smil:|\.smil)', url_base):
              if 'smil' not in skip_protocols:
                  rtmp_formats = self._extract_smil_formats(
-                    http_base_url + '/jwplayer.smil',
+                    manifest_url('jwplayer.smil'),
                      video_id, fatal=False)
                  for rtmp_format in rtmp_formats:
                      rtsp_format = rtmp_format.copy()
@@ -2319,7 +2330,6 @@ class InfoExtractor(object):
              formats = self._parse_jwplayer_formats(
                  video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
                  mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
-            self._sort_formats(formats)
  
              subtitles = {}
              tracks = video_data.get('tracks')
@@ -2336,16 +2346,25 @@ class InfoExtractor(object):
                          'url': self._proto_relative_url(track_url)
                      })
  
-            entries.append({
+            entry = {
                  'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
+                'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
                  'description': video_data.get('description'),
                  'thumbnail': self._proto_relative_url(video_data.get('image')),
                  'timestamp': int_or_none(video_data.get('pubdate')),
                  'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                  'subtitles': subtitles,
-                'formats': formats,
-            })
+            }
+            # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+            if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+                entry.update({
+                    '_type': 'url_transparent',
+                    'url': formats[0]['url'],
+                })
+            else:
+                self._sort_formats(formats)
+                entry['formats'] = formats
+            entries.append(entry)
          if len(entries) == 1:
              return entries[0]
          else:
@@ -2446,10 +2465,12 @@ class InfoExtractor(object):
                  self._downloader.report_warning(msg)
          return res
  
-    def _set_cookie(self, domain, name, value, expire_time=None):
+    def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+                    path='/', secure=False, discard=False, rest={}, **kwargs):
          cookie = compat_cookiejar.Cookie(
-            0, name, value, None, None, domain, None,
-            None, '/', True, False, expire_time, '', None, None, None)
+            0, name, value, port, port is not None, domain, True,
+            domain.startswith('.'), path, True, secure, expire_time,
+            discard, None, None, rest)
          self._downloader.cookiejar.set_cookie(cookie)
  
      def _get_cookies(self, url):