Merge branch 'lecture2go' of https://github.com/nichdu/youtube-dl into nichdu-lecture2go

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 3c629d38a1c7cea5f0b45d600c41f9d9ff658873..3d8b31f9830b218e02021b41214010b4f61d29a9 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -17,6 +17,8 @@ from ..compat import (
      compat_chr,
      compat_parse_qs,
      compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
@@ -533,7 +535,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'dorappi2000',
                  'formats': 'mincount:33',
              },
-        }
+        },
+        # DASH manifest with segment_list
+        {
+            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+            'md5': '8ce563a1d667b599d21064e982ab9e31',
+            'info_dict': {
+                'id': 'CsmdDsKjzN8',
+                'ext': 'mp4',
+                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+                'uploader': 'Airtek',
+                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+            },
+            'params': {
+                'youtube_include_dash_manifest': True,
+                'format': '135',  # bestvideo
+            }
+        },
      ]
  
      def __init__(self, *args, **kwargs):
@@ -824,6 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      # TODO implement WebVTT downloading
                      pass
                  elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                      format_id = r.attrib['id']
                      video_url = url_el.text
                      filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -837,6 +858,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          'filesize': filesize,
                          'fps': int_or_none(r.attrib.get('frameRate')),
                      }
+                    if segment_list is not None:
+                        f.update({
+                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+                            'protocol': 'http_dash_segments',
+                        })
                      try:
                          existing_format = next(
                              fo for fo in formats
@@ -865,7 +892,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          # Extract original video URL from URL with redirection, like age verification, using next_url parameter
          mobj = re.search(self._NEXT_URL_RE, url)
          if mobj:
-            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
          video_id = self.extract_id(url)
  
          # Get video webpage
@@ -888,6 +915,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
          # Get video info
          embed_webpage = None
+        is_live = None
          if re.search(r'player-age-gate-content">', video_webpage) is not None:
              age_gate = True
              # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -920,6 +948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      # Convert to the same format returned by compat_parse_qs
                      video_info = dict((k, [v]) for k, v in args.items())
                      add_dash_mpd(video_info)
+                if args.get('livestream') == '1' or args.get('live_playback') == 1:
+                    is_live = True
              if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                  # We also try looking in get_video_info since it may contain different dashmpd
                  # URL that points to a DASH manifest with possibly different itag set (some itags
@@ -973,7 +1003,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          # uploader
          if 'author' not in video_info:
              raise ExtractorError('Unable to extract uploader name')
-        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
+        video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
  
          # uploader_id
          video_uploader_id = None
@@ -1000,7 +1030,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              self._downloader.report_warning('unable to extract video thumbnail')
              video_thumbnail = None
          else:   # don't panic if we can't find it
-            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
+            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
  
          # upload date
          upload_date = self._html_search_meta(
@@ -1062,7 +1092,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              self._downloader.report_warning('unable to extract video duration')
              video_duration = None
          else:
-            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
+            video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
  
          # annotations
          video_annotations = None
@@ -1224,6 +1254,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'dislike_count': dislike_count,
              'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
              'formats': formats,
+            'is_live': is_live,
          }
  
  
@@ -1609,7 +1640,7 @@ class YoutubeSearchURLIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
  
          webpage = self._download_webpage(url, query)
          result_code = self._search_regex(