[youtube] replace youtube redirect urls in description (fixes #14517)

author Remita Amine <remitamine@gmail.com>

Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)

committer Remita Amine <remitamine@gmail.com>

Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)
author Remita Amine <remitamine@gmail.com>
Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)
committer Remita Amine <remitamine@gmail.com>
Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 4e8db240d3f9d141cfa457c1d941d1b5399f0c67..5aef555fb9f093bbc27faacd508f8042d706b20e 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          # description
          description_original = video_description = get_element_by_id("eow-description", video_webpage)
          if video_description:
          # description
          description_original = video_description = get_element_by_id("eow-description", video_webpage)
          if video_description:
+
+            def replace_url(m):
+                redir_url = compat_urlparse.urljoin(url, m.group(1))
+                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+                    qs = compat_parse_qs(parsed_redir_url.query)
+                    q = qs.get('q')
+                    if q and q[0]:
+                        return q[0]
+                return redir_url
+
              description_original = video_description = re.sub(r'''(?x)
                  <a\s+
                      (?:[a-zA-Z-]+="[^"]*"\s+)*?
              description_original = video_description = re.sub(r'''(?x)
                  <a\s+
                      (?:[a-zA-Z-]+="[^"]*"\s+)*?
@@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      class="[^"]*"[^>]*>
                  [^<]+\.{3}\s*
                  </a>
                      class="[^"]*"[^>]*>
                  [^<]+\.{3}\s*
                  </a>
-            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
+            ''', replace_url, video_description)
              video_description = clean_html(video_description)
          else:
              fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
              video_description = clean_html(video_description)
          else:
              fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
author	Remita Amine <remitamine@gmail.com>
	Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)
committer	Remita Amine <remitamine@gmail.com>
	Tue, 17 Oct 2017 10:07:37 +0000 (10:07 +0000)