projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
6b9cbd0
)
[youtube] replace youtube redirect urls in description(fixes #14517)
author
Remita Amine
<remitamine@gmail.com>
Tue, 17 Oct 2017 10:07:37 +0000
(10:07 +0000)
committer
Remita Amine
<remitamine@gmail.com>
Tue, 17 Oct 2017 10:07:37 +0000
(10:07 +0000)
youtube_dl/extractor/youtube.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index 4e8db240d3f9d141cfa457c1d941d1b5399f0c67..5aef555fb9f093bbc27faacd508f8042d706b20e 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-1622,6
+1622,17
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
# description
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
# description
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
+
+ def replace_url(m):
+ redir_url = compat_urlparse.urljoin(url, m.group(1))
+ parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+ if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+ qs = compat_parse_qs(parsed_redir_url.query)
+ q = qs.get('q')
+ if q and q[0]:
+ return q[0]
+ return redir_url
+
description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
@@
-1630,7
+1641,7
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
- ''',
lambda m: compat_urlparse.urljoin(url, m.group(1))
, video_description)
+ ''',
replace_url
, video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)