projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
9a340af
)
[redtube] Fix metadata extraction (closes #15472)
author
Sergey M․
<dstftw@gmail.com>
Fri, 2 Feb 2018 15:32:53 +0000
(22:32 +0700)
committer
Sergey M․
<dstftw@gmail.com>
Fri, 2 Feb 2018 15:32:53 +0000
(22:32 +0700)
youtube_dl/extractor/redtube.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index f70a75256c638f4a3ce9cda3b9577176e49f3cca..843e45d3683038a28d5ac64a5c3675b11020f97d 100644
(file)
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -46,9 +46,10 @@ class RedTubeIE(InfoExtractor):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
- (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
- r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
- webpage, 'title', group='title')
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
formats = []
sources = self._parse_json(
formats = []
sources = self._parse_json(
@@ -87,12 +88,13 @@ class RedTubeIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
- r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
+ r'<span[^>]+>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
- r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as