'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# DASH webm video
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# DASH webm audio
- '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
- '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
+ '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+ '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag",
u"upload_date": u"20121002",
- u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+ u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
+ u"categories": [u'Science & Technology'],
}
},
{
u'id': u'IB3lcPjvWLA',
u'ext': u'm4a',
u'title': u'Afrojack - The Spark ft. Spree Wilson',
- u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
+ u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
u'uploader': u'AfrojackVEVO',
u'uploader_id': u'AfrojackVEVO',
u'upload_date': u'20131011',
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
- r'signature=([a-zA-Z]+)', jscode,
+ r'signature=([$a-zA-Z]+)', jscode,
u'Initial JS player signature function name')
jsi = JSInterpreter(jscode)
# title
if 'title' in video_info:
- video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
+ video_title = video_info['title'][0]
else:
self._downloader.report_warning(u'Unable to extract video title')
video_title = u'_'
# upload date
upload_date = None
- mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
+ mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
+ if mobj is None:
+ mobj = re.search(
+ r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
+ video_webpage)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
+ m_cat_container = get_element_by_id("eow-category", video_webpage)
+ if m_cat_container:
+ category = self._html_search_regex(
+ r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+ default=None)
+ video_categories = None if category is None else [category]
+ else:
+ video_categories = None
+
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
+ 'categories': video_categories,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
| p/
)
(
- (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+ (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
- ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+ ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
title_span = (search_title('playlist-title') or
search_title('title long-title') or search_title('title'))
title = clean_html(title_span)
- video_re = r'''(?x)data-video-username="(.*?)".*?
+ video_re = r'''(?x)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
- matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
- # Some of the videos may have been deleted, their username field is empty
- ids = [video_id for (username, video_id) in matches if username]
+ ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
+ # Check if the playlist exists or is private
+ if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
+ raise ExtractorError(
+ u'The playlist doesn\'t exist or is private, use --username or '
+ '--netrc to access it.',
+ expected=True)
+
# Extract the video ids from the playlist pages
ids = []
feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
- if info['paging'] is None:
+ mobj = re.search(
+ r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
+ feed_html)
+ if mobj is None:
break
- paging = info['paging']
+ paging = mobj.group('paging')
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):