X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Flibraryofcongress.py;h=49351759ed318bda771a6648f7f33744fe62c4b7;hb=c917106be4d6d98ce7504d71a32b58ddca2bc03d;hp=d311f994624d349d6c3e089753c9f01898bd6ec2;hpb=7f3c3dfa52769d1f44c1f1031449118c564a92bf;p=youtube-dl diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index d311f9946..49351759e 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -13,8 +13,9 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' + _TESTS = [{ + # embedded via
.+?)\1', r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', - r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'), + r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1', + r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'), webpage, 'media id', group='id') - data = self._parse_json( - self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id), + data = self._download_json( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, video_id)['mediaObject'] derivative = data['derivatives'][0] @@ -74,11 +88,20 @@ class LibraryOfCongressIE(InfoExtractor): duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) + subtitles = {} + cc_url = data.get('ccUrl') + if cc_url: + subtitles.setdefault('en', []).append({ + 'url': cc_url, + 'ext': 'ttml', + }) + return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'view_count': view_count, 'formats': formats, + 'subtitles': subtitles, }