2 from __future__ import unicode_literals
4 from .common import InfoExtractor
6 from ..utils import determine_ext
9 class LibraryOfCongressIE(InfoExtractor):
10 _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)'
12 'url': 'http://loc.gov/item/90716351/',
16 'title': 'Pa\'s trip to Mars /'
20 'skip_download': True,
23 'url': 'https://www.loc.gov/item/97516576/',
24 'only_matching': True,
27 def _real_extract(self, url):
28 video_id = self._match_id(url)
29 webpage = self._download_webpage(url, video_id)
31 self.report_extraction(video_id)
32 json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id')
34 data = self._parse_json(self._download_webpage(
35 'https://media.loc.gov/services/v1/media?id=%s' % json_id,
37 data = data['mediaObject']
39 media_url = data['derivatives'][0]['derivativeUrl']
40 media_url = media_url.replace('rtmp', 'https')
42 is_video = data['mediaType'].lower() == 'v'
43 if not determine_ext(media_url) in ('mp4', 'mp3'):
44 media_url += '.mp4' if is_video else '.mp3'
46 if media_url.index('vod/mp4:') > -1:
47 media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8'
48 elif url.index('vod/mp3:') > -1:
49 media_url = media_url.replace('vod/mp3:', '')
52 if determine_ext(media_url) == 'm3u8':
53 formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4')
54 elif determine_ext(media_url) is 'mp3':
62 'thumbnail': self._og_search_thumbnail(webpage),
63 'title': self._og_search_title(webpage),