X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcondenast.py;h=d6949ca28f9605fc69c0ee2adb032217903da393;hb=3c63e1bb5740484dae483595a87325e8cf9c7e7b;hp=91c1c1348f587798131459676ffe5444727c5c3b;hpb=0f2999fe2b352795d54e6fcc4027e6a64ce5bc1d;p=youtube-dl diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 91c1c1348..d6949ca28 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -2,15 +2,16 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - orderedSet, compat_urllib_parse_urlparse, compat_urlparse, ) +from ..utils import ( + orderedSet, +) class CondeNastIE(InfoExtractor): @@ -22,22 +23,38 @@ class CondeNastIE(InfoExtractor): # The keys are the supported sites and the values are the name to be shown # to the user and in the extractor description. _SITES = { - 'wired': 'WIRED', + 'allure': 'Allure', + 'architecturaldigest': 'Architectural Digest', + 'arstechnica': 'Ars Technica', + 'bonappetit': 'Bon Appétit', + 'brides': 'Brides', + 'cnevids': 'Condé Nast', + 'cntraveler': 'Condé Nast Traveler', + 'details': 'Details', + 'epicurious': 'Epicurious', + 'glamour': 'Glamour', + 'golfdigest': 'Golf Digest', 'gq': 'GQ', + 'newyorker': 'The New Yorker', + 'self': 'SELF', + 'teenvogue': 'Teen Vogue', + 'vanityfair': 'Vanity Fair', 'vogue': 'Vogue', - 'glamour': 'Glamour', + 'wired': 'WIRED', 'wmagazine': 'W Magazine', - 'vanityfair': 'Vanity Fair', } - _VALID_URL = r'http://(video|www)\.(?P%s)\.com/(?Pwatch|series|video)/(?P.+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'http://(?:video|www|player)\.(?P%s)\.com/(?Pwatch|series|video|embed)/(?P[^/?#]+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) + EMBED_URL = r'(?:https?:)?//player\.(?P%s)\.com/(?Pembed)/.+?' % '|'.join(_SITES.keys()) + _TEST = { 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', - 'file': '5171b343c2b4c00dd0c1ccb3.mp4', 'md5': '1921f713ed48aabd715691f774c451f7', 'info_dict': { + 'id': '5171b343c2b4c00dd0c1ccb3', + 'ext': 'mp4', 'title': '3D Printed Speakers Lit With LED', 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', } @@ -55,12 +72,16 @@ class CondeNastIE(InfoExtractor): entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] return self.playlist_result(entries, playlist_title=title) - def _extract_video(self, webpage): - description = self._html_search_regex([r'
(.+?)
', - r'
(.+?)
', - ], - webpage, 'description', - fatal=False, flags=re.DOTALL) + def _extract_video(self, webpage, url_type): + if url_type != 'embed': + description = self._html_search_regex( + [ + r'
(.+?)
', + r'
(.+?)
', + ], + webpage, 'description', fatal=False, flags=re.DOTALL) + else: + description = None params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, 'player params', flags=re.DOTALL) video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') @@ -76,8 +97,8 @@ class CondeNastIE(InfoExtractor): info_url = base_info_url + data info_page = self._download_webpage(info_url, video_id, 'Downloading video info') - video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') - video_info = json.loads(video_info) + video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') + video_info = self._parse_json(video_info, video_id) formats = [{ 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), @@ -99,12 +120,12 @@ class CondeNastIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) site = mobj.group('site') url_type = mobj.group('type') - id = mobj.group('id') + item_id = mobj.group('id') - self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) - webpage = self._download_webpage(url, id) + self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) + webpage = self._download_webpage(url, item_id) if url_type == 'series': return self._extract_series(url, webpage) else: - return self._extract_video(webpage) + return self._extract_video(webpage, url_type)