X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdbtv.py;h=aaedf2e3d36084d90699277a52aebb6c3a71a335;hb=HEAD;hp=cf76dbf0533652de1e22fbfabebb904238c3aeec;hpb=f063a04f079f7af0078a02da39586b5e71a6c0b1;p=youtube-dl diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index cf76dbf05..aaedf2e3d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -5,72 +5,53 @@ import re from .common import InfoExtractor -from ..utils import ( - ExtractorError -) class DBTVIE(InfoExtractor): - _VALID_URL = r'http://dbtv.no/(?P[0-9]+)/?(?P.*)$' - _TEST = { - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', - 'info_dict': { - 'id': '3649835190001', - 'ext': 'mp4', - 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2', - 'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69544, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # Download JSON file containing video info. - data = self._download_json('http://api.dbtv.no/discovery/%s' % video_id, video_id, 'Downloading media JSON') - # We only want the first video in the JSON API file. - video = data['playlist'][0] - - # Check for full HD video, else use the standard video URL - for i in range(0, len(video['renditions'])): - if int(video['renditions'][i]['width']) == 1280: - video_url = video['renditions'][i]['URL'] - break - else: - video_url = video['URL'] - - # Add access token to image or it will fail. - thumbnail = video['splash'] - - # Duration int. - duration = int(video['length']) - - # Timestamp is given in milliseconds. - timestamp = float(str(video['publishedAt'])[0:-3]) - - formats = [] - - # Video URL. - if video['URL'] is not None: - formats.append({ - 'url': video_url, - 'format_id': 'mp4', - 'ext': 'mp4' - }) - else: - raise ExtractorError('No download URL found for video: %s.' % video_id, expected=True) - - return { - 'id': video_id, - 'title': video['title'], - 'description': video['desc'], - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': video['views'], - 'formats': formats, - } + _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P[^/]+))/)?(?P[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' + _TESTS = [{ + 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', + 'md5': 'b8f850ba1860adbda668d367f9b77699', + 'info_dict': { + 'id': 'PynxJnNWChE', + 'ext': 'mp4', + 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', + 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', + 'thumbnail': r're:https?://.*\.jpg', + 'upload_date': '20160916', + 'duration': 69, + 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', + 'uploader': 'Dagbladet', + }, + 'add_ie': ['Youtube'] + }, { + 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', + 'only_matching': True, + }, { + 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r']+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', + webpage)] + + def _real_extract(self, url): + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = { + '_type': 'url_transparent', + 'id': video_id, + 'display_id': display_id, + } + if len(video_id) == 11: + info.update({ + 'url': video_id, + 'ie_key': 'Youtube', + }) + else: + info.update({ + 'url': 'jwplatform:' + video_id, + 'ie_key': 'JWPlatform', + }) + return info