X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fexpressen.py;h=dc8b855d2332521b505a53a49737e9364296876e;hb=HEAD;hp=f61178012feb4db017e3c96f18ec4959ac3f4f42;hpb=734d461ca04a9f271dd463aa75d44ac82377057e;p=youtube-dl diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py index f61178012..dc8b855d2 100644 --- a/youtube_dl/extractor/expressen.py +++ b/youtube_dl/extractor/expressen.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, @@ -11,7 +13,13 @@ from ..utils import ( class ExpressenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?(?:expressen|di)\.se/ + (?:(?:tvspelare/video|videoplayer/embed)/)? + tv/(?:[^/]+/)* + (?P<id>[^/?#&]+) + ''' _TESTS = [{ 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', 'md5': '2fbbe3ca14392a6b1b36941858d33a45', @@ -28,8 +36,24 @@ class ExpressenIE(InfoExtractor): }, { 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', 'only_matching': True, + }, { + 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', + 'only_matching': True, + }, { + 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', + 'only_matching': True, + }, { + 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', + 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [
mobj.group('url') for mobj in re.finditer( + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', + webpage)] + def _real_extract(self, url): display_id = self._match_id(url) @@ -61,8 +85,8 @@ class ExpressenIE(InfoExtractor): title = info.get('titleRaw') or data['title'] description = info.get('descriptionRaw') thumbnail = info.get('socialMediaImage') or data.get('image') - duration = int_or_none(info.get('videoTotalSecondsDuration') or - data.get('totalSecondsDuration')) + duration = int_or_none(info.get('videoTotalSecondsDuration') + or data.get('totalSecondsDuration')) timestamp = unified_timestamp(info.get('publishDate')) return {