from ..utils import (
compat_urllib_parse,
unified_strdate,
+ ExtractorError,
)
'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
- 'upload_date': '20140225',
+ 'upload_date': '20140203',
'duration': 522.56,
},
'params': {
'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
- 'upload_date': '20140225',
+ 'upload_date': '20141014',
'duration': 2410.44,
},
'params': {
'id': '2156342',
'ext': 'mp4',
'title': 'Kurztrips zum Valentinstag',
- 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
- 'upload_date': '20130206',
+ 'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
'duration': 307.24,
},
'params': {
'skip_download': True,
},
},
+ {
+ 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+ 'info_dict': {
+ 'id': '439664',
+ 'title': 'Episode 8 - Ganze Folge - Playlist',
+ 'description': 'Das finale und härteste Duell aller Zeiten ist vorbei! Der Weltmeister für dieses Jahr steht! Alle packenden Duelle der achten Episode von "Joko gegen Klaas - das Duell um die Welt" seht ihr hier noch einmal in voller Länge!',
+ },
+ 'playlist_count': 2,
+ },
]
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
+ r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
]
+ _ITEM_TYPE_REGEXES = [
+ r"'itemType'\s*:\s*'([^']*)'",
+ ]
+ _ITEM_ID_REGEXES = [
+ r"'itemId'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_CLIPS_REGEXES = [
+ r'data-qvt=.+?<a href="([^"]+)"',
+ ]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+     """Extract the clip or the playlist found at ``url``.
+
+     The page is downloaded once; its ``itemType`` marker (treated as
+     'CLIP' when no marker is found) selects single-clip or playlist
+     handling.
+
+     Returns whatever ``_clip_extract`` returns for a clip page, or a
+     ``'_type': 'playlist'`` dict whose entries are the extracted clips.
+
+     Raises ExtractorError for an unknown item type, or when a playlist
+     page yields no clip links.
+     """
+     video_id = self._match_id(url)
+     webpage = self._download_webpage(url, video_id)
+
+     item_type = self._html_search_regex(
+         self._ITEM_TYPE_REGEXES, webpage, 'item type', default='CLIP')
+     if item_type == 'CLIP':
+         # Pass the page along so _clip_extract does not download it again.
+         return self._clip_extract(url, webpage)
+
+     if item_type != 'PLAYLIST':
+         raise ExtractorError('Unknown item type "%s"' % item_type)
+
+     playlist_id = self._html_search_regex(
+         self._ITEM_ID_REGEXES, webpage, 'playlist id')
+
+     # The first pattern that finds any clip links on the page wins.
+     for regex in self._PLAYLIST_CLIPS_REGEXES:
+         playlist_clips = re.findall(regex, webpage, re.DOTALL)
+         if not playlist_clips:
+             continue
+
+         title = self._html_search_regex(
+             self._TITLE_REGEXES, webpage, 'title')
+         description = self._html_search_regex(
+             self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+         # The matched hrefs are appended to scheme://host, so they are
+         # presumably site-relative paths -- verify against a live page.
+         root_url = re.match(r'(.+?//.+?)/', url).group(1)
+
+         return {
+             '_type': 'playlist',
+             'id': playlist_id,
+             'title': title,
+             'description': description,
+             'entries': [
+                 self._clip_extract(root_url + clip_path)
+                 for clip_path in playlist_clips
+             ],
+         }
+
+     # Without this, a playlist page with no recognisable clip links made
+     # the method fall off the end and return None.
+     raise ExtractorError(
+         'Unable to find any clips in playlist "%s"' % playlist_id)
- page = self._download_webpage(url, video_id, 'Downloading page')
+ def _clip_extract(self, url, webpage=None):
+ if webpage is None:
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
- clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id')
+ clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'testclient'
client_name = 'kolibri-1.2.5'
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
- title = self._html_search_regex(self._TITLE_REGEXES, page, 'title')
- description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False)
- thumbnail = self._og_search_thumbnail(page)
+ title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
+ description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
- self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False))
+ self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
formats = []
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
- return bitrate / 1000 if bitrate % 1000 == 0 else bitrate
+ return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
protocol = source['protocol']
'upload_date': upload_date,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }