-# encoding: utf-8
-import re
-import json
+from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
+ int_or_none,
+ parse_iso8601,
)
-class ComCarCoffIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]+)/?'
- _TESTS = [
- {
- 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
- 'info_dict': {
- 'id': 'miranda-sings-happy-thanksgiving-miranda',
- 'upload_date': '20141127',
- 'title': 'Happy Thanksgiving Miranda',
- 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
- 'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
- },
+
class CCCIE(InfoExtractor):
    """Extractor for talk pages under ``media.ccc.de/v/<slug>``.

    The talk page is fetched only to find the numeric event id; metadata
    and the list of recordings are then read from the site's public JSON
    endpoint for that event.
    """

    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
        'md5': '3a1eda8f3a29515d27f5adb967d7e740',
        'info_dict': {
            'id': '1839',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20131228',
            'timestamp': 1388188800,
            'duration': 3710,
        }
    }, {
        'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the talk at *url*.

        The slug matched by ``_VALID_URL`` is used as ``display_id``; the
        numeric ``data-id`` found in the page becomes the ``id``. A missing
        ``title`` in the event JSON raises ``KeyError``; every other
        metadata field is optional.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        event_id = self._search_regex(
            r"data-id='(\d+)'", webpage, 'event id')
        event_data = self._download_json(
            'https://media.ccc.de/public/events/%s' % event_id, event_id)

        formats = []
        # ``or []`` also covers a key that is present but null.
        for recording in event_data.get('recordings') or []:
            recording_url = recording.get('recording_url')
            if not recording_url:
                continue
            language = recording.get('language')
            folder = recording.get('folder')

            # "<language>-<folder>", or whichever part is present, or None
            # when neither is.
            format_id = '-'.join(
                part for part in (language, folder) if part) or None

            # The folder name is the only codec hint used here. Guard
            # against a missing folder: ``'h264' in None`` is a TypeError.
            if not folder:
                vcodec = None
            elif 'h264' in folder:
                vcodec = 'h264'
            elif folder in ('mp3', 'opus'):
                vcodec = 'none'  # audio-only recording
            else:
                vcodec = None

            formats.append({
                'format_id': format_id,
                'url': recording_url,
                'width': int_or_none(recording.get('width')),
                'height': int_or_none(recording.get('height')),
                # NOTE(review): the 1024 * 1024 scale presumes ``size`` is
                # reported in MiB - confirm against the API.
                'filesize': int_or_none(
                    recording.get('size'), invscale=1024 * 1024),
                'language': language,
                'vcodec': vcodec,
            })
        self._sort_formats(formats)

        return {
            'id': event_id,
            'display_id': display_id,
            'title': event_data['title'],
            'description': event_data.get('description'),
            'thumbnail': event_data.get('thumb_url'),
            'timestamp': parse_iso8601(event_data.get('date')),
            'duration': int_or_none(event_data.get('length')),
            'tags': event_data.get('tags'),
            'formats': formats,
        }