Remita Amine
Aurélio A. Heckert
Bernhard Minks
+sceext
+Zach Bruggeman
+Tjark Saul
## Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
--playlist-end NUMBER Playlist video to end at (default is last)
- --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
--match-title REGEX Download only matching titles (regex or caseless sub-string)
- **anitube.se**
- **AnySex**
- **Aparat**
+ - **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
- **AppleTrailers**
- **archive.org**: archive.org videos
- **SportBox**
- **SportBoxEmbed**
- **SportDeutschland**
+ - **Sportschau**
- **Srf**
- **SRMediathek**: Saarländischer Rundfunk
- **SSA**
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- - **vk.com**
- - **vk.com:user-videos**: vk.com:All of a user's videos
+ - **vk**: VK
+ - **vk:uservideos**: VK - User's Videos
- **Vodlocker**
- **VoiceRepublic**
- **Vporn**
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
+ self.assertEqual(parse_duration('87 Min.'), 5220)
def test_fix_xml_ampersands(self):
self.assertEqual(
if req_format is None:
req_format_list = []
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- info_dict['extractor'] in ['youtube', 'ted']):
+ info_dict['extractor'] in ['youtube', 'ted'] and
+ not info_dict.get('is_live')):
merger = FFmpegMergerPP(self)
if merger.available and merger.can_merge():
req_format_list.append('bestvideo+bestaudio')
from .http import HttpFD
from .rtsp import RtspFD
from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
from ..utils import (
determine_protocol,
'mms': RtspFD,
'rtsp': RtspFD,
'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
}
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+
+class DashSegmentsFD(FileDownloader):
+ """
+ Download segments in a DASH manifest
+ """
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ base_url = info_dict['url']
+ segment_urls = info_dict['segment_urls']
+
+ is_test = self.params.get('test', False)
+ remaining_bytes = self._TEST_FILE_SIZE if is_test else None
+ byte_counter = 0
+
+ def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
+ self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+ req = compat_urllib_request.Request(target_url)
+ if remaining_bytes is not None:
+ req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+
+ data = self.ydl.urlopen(req).read()
+
+ if remaining_bytes is not None:
+ data = data[:remaining_bytes]
+
+ outf.write(data)
+ return len(data)
+
+ def combine_url(base_url, target_url):
+ if re.match(r'^https?://', target_url):
+ return target_url
+ return '%s/%s' % (base_url, target_url)
+
+ with open(tmpfilename, 'wb') as outf:
+ append_url_to_file(
+ outf, combine_url(base_url, info_dict['initialization_url']),
+ 'initialization segment')
+ for i, segment_url in enumerate(segment_urls):
+ segment_len = append_url_to_file(
+ outf, combine_url(base_url, segment_url),
+ 'segment %d / %d' % (i + 1, len(segment_urls)),
+ remaining_bytes)
+ byte_counter += segment_len
+ if remaining_bytes is not None:
+ remaining_bytes -= segment_len
+ if remaining_bytes <= 0:
+ break
+
+ self.try_rename(tmpfilename, filename)
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+
+ return True
from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
-from .ard import ARDIE, ARDMediathekIE
+from .ard import (
+ ARDIE,
+ ARDMediathekIE,
+ SportschauIE,
+)
from .arte import (
ArteTvIE,
ArteTVPlus7IE,
)
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
+from .lecture2go import Lecture2GoIE
from .letv import (
LetvIE,
LetvTvIE,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ str_to_int,
+ ExtractorError
+)
+
+
+class AppleConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
+ _TEST = {
+ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'md5': '10d0f2799111df4cb1c924520ca78f98',
+ 'info_dict': {
+ 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
+ 'ext': 'm4v',
+ 'title': 'Energy',
+ 'uploader': 'Drake',
+ 'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+ 'upload_date': '20150710',
+ 'timestamp': 1436545535,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ try:
+ video_json = self._html_search_regex(
+ r'class="auc-video-data">(\{.*?\})', webpage, 'json')
+ except ExtractorError:
+ raise ExtractorError('This post doesn\'t contain a video', expected=True)
+
+ video_data = self._parse_json(video_json, video_id)
+ timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+
+ return {
+ 'id': video_id,
+ 'url': video_data['sslSrc'],
+ 'title': video_data['title'],
+ 'description': video_data['description'],
+ 'uploader': video_data['artistName'],
+ 'thumbnail': video_data['artworkUrl'],
+ 'timestamp': timestamp,
+ 'like_count': like_count,
+ }
from ..utils import (
determine_ext,
ExtractorError,
+ get_element_by_attribute,
qualities,
int_or_none,
parse_duration,
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
- 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
- 'only_matching': True,
+ 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
+ 'info_dict': {
+ 'id': '29582122',
+ 'ext': 'mp4',
+ 'title': 'Ich liebe das Leben trotzdem',
+ 'description': 'md5:45e4c225c72b27993314b31a84a5261c',
+ 'duration': 4557,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
- 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
+ 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
+ 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
'info_dict': {
- 'id': '22490580',
+ 'id': '29522730',
'ext': 'mp4',
- 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
- 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
+ 'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
+ 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
+ 'duration': 5252,
},
- 'skip': 'Blocked outside of Germany',
+ }, {
+ # audio
+ 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
+ 'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
+ 'info_dict': {
+ 'id': '28488308',
+ 'ext': 'mp3',
+ 'title': 'Tod eines Fußballers',
+ 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
+ 'duration': 3240,
+ },
+ }, {
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'only_matching': True,
}]
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if '"fsk"' in webpage:
+ raise ExtractorError(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ raise ExtractorError('This video is not available due to geo restriction', expected=True)
+
+ self._sort_formats(formats)
+
+ duration = int_or_none(media_info.get('_duration'))
+ thumbnail = media_info.get('_previewImage')
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'srt',
+ 'url': subtitle_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
+ video_id, preference=-1, f4m_id='hds'))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ elif stream_url.startswith('http'):
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ else:
+ continue
+ m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
def _real_extract(self, url):
# determine video id from url
m = re.match(self._VALID_URL, url)
'format_id': fid,
'url': furl,
})
+ self._sort_formats(formats)
+ info = {
+ 'formats': formats,
+ }
else: # request JSON file
- media_info = self._download_json(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
- # The second element of the _mediaArray contains the standard http urls
- streams = media_info['_mediaArray'][1]['_mediaStreamArray']
- if not streams:
- if '"fsk"' in webpage:
- raise ExtractorError('This video is only available after 20:00')
-
- formats = []
- for s in streams:
- if type(s['_stream']) == list:
- for index, url in enumerate(s['_stream'][::-1]):
- quality = s['_quality'] + index
- formats.append({
- 'quality': quality,
- 'url': url,
- 'format_id': '%s-%s' % (determine_ext(url), quality)
- })
- continue
-
- format = {
- 'quality': s['_quality'],
- 'url': s['_stream'],
- }
-
- format['format_id'] = '%s-%s' % (
- determine_ext(format['url']), format['quality'])
+ info = self._extract_media_info(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
- formats.append(format)
-
- self._sort_formats(formats)
-
- return {
+ info.update({
'id': video_id,
'title': title,
'description': description,
- 'formats': formats,
'thumbnail': thumbnail,
- }
+ })
+
+ return info
class ARDIE(InfoExtractor):
'upload_date': upload_date,
'thumbnail': thumbnail,
}
+
+
+class SportschauIE(ARDMediathekIE):
+ IE_NAME = 'Sportschau'
+ _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
+ _TESTS = [{
+ 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
+ 'info_dict': {
+ 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
+ 'ext': 'mp4',
+ 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ base_url = mobj.group('baseurl')
+
+ webpage = self._download_webpage(url, video_id)
+ title = get_element_by_attribute('class', 'headline', webpage)
+ description = self._html_search_meta('description', webpage, 'description')
+
+ info = self._extract_media_info(
+ base_url + '-mc_defaultQuality-h.json', webpage, video_id)
+
+ info.update({
+ 'title': title,
+ 'description': description,
+ })
+
+ return info
import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import compat_urlparse
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
- float_or_none,
parse_duration,
determine_ext,
)
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
continue
- video_url_parsed = compat_urllib_parse_urlparse(video_url)
f4m_url = self._download_webpage(
- 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
+ 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
- formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+ formats.extend(self._extract_f4m_formats(
+ f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
elif video_url.startswith('rtmp'):
(?:
(?:www\.)?france[2345o]\.fr/
(?:
- emissions/[^/]+/(?:videos|diffusions)?|
- videos
+ emissions/[^/]+/(?:videos|diffusions)|
+ emission/[^/]+|
+ videos|
+ jt
)
/|
embed\.francetv\.fr/\?ue=
},
# franceo
{
- 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
- 'md5': '52f0bfe202848b15915a2f39aaa8981b',
+ 'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
+ 'md5': '47d5816d3b24351cdce512ad7ab31da8',
'info_dict': {
- 'id': '108634970',
+ 'id': '125377621',
'ext': 'flv',
- 'title': 'Infô Afrique',
- 'description': 'md5:ebf346da789428841bee0fd2a935ea55',
- 'upload_date': '20140915',
- 'timestamp': 1410822000,
+ 'title': 'Infô soir',
+ 'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
+ 'upload_date': '20150718',
+ 'timestamp': 1437241200,
+ 'duration': 414,
},
},
{
'id': 'EV_30231',
'ext': 'flv',
'title': 'Alcaline, le concert avec Calogero',
- 'description': 'md5:',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
'upload_date': '20150226',
'timestamp': 1424989860,
'duration': 5400,
if vimeo_url is not None:
return self.url_result(vimeo_url)
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
+
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
(?:
'title': '名侦探柯南第752集',
},
}],
+ 'params': {
+ 'skip_download': True,
+ },
}]
_FORMATS_MAP = [
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ int_or_none,
+)
+
+
+class Lecture2GoIE(InfoExtractor):
+ _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
+ 'md5': 'ac02b570883020d208d405d5a3fd2f7f',
+ 'info_dict': {
+ 'id': '17473',
+ 'ext': 'flv',
+ 'title': '2 - Endliche Automaten und reguläre Sprachen',
+ 'creator': 'Frank Heitmann',
+ 'duration': 5220,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
+
+ formats = []
+ for url in set(re.findall(r'"src","([^"]+)"', webpage)):
+ ext = determine_ext(url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(url, video_id))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(url, video_id))
+ else:
+ formats.append({
+ 'url': url,
+ })
+
+ self._sort_formats(formats)
+
+ creator = self._html_search_regex(
+ r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'creator': creator,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
'info_dict': {
'id': '2365006249',
'ext': 'mp4',
- 'title': 'A More Perfect Union',
+ 'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
},
'info_dict': {
'id': '2365297690',
'ext': 'mp4',
- 'title': 'Losing Iraq',
+ 'title': 'FRONTLINE - Losing Iraq',
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
'duration': 5050,
},
'info_dict': {
'id': '2201174722',
'ext': 'mp4',
- 'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+ 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
'duration': 801,
},
'id': '2365297708',
'ext': 'mp4',
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
- 'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+ 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$',
},
'display_id': 'killer-typhoon',
'ext': 'mp4',
'description': 'md5:c741d14e979fc53228c575894094f157',
- 'title': 'Killer Typhoon',
+ 'title': 'NOVA - Killer Typhoon',
'duration': 3172,
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
'id': '2280706814',
'display_id': 'player',
'ext': 'mp4',
- 'title': 'Death and the Civil War',
+ 'title': 'American Experience - Death and the Civil War',
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
'duration': 6705,
'thumbnail': 're:^https?://.*\.jpg$',
'params': {
'skip_download': True, # requires ffmpeg
},
+ },
+ {
+ 'url': 'http://video.pbs.org/video/2365367186/',
+ 'info_dict': {
+ 'id': '2365367186',
+ 'display_id': '2365367186',
+ 'ext': 'mp4',
+ 'title': 'To Catch A Comet - Full Episode',
+ 'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
+ 'duration': 3342,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
}
]
'url': closed_captions_url,
}]
+ # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
+ # Try turning it to 'program - title' naming scheme if possible
+ alt_title = info.get('program', {}).get('title')
+ if alt_title:
+ info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
+
return {
'id': video_id,
'display_id': display_id,
'ext': 'mp4',
'title': '#whilewewatch',
}
+ }, {
+ # invalid labels, 360p is better that 480p
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'md5': '882fca19b9eb27ef865efeeaed376a48',
+ 'info_dict': {
+ 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'ext': 'mp4',
+ 'title': 'Life in Limbo',
+ }
}, {
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
'only_matching': True,
if not file_:
continue
type_ = source.get('type')
- format_id = source.get('label')
ext = determine_ext(file_)
- if any(_ == 'm3u8' for _ in (type_, ext)):
+ format_id = source.get('label') or ext
+ if all(v == 'm3u8' for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls'))
else:
bitrate = int_or_none(self._search_regex(
- r'(\d+)kbps', file_, 'bitrate', default=None))
+ [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
+ file_, 'bitrate', default=None))
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append({
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/(ts|tsg|tt|nm|bab/bab)|video/video|tsvorzwanzig)(?P<id>-?[0-9]+)(?:~[-_a-zA-Z0-9]*)?\.html'
_TESTS = [{
- 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
- 'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
+ 'md5': '917a228bc7df7850783bc47979673a09',
'info_dict': {
- 'id': '1399128',
+ 'id': '102143',
'ext': 'mp4',
- 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
- 'description': 'md5:69da3c61275b426426d711bde96463ab',
+ 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
+ 'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
import re
from .common import InfoExtractor
-from .pornhub import PornHubIE
-from .vimeo import VimeoIE
class TumblrIE(InfoExtractor):
blog = m_url.group('blog_name')
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
- webpage = self._download_webpage(url, video_id)
-
- vid_me_embed_url = self._search_regex(
- r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
- webpage, 'vid.me embed', default=None)
- if vid_me_embed_url is not None:
- return self.url_result(vid_me_embed_url, 'Vidme')
-
- pornhub_url = PornHubIE._extract_url(webpage)
- if pornhub_url:
- return self.url_result(pornhub_url, 'PornHub')
-
- vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
- if vimeo_url:
- return self.url_result(vimeo_url, 'Vimeo')
+ webpage, urlh = self._download_webpage_handle(url, video_id)
iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
- webpage, 'iframe url')
- iframe = self._download_webpage(iframe_url, video_id)
+ webpage, 'iframe url', default=None)
+ if iframe_url is None:
+ return self.url_result(urlh.geturl(), 'Generic')
+
+ iframe = self._download_webpage(iframe_url, video_id,
+ 'Downloading iframe page')
video_url = self._search_regex(r'<source src="([^"]+)"',
iframe, 'video url')
class TwitchStreamIE(TwitchBaseIE):
IE_NAME = 'twitch:stream'
- _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
- _TEST = {
+ _TESTS = [{
'url': 'http://www.twitch.tv/shroomztv',
'info_dict': {
'id': '12772022048',
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
channel_id = self._match_id(url)
login_popup = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
- if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
+ def is_logged(webpage):
+ return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
+
+ # already logged in
+ if is_logged(login_popup):
return
login_form = self._form_hidden_inputs('login-form', login_popup)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
- if all(logout_pattern not in response
- for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+ if not is_logged(response):
error = self._html_search_regex(
r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
response, 'error message', default=None)
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
class ViceIE(InfoExtractor):
- _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
+ _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
- _TEST = {
- 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
- 'info_dict': {
- 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
- 'ext': 'mp4',
- 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
- },
- 'params': {
- # Requires ffmpeg (m3u8 manifest)
- 'skip_download': True,
- },
- }
+ _TESTS = [
+ {
+ 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ 'info_dict': {
+ 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+ 'ext': 'mp4',
+ 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ 'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+ 'only_matching': True,
+ }
+ ]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- webpage = self._download_webpage(url, name)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
try:
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage,
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_request
+from ..compat import (
+ compat_urllib_request,
+ compat_urllib_parse,
+)
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+)
class ViewsterIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+ _VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
_TESTS = [{
- # movielink, paymethod=fre
- 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
- 'playlist': [{
- 'md5': '8f9d94b282d80c42b378dffdbb11caf3',
- 'info_dict': {
- 'id': '1293-19341-000-movie',
- 'ext': 'flv',
- 'title': "'Hout' (Wood) - Movie",
- },
- }],
- 'info_dict': {
- 'id': '1293-19341-000',
- 'title': "'Hout' (Wood)",
- 'description': 'md5:925733185a9242ef96f436937683f33b',
- }
- }, {
- # movielink, paymethod=adv
+ # movie, Type=Movie
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
- 'playlist': [{
- 'md5': '77a005453ca7396cbe3d35c9bea30aef',
- 'info_dict': {
- 'id': '1140-11855-000-movie',
- 'ext': 'flv',
- 'title': "THE LISTENING PROJECT - Movie",
- },
- }],
+ 'md5': '14d3cfffe66d57b41ae2d9c873416f01',
'info_dict': {
'id': '1140-11855-000',
- 'title': "THE LISTENING PROJECT",
- 'description': 'md5:714421ae9957e112e672551094bf3b08',
- }
+ 'ext': 'flv',
+ 'title': 'The listening Project',
+ 'description': 'md5:bac720244afd1a8ea279864e67baa071',
+ 'timestamp': 1214870400,
+ 'upload_date': '20080701',
+ 'duration': 4680,
+ },
}, {
- # direct links, no movielink
- 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
- 'playlist': [{
- 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
- 'info_dict': {
- 'id': '1198-56411-000-trailer',
- 'ext': 'mp4',
- 'title': "Sinister - Trailer",
- },
- }, {
- 'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
- 'info_dict': {
- 'id': '1198-56411-000-behind-scenes',
- 'ext': 'mp4',
- 'title': "Sinister - Behind Scenes",
- },
- }, {
- 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
- 'info_dict': {
- 'id': '1198-56411-000-scene-from-movie',
- 'ext': 'mp4',
- 'title': "Sinister - Scene from movie",
- },
- }],
+ # series episode, Type=Episode
+ 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
+ 'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
'info_dict': {
- 'id': '1198-56411-000',
- 'title': "Sinister",
- 'description': 'md5:014c40b0488848de9683566a42e33372',
- }
+ 'id': '1284-19427-001',
+ 'ext': 'flv',
+ 'title': 'The World and a Wall',
+ 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
+ 'timestamp': 1428192000,
+ 'upload_date': '20150405',
+ 'duration': 1500,
+ },
+ }, {
+ # serie, Type=Serie
+ 'url': 'http://www.viewster.com/serie/1303-19426-000/',
+ 'info_dict': {
+ 'id': '1303-19426-000',
+ 'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
+ 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
+ },
+ 'playlist_count': 13,
+ }, {
+ # unfinished serie, no Type
+ 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
+ 'info_dict': {
+ 'id': '1284-19427-000',
+ 'title': 'Baby Steps—Season 2',
+ 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
+ },
+ 'playlist_mincount': 16,
}]
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+ _AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA=='
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- request = compat_urllib_request.Request(
- 'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+ def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
+ request = compat_urllib_request.Request(url)
request.add_header('Accept', self._ACCEPT_HEADER)
+ request.add_header('Auth-token', self._AUTH_TOKEN)
+ return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
- movie = self._download_json(
- request, video_id, 'Downloading movie metadata JSON')
-
- title = movie.get('title') or movie['original_title']
- description = movie.get('synopsis')
- thumbnail = movie.get('large_artwork') or movie.get('artwork')
-
- entries = []
- for clip in movie['play_list']:
- entry = None
-
- # movielink api
- link_request = clip.get('link_request')
- if link_request:
- request = compat_urllib_request.Request(
- 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s¤cy=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
- % link_request)
- request.add_header('Accept', self._ACCEPT_HEADER)
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
- movie_link = self._download_json(
- request, video_id, 'Downloading movie link JSON', fatal=False)
+ info = self._download_json(
+ 'https://public-api.viewster.com/search/%s' % video_id,
+ video_id, 'Downloading entry JSON')
- if movie_link:
- formats = self._extract_f4m_formats(
- movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
- self._sort_formats(formats)
- entry = {
- 'formats': formats,
- }
+ entry_id = info.get('Id') or info['id']
- # direct link
- clip_url = clip.get('clip_data', {}).get('url')
- if clip_url:
- entry = {
- 'url': clip_url,
- 'ext': 'mp4',
- }
+ # unfinished serie has no Type
+ if info.get('Type') in ['Serie', None]:
+ episodes = self._download_json(
+ 'https://public-api.viewster.com/series/%s/episodes' % entry_id,
+ video_id, 'Downloading series JSON')
+ entries = [
+ self.url_result(
+ 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
+ for episode in episodes]
+ title = (info.get('Title') or info['Synopsis']['Title']).strip()
+ description = info.get('Synopsis', {}).get('Detailed')
+ return self.playlist_result(entries, video_id, title, description)
- if entry:
- entry.update({
- 'id': '%s-%s' % (video_id, clip['canonical_title']),
- 'title': '%s - %s' % (title, clip['title']),
+ formats = []
+ for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
+ media = self._download_json(
+ 'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
+ % (entry_id, compat_urllib_parse.quote(media_type)),
+ video_id, 'Downloading %s JSON' % media_type, fatal=False)
+ if not media:
+ continue
+ video_url = media.get('Uri')
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ video_url += '&' if '?' in video_url else '?'
+ video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds'))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls',
+ fatal=False # m3u8 sometimes fail
+ ))
+ else:
+ formats.append({
+ 'url': video_url,
})
- entries.append(entry)
+ self._sort_formats(formats)
- playlist = self.playlist_result(entries, video_id, title, description)
- playlist['thumbnail'] = thumbnail
- return playlist
+ synopsis = info.get('Synopsis', {})
+ # Prefer title outside synopsis since it's less messy
+ title = (info.get('Title') or synopsis['Title']).strip()
+ description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
+ duration = int_or_none(info.get('Duration'))
+ timestamp = parse_iso8601(info.get('ReleaseDate'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }
'uploader': 'dorappi2000',
'formats': 'mincount:33',
},
- }
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ }
+ },
]
def __init__(self, *args, **kwargs):
# TODO implement WebVTT downloading
pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
+ if segment_list is not None:
+ f.update({
+ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+ 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+ 'protocol': 'http_dash_segments',
+ })
try:
existing_format = next(
fo for fo in formats
# Get video info
embed_webpage = None
+ is_live = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
+ if args.get('livestream') == '1' or args.get('live_playback') == 1:
+ is_live = True
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
'dislike_count': dislike_count,
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
+ 'is_live': is_live,
}
selection.add_option(
'--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+ help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
m = re.match(
r'''(?ix)(?:P?T)?
(?:
- (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+ (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
- \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
+ \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
(?:
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
from __future__ import unicode_literals
-__version__ = '2015.07.18'
+__version__ = '2015.07.21'