Ondřej Caletka
Dinesh S
Johan K. Jensen
+Yen Chi Hsuan
by extension for the extensions aac, m4a,
mp3, mp4, ogg, wav, webm. You can also use
the special names "best", "bestvideo",
- "bestaudio", "worst". By default, youtube-
- dl will pick the best quality. Use commas
- to download multiple audio formats, such as
- -f
+ "bestaudio", "worst". You can filter the
+ video results by putting a condition in
+ brackets, as in -f "best[height=720]" (or
+ -f "[filesize>10M]"). This works for
+ filesize, height, width, tbr, abr, and vbr
+ and the comparisons <, <=, >, >=, =, != .
+ Formats for which the value is not known
+ are excluded unless you put a question mark
+ (?) after the operator. You can combine
+ format filters, so -f "[height <=?
+ 720][tbr>500]" selects up to 720p videos
+ (or videos where the height is not known)
+ with a bitrate of more than 500 KBit/s. By
+ default, youtube-dl will pick the best
+ quality. Use commas to download multiple
+ audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
## Authentication Options:
-u, --username USERNAME login with this account ID
- -p, --password PASSWORD account password
+ -p, --password PASSWORD account password. If this option is left
+ out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR two-factor auth code
-n, --netrc use .netrc authentication data
--video-password PASSWORD video password (vimeo, smotri)
From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
+### How do I put downloads into a specific folder?
+
+Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
+
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
+    def test_format_filtering(self):
+        """Check bracketed format filters against a fixed set of fake formats."""
+        fake_formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for fmt in fake_formats:
+            fmt.update({'url': 'http://_/', 'ext': 'unknown'})
+        info_dict = _make_result(fake_formats)
+
+        # (format selector, format_id expected to end up downloaded)
+        expectations = (
+            ('best[filesize<3000]', 'D'),
+            ('best[filesize<=3000]', 'E'),
+            # '?' after the operator also keeps formats without a filesize
+            ('best[filesize <= ? 3000]', 'F'),
+            ('best [filesize = 1000] [width>450]', 'B'),
+            ('best [filesize = 1000] [width!=450]', 'C'),
+            ('[filesize>?1]', 'G'),
+            ('[filesize<1M]', 'E'),
+            ('[filesize<1MiB]', 'G'),
+        )
+        for selector, expected_id in expectations:
+            ydl = YDL({'format': selector})
+            ydl.process_ie_result(info_dict)
+            chosen = ydl.downloaded_info_dicts[0]
+            self.assertEqual(chosen['format_id'], expected_id)
+
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
- TwitchIE,
YoutubeIE,
)
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
- def test_twitch_channelid_matching(self):
- self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
- self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
- self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
- self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
-
- def test_twitch_videoid_matching(self):
- self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
- def test_twitch_chapterid_matching(self):
- self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
-
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':tds', ['ComedyCentralShows'])
- self.assertMatch(':colbertreport', ['ComedyCentralShows'])
- self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
fix_xml_ampersands,
InAdvancePagedList,
intlist_to_bytes,
+ is_html,
js_to_json,
limit_length,
OnDemandPagedList,
self.assertTrue(age_restricted(18, 14))
self.assertFalse(age_restricted(18, 18))
+    def test_is_html(self):
+        """Exercise is_html with non-HTML input and with markup in several encodings."""
+        # Bytes that do not start with markup must be rejected.
+        self.assertFalse(is_html(b'\x49\x44\x43<html'))
+
+        html_samples = (
+            # No byte order mark
+            b'<!DOCTYPE foo>\xaaa',
+            # UTF-8 with BOM
+            b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa',
+            # UTF-16-LE
+            b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00',
+            # UTF-16-BE
+            b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4',
+            # UTF-32-BE
+            b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4',
+            # UTF-32-LE
+            b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00',
+        )
+        for sample in html_samples:
+            self.assertTrue(is_html(sample))
+
if __name__ == '__main__':
unittest.main()
import itertools
import json
import locale
+import operator
import os
import platform
import re
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
+ parse_filesize,
PostProcessingError,
platform_name,
preferredencoding,
else:
raise Exception('Invalid result type: %s' % result_type)
+    def _apply_format_filter(self, format_spec, available_formats):
+        """Apply the trailing "[key op value]" filter of format_spec.
+
+        Returns a tuple (remaining_format_spec, filtered_formats). The
+        remaining spec is format_spec with the consumed filter cut off, or
+        'best' if nothing is left. Raises ValueError if format_spec does not
+        end in a recognised filter or if the filter value cannot be parsed.
+        """
+        comparators = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        op_alternatives = '|'.join(map(re.escape, comparators.keys()))
+        filter_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % op_alternatives)
+        match = filter_rex.search(format_spec)
+        if match is None:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        key = match.group('key')
+        raw_value = match.group('value')
+        # Truthy only when the filter carried a '?' after the operator.
+        keep_unknown = match.group('none_inclusive')
+        compare = comparators[match.group('op')]
+
+        try:
+            wanted = int(raw_value)
+        except ValueError:
+            # Not a plain integer: try it as a size, then once more with a
+            # 'B' appended (presumably so that shorthand such as '1M' is
+            # accepted; depends on parse_filesize, defined elsewhere).
+            wanted = parse_filesize(raw_value)
+            if wanted is None:
+                wanted = parse_filesize(raw_value + 'B')
+            if wanted is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        raw_value, format_spec))
+
+        def _keep(fmt):
+            actual = fmt.get(key)
+            if actual is None:
+                # Formats lacking the field survive only with the '?' marker.
+                return keep_unknown
+            return compare(actual, wanted)
+
+        remaining_formats = [fmt for fmt in available_formats if _keep(fmt)]
+
+        # Drop the consumed filter (including the whitespace in front of it).
+        remaining_spec = format_spec[:-len(match.group(0))] or 'best'
+
+        return (remaining_spec, remaining_formats)
+
def select_format(self, format_spec, available_formats):
+ while format_spec.endswith(']'):
+ format_spec, available_formats = self._apply_format_filter(
+ format_spec, available_formats)
+ if not available_formats:
+ return None
+
if format_spec == 'best' or format_spec is None:
return available_formats[-1]
elif format_spec == 'worst':
from __future__ import unicode_literals
from .abc import ABCIE
+from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .grooveshark import GroovesharkIE
from .groupon import GrouponIE
from .hark import HarkIE
+from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .teletask import TeleTaskIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
+from .testtube import TestTubeIE
from .tf1 import TF1IE
from .theonion import TheOnionIE
from .theplatform import ThePlatformIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .twentyfourvideo import TwentyFourVideoIE
-from .twitch import TwitchIE
+from .twitch import (
+ TwitchVideoIE,
+ TwitchChapterIE,
+ TwitchVodIE,
+ TwitchProfileIE,
+ TwitchPastBroadcastsIE,
+ TwitchStreamIE,
+)
from .ubu import UbuIE
from .udemy import (
UdemyIE,
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class Abc7NewsIE(InfoExtractor):
+    """Information extractor for article videos on abc7news.com."""
+
+    _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+            'info_dict': {
+                'id': '472581',
+                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+                'ext': 'mp4',
+                'title': 'East Bay museum celebrates history of synthesized music',
+                'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'timestamp': 1421123075,
+                'upload_date': '20150113',
+                'uploader': 'Jonathan Bloom',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://abc7news.com/472581',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Download the article page at url and return its info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        # Short URLs carry no slug; fall back to the numeric ID.
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The stream location is mandatory; everything below is built on it.
+        m3u8 = self._html_search_meta(
+            'contentURL', webpage, 'm3u8 url', fatal=True)
+
+        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage).strip()
+        # The description is optional metadata. Only strip it when a value
+        # was actually found, so a page without the tag cannot abort the
+        # extraction with an AttributeError on None.
+        description = self._og_search_description(webpage)
+        if description is not None:
+            description = description.strip()
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
+            webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'rel="author">([^<]+)</a>',
+            webpage, 'uploader', default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'formats': formats,
+        }
mobj = re.match(self._VALID_URL, url)
path = mobj.group('path')
page_title = mobj.group('title')
- info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
+ info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
info = self._download_xml(info_url, page_title)
formats = []
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
- 'md5': '275b326f85d80dff7592a9820f5dc887',
+ 'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
- 'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+ 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
- 'title': 'Obama: We\'re not going to be intimidated',
- 'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
- 'upload_date': '20141220',
+ 'title': 'Obama: Cyberattack not an act of war',
+ 'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+ 'upload_date': '20141221',
},
'add_ie': ['CNN'],
}
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
IE_DESC = 'The Daily Show / The Colbert Report'
- # urls can be abbreviations like :thedailyshow or :colbert
+ # urls can be abbreviations like :thedailyshow
# urls for episodes like:
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
- _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+ _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
ExtractorError,
float_or_none,
HEADRequest,
+ is_html,
orderedSet,
parse_xml,
smuggle_url,
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!
first_bytes = full_response.read(512)
- if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+ if not is_html(first_bytes):
self._downloader.report_warning(
'URL could be a direct video link, returning it as such.')
upload_date = unified_strdate(
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ HEADRequest,
+ str_to_int,
+ urlencode_postdata,
+ urlhandle_detect_ext,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+    """Information extractor for single tracks on hearthis.at."""
+
+    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+    _TEST = {
+        'url': 'https://hearthis.at/moofi/dr-kreep',
+        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+        'info_dict': {
+            'id': '150939',
+            'ext': 'wav',
+            'title': 'Moofi - Dr. Kreep',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1421564134,
+            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
+            'upload_date': '20150118',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 71,
+            'categories': ['Experimental'],
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the track page and the playlist API; return the info dict."""
+        url_parts = re.match(self._VALID_URL, url).groupdict()
+        display_id = '{artist:s} - {title:s}'.format(**url_parts)
+
+        webpage = self._download_webpage(url, display_id)
+        track_id = self._search_regex(
+            r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
+
+        # The playlist endpoint is queried with a form-encoded POST body.
+        playlist_request = compat_urllib_request.Request(
+            self._PLAYLIST_URL, urlencode_postdata({'tracks[]': track_id}))
+        playlist_request.add_header(
+            'Content-type', 'application/x-www-form-urlencoded')
+
+        track = self._download_json(
+            playlist_request, track_id, 'Downloading playlist')[0]
+        title = '{artist:s} - {title:s}'.format(**track)
+
+        category = track.get('category')
+        categories = [category] if category else None
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        def search_int(pattern, name):
+            # Optional numeric field scraped from the page markup.
+            return str_to_int(self._search_regex(
+                pattern, webpage, name, fatal=False))
+
+        meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
+        view_count = search_int(meta_span % 'plays_count', 'view count')
+        like_count = search_int(meta_span % 'likes_count', 'like count')
+        comment_count = search_int(
+            meta_span % 'comment_count', 'comment count')
+        duration = search_int(r'data-length="(\d+)', 'duration')
+        timestamp = search_int(
+            r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', 'timestamp')
+
+        formats = []
+
+        # Streaming MP3 referenced by the player link, if present.
+        mp3_url = self._search_regex(
+            r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+            webpage, 'mp3 URL', fatal=False)
+        if mp3_url:
+            formats.append({
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': mp3_url,
+            })
+
+        # Optional download link; its extension is taken from a HEAD request.
+        download_path = self._search_regex(
+            r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+            webpage, 'download URL', default=None)
+        if download_path:
+            download_url = compat_urlparse.urljoin(url, download_path)
+            head_response = self._request_webpage(
+                HEADRequest(download_url), display_id,
+                note='Determining extension')
+            formats.append({
+                'format_id': 'download',
+                'vcodec': 'none',
+                'ext': urlhandle_detect_ext(head_response),
+                'url': download_url,
+                'preference': 2,  # Usually better quality
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': track_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'categories': categories,
+        }
from .common import InfoExtractor
from ..compat import (
compat_str,
+ compat_HTTPError,
)
from ..utils import (
ExtractorError,
},
'add_ie': ['ThePlatform'],
},
+ {
+ 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+ 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+ 'info_dict': {
+ 'id': 'Wjf9EDR3A_60',
+ 'ext': 'mp4',
+ 'title': 'FULL EPISODE: Family Business',
+ 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+ },
+ },
]
def _real_extract(self, url):
if not base_url:
continue
playlist_url = base_url + '?form=MPXNBCNewsAPI'
- all_videos = self._download_json(playlist_url, title)['videos']
try:
- info = next(v for v in all_videos if v['mpxId'] == mpxid)
+ all_videos = self._download_json(playlist_url, title)
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError):
+ continue
+ raise
+
+ if not all_videos or 'videos' not in all_videos:
+ continue
+
+ try:
+ info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
break
except StopIteration:
continue
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
filename = self._search_regex(
compat_urllib_request,
)
from ..utils import (
+ ExtractorError,
str_to_int,
)
from ..aes import (
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
+ error_msg = self._html_search_regex(
+ r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
+ webpage, 'error message', default=None)
+ if error_msg:
+ error_msg = re.sub(r'\s+', ' ', error_msg)
+ raise ExtractorError(
+ 'PornHub said: %s' % error_msg,
+ expected=True, video_id=video_id)
+
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import unified_strdate
+
+
+class StreetVoiceIE(InfoExtractor):
+    """Information extractor for songs hosted on streetvoice.com."""
+
+    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
+        'info_dict': {
+            'id': '94440',
+            'ext': 'mp3',
+            'filesize': 4167053,
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
+            'uploader': 'Crispy脆樂團',
+            'uploader_id': '627810',
+        }
+    }, {
+        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Fetch the song's JSON record from the API and map it to an info dict."""
+        song_id = self._match_id(url)
+
+        api_url = 'http://streetvoice.com/music/api/song/%s' % song_id
+        song = self._download_json(api_url, song_id)
+
+        title = song['name']
+        musician = song['musician']
+        author = musician['name']
+
+        info = {
+            'id': song_id,
+            'url': song['file'],
+            'filesize': song.get('size'),
+            'title': title,
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(musician['id']),
+        }
+        return info
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TestTubeIE(InfoExtractor):
+    """Information extractor for videos on testtube.com."""
+
+    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
+        'info_dict': {
+            'id': '60163',
+            'display_id': '5-weird-ways-plants-can-eat-animals',
+            'duration': 275,
+            'ext': 'mp4',
+            'title': '5 Weird Ways Plants Can Eat Animals',
+            'description': 'Why have some plants evolved to eat meat?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'DNews',
+            'uploader_id': 'dnews',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Resolve the numeric video ID from the page, then query the playlist API."""
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+        # The URL only carries a slug; the numeric ID is embedded in the page.
+        video_id = self._search_regex(
+            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
+            webpage, 'video ID')
+
+        playlist = self._download_json(
+            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
+            video_id)
+        info = playlist['items'][0]
+
+        # info['media'] maps codec name -> {variant name -> variant data}.
+        formats = [
+            {
+                'format_id': '%s-%s' % (vcodec, name),
+                'url': fdata['url'],
+                'vcodec': vcodec,
+                'tbr': fdata.get('bitrate'),
+            }
+            for vcodec, fdatas in info['media'].items()
+            for name, fdata in fdatas.items()
+        ]
+        self._sort_formats(formats)
+
+        duration = int_or_none(info.get('duration'))
+        images = info.get('images', {})
+        show = info.get('show', {})
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': info['title'],
+            'description': info.get('summary'),
+            'thumbnail': images.get('large'),
+            'uploader': show.get('name'),
+            'uploader_id': show.get('slug'),
+            'duration': duration,
+            'formats': formats,
+        }
class TinyPicIE(InfoExtractor):
IE_NAME = 'tinypic'
IE_DESC = 'tinypic.com videos'
- _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
-
- _TEST = {
- 'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
- 'md5': '609b74432465364e72727ebc6203f044',
- 'info_dict': {
- 'id': '6xw7tc',
- 'ext': 'flv',
- 'title': 'shadow phenomenon weird',
+ _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+
+ _TESTS = [
+ {
+ 'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
+ 'md5': '609b74432465364e72727ebc6203f044',
+ 'info_dict': {
+ 'id': '6xw7tc',
+ 'ext': 'flv',
+ 'title': 'shadow phenomenon weird',
+ },
+ },
+ {
+ 'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
+ 'only_matching': True,
}
- }
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_TESTS = [{
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
+ 'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
'info_dict': {
'id': '4278035',
'ext': 'wmv',
'title': 'Ogniem i mieczem, odc. 2',
- 'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
},
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+ 'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
'info_dict': {
'id': '194536',
'ext': 'mp4',
'title': 'Czas honoru, I seria – odc. 13',
- # 'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+ 'md5': 'c3b15ed1af288131115ff17a17c19dda',
'info_dict': {
'id': '17916176',
'ext': 'mp4',
'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
},
- 'params': {
- # m3u8 download
- 'skip_download': 'true',
- },
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+ 'md5': 'c3b15ed1af288131115ff17a17c19dda',
'info_dict': {
'id': '17834272',
'ext': 'mp4',
'title': 'Na sygnale, odc. 39',
- 'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
- },
- 'params': {
- # m3u8 download
- 'skip_download': 'true',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
- title = self._og_search_title(webpage)
- series = self._search_regex(
- r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
+ title = self._search_regex(
+ r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
+ webpage, 'title', group='title')
+ series_title = self._search_regex(
+ r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
webpage, 'series', group='series', default=None)
- if series is not None and series not in title:
- title = '%s, %s' % (series, title)
- description = self._og_search_description(webpage, default=None)
+ if series_title:
+ title = '%s, %s' % (series_title, title)
+
+ thumbnail = self._search_regex(
+ r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
- if video_url is None:
+ if not video_url:
video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url']
return {
'id': video_id,
'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'description': description,
+ 'thumbnail': thumbnail,
'formats': formats,
}
import itertools
import re
+import random
from .common import InfoExtractor
from ..compat import (
+ compat_str,
compat_urllib_parse,
compat_urllib_request,
)
)
-class TwitchIE(InfoExtractor):
- # TODO: One broadcast may be split into multiple videos. The key
- # 'broadcast_id' is the same for all parts, and 'broadcast_part'
- # starts at 1 and increases. Can we treat all parts as one video?
- _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
- (?:
- (?P<channelid>[^/]+)|
- (?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
- (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
- (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
- )
- /?(?:\#.*)?$
- """
- _PAGE_LIMIT = 100
+class TwitchBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
+
_API_BASE = 'https://api.twitch.tv'
+ _USHER_BASE = 'http://usher.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
- _TESTS = [{
- 'url': 'http://www.twitch.tv/riotgames/b/577357806',
- 'info_dict': {
- 'id': 'a577357806',
- 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
- },
- 'playlist_mincount': 12,
- }, {
- 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
- 'info_dict': {
- 'id': 'c5285812',
- 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
- },
- 'playlist_mincount': 3,
- }, {
- 'url': 'http://www.twitch.tv/vanillatv',
- 'info_dict': {
- 'id': 'vanillatv',
- 'title': 'VanillaTV',
- },
- 'playlist_mincount': 412,
- }]
def _handle_error(self, response):
if not isinstance(response, dict):
expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
- response = super(TwitchIE, self)._download_json(url, video_id, note)
+ response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
self._handle_error(response)
return response
- def _extract_media(self, item, item_id):
- ITEMS = {
- 'a': 'video',
- 'v': 'vod',
- 'c': 'chapter',
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ authenticity_token = self._search_regex(
+ r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+ login_page, 'authenticity token')
+
+ login_form = {
+ 'utf8': '✓'.encode('utf-8'),
+ 'authenticity_token': authenticity_token,
+ 'redirect_on_login': '',
+ 'embed_form': 'false',
+ 'mp_source_action': '',
+ 'follow': '',
+ 'user[login]': username,
+ 'user[password]': password,
}
- info = self._extract_info(self._download_json(
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ request.add_header('Referer', self._LOGIN_URL)
+ response = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ m = re.search(
+ r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+ if m:
+ raise ExtractorError(
+ 'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
+
+class TwitchItemBaseIE(TwitchBaseIE):
+ def _download_info(self, item, item_id):
+ return self._extract_info(self._download_json(
'%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
- 'Downloading %s info JSON' % ITEMS[item]))
-
- if item == 'v':
- access_token = self._download_json(
- '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
- 'Downloading %s access token' % ITEMS[item])
- formats = self._extract_m3u8_formats(
- 'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
- % (item_id, access_token['token'], access_token['sig']),
- item_id, 'mp4')
- info['formats'] = formats
- return info
+ 'Downloading %s info JSON' % self._ITEM_TYPE))
+ def _extract_media(self, item_id):
+ info = self._download_info(self._ITEM_SHORTCUT, item_id)
response = self._download_json(
- '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
- 'Downloading %s playlist JSON' % ITEMS[item])
+ '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+ 'Downloading %s playlist JSON' % self._ITEM_TYPE)
entries = []
chunks = response['chunks']
qualities = list(chunks.keys())
'view_count': info['views'],
}
- def _real_initialize(self):
- self._login()
+ def _real_extract(self, url):
+ return self._extract_media(self._match_id(url))
- def _login(self):
- (username, password) = self._get_login_info()
- if username is None:
- return
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
+class TwitchVideoIE(TwitchItemBaseIE):
+ IE_NAME = 'twitch:video'
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _ITEM_TYPE = 'video'
+ _ITEM_SHORTCUT = 'a'
- authenticity_token = self._search_regex(
- r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
- login_page, 'authenticity token')
+ _TEST = {
+ 'url': 'http://www.twitch.tv/riotgames/b/577357806',
+ 'info_dict': {
+ 'id': 'a577357806',
+ 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+ },
+ 'playlist_mincount': 12,
+ }
- login_form = {
- 'utf8': '✓'.encode('utf-8'),
- 'authenticity_token': authenticity_token,
- 'redirect_on_login': '',
- 'embed_form': 'false',
- 'mp_source_action': '',
- 'follow': '',
- 'user[login]': username,
- 'user[password]': password,
- }
- request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
- request.add_header('Referer', self._LOGIN_URL)
- response = self._download_webpage(
- request, None, 'Logging in as %s' % username)
+class TwitchChapterIE(TwitchItemBaseIE):
+ IE_NAME = 'twitch:chapter'
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _ITEM_TYPE = 'chapter'
+ _ITEM_SHORTCUT = 'c'
- m = re.search(
- r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
- if m:
- raise ExtractorError(
- 'Unable to login: %s' % m.group('msg').strip(), expected=True)
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+ 'info_dict': {
+ 'id': 'c5285812',
+ 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
+ 'only_matching': True,
+ }]
+
+
+class TwitchVodIE(TwitchItemBaseIE):
+    # Handles single VOD pages of the form <base>/<channel>/v/<id>.
+    IE_NAME = 'twitch:vod'
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    # _ITEM_TYPE appears in the progress notes; _ITEM_SHORTCUT is the prefix
+    # that _download_info puts in front of the id in the kraken videos URL.
+    _ITEM_TYPE = 'vod'
+    _ITEM_SHORTCUT = 'v'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/ksptv/v/3622000',
+        'info_dict': {
+            'id': 'v3622000',
+            'ext': 'mp4',
+            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+            # Raw string: "\." is a regex escape here, not a Python string
+            # escape (a non-raw literal triggers an invalid-escape warning).
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 6951,
+            'timestamp': 1419028564,
+            'upload_date': '20141219',
+            'uploader': 'KSPTV',
+            'uploader_id': 'ksptv',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the VOD's info dict with a 'formats' entry added.
+
+        The metadata dict comes from ``_download_info``. The formats are
+        obtained by passing the usher ``/vod/<id>`` URL, carrying the token
+        and signature from the VOD access-token endpoint, to
+        ``_extract_m3u8_formats``.
+        """
+        item_id = self._match_id(url)
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        access_token = self._download_json(
+            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+            'Downloading %s access token' % self._ITEM_TYPE)
+        # NOTE(review): token and sig are interpolated without URL-quoting --
+        # confirm they never contain characters that need escaping.
+        formats = self._extract_m3u8_formats(
+            '%s/vod/%s?nauth=%s&nauthsig=%s'
+            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
+            item_id, 'mp4')
+        info['formats'] = formats
+        return info
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+    # Shared paging logic for channel video listings. Subclasses supply
+    # _PLAYLIST_TYPE (used in the progress note) and may override
+    # _PLAYLIST_URL.
+    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PAGE_LIMIT = 100
+
+    def _extract_playlist(self, channel_id):
+        """Build a playlist result from every page of a channel's listing.
+
+        The channel info JSON provides the playlist title ('display_name',
+        falling back to 'name'). Listing pages are requested _PAGE_LIMIT
+        entries at a time until one comes back with an empty 'videos' list.
+        """
+        channel = self._download_json(
+            '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+            channel_id, 'Downloading channel info JSON')
+        title = channel.get('display_name') or channel.get('name')
+
+        page_size = self._PAGE_LIMIT
+        results = []
+        page = 1
+        while True:
+            listing_url = self._PLAYLIST_URL % (
+                channel_id, (page - 1) * page_size, page_size)
+            note = 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, page)
+            batch = self._download_json(listing_url, channel_id, note)['videos']
+            if not batch:
+                break
+            for video in batch:
+                results.append(self.url_result(video['url']))
+            page += 1
+
+        return self.playlist_result(results, channel_id, title)
+
+    def _real_extract(self, url):
+        """Extract the playlist for the channel id matched in *url*."""
+        channel_id = self._match_id(url)
+        return self._extract_playlist(channel_id)
+
+
+class TwitchProfileIE(TwitchPlaylistBaseIE):
+ # Playlist extractor for <base>/<channel>/profile URLs; extraction itself
+ # is inherited from TwitchPlaylistBaseIE.
+ IE_NAME = 'twitch:profile'
+ # The 'id' group captures the channel segment; a trailing slash and a
+ # '#fragment' are tolerated, anything else after /profile is rejected.
+ _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ # Inserted into the "Downloading %s videos JSON page %d" progress note.
+ _PLAYLIST_TYPE = 'profile'
+
+ _TEST = {
+ 'url': 'http://www.twitch.tv/vanillatv/profile',
+ 'info_dict': {
+ 'id': 'vanillatv',
+ 'title': 'VanillaTV',
+ },
+ 'playlist_mincount': 412,
+ }
+
+
+class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
+ # Playlist extractor for <base>/<channel>/profile/past_broadcasts URLs;
+ # extraction itself is inherited from TwitchPlaylistBaseIE.
+ IE_NAME = 'twitch:past_broadcasts'
+ # The 'id' group captures the channel segment; a trailing slash and a
+ # '#fragment' are tolerated.
+ _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ # Same listing URL as the base class with '&broadcasts=true' appended.
+ _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+ # Inserted into the "Downloading %s videos JSON page %d" progress note.
+ _PLAYLIST_TYPE = 'past broadcasts'
+
+ _TEST = {
+ 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+ 'info_dict': {
+ 'id': 'spamfish',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 54,
+ }
+
+
+class TwitchStreamIE(TwitchBaseIE):
+ IE_NAME = 'twitch:stream'
+ _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+
+ _TEST = {
+ 'url': 'http://www.twitch.tv/shroomztv',
+ 'info_dict': {
+ 'id': '12772022048',
+ 'display_id': 'shroomztv',
+ 'ext': 'mp4',
+ 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
+ 'is_live': True,
+ 'timestamp': 1421928037,
+ 'upload_date': '20150122',
+ 'uploader': 'ShroomzTV',
+ 'uploader_id': 'shroomztv',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- if mobj.group('chapterid'):
- return self._extract_media('c', mobj.group('chapterid'))
+ channel_id = self._match_id(url)
+
+ stream = self._download_json(
+ '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
+ 'Downloading stream JSON').get('stream')
+
+ # Fallback on profile extraction if stream is offline
+ if not stream:
+ return self.url_result(
+ 'http://www.twitch.tv/%s/profile' % channel_id,
+ 'TwitchProfile', channel_id)
- """
- webpage = self._download_webpage(url, chapter_id)
- m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
+ access_token = self._download_json(
+ '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
+ 'Downloading channel access token')
+
+ query = {
+ 'allow_source': 'true',
+ 'p': random.randint(1000000, 10000000),
+ 'player': 'twitchweb',
+ 'segment_preference': '4',
+ 'sig': access_token['sig'],
+ 'token': access_token['token'],
+ }
+
+ # urlencode() already returns text. Encoding it to bytes made Python 3
+ # interpolate the bytes repr (b'...') into the playlist URL.
+ formats = self._extract_m3u8_formats(
+ '%s/api/channel/hls/%s.m3u8?%s'
+ % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
+ channel_id, 'mp4')
+
+ view_count = stream.get('viewers')
+ timestamp = parse_iso8601(stream.get('created_at'))
+
+ channel = stream['channel']
+ title = self._live_title(channel.get('display_name') or channel.get('name'))
+ description = channel.get('status')
+
+ thumbnails = []
+ for thumbnail_key, thumbnail_url in stream['preview'].items():
+ m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
if not m:
- raise ExtractorError('Cannot find archive of a chapter')
- archive_id = m.group(1)
-
- api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
- doc = self._download_xml(
- api, chapter_id,
- note='Downloading chapter information',
- errnote='Chapter information download failed')
- for a in doc.findall('.//archive'):
- if archive_id == a.find('./id').text:
- break
- else:
- raise ExtractorError('Could not find chapter in chapter information')
-
- video_url = a.find('./video_file_url').text
- video_ext = video_url.rpartition('.')[2] or 'flv'
-
- chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
- chapter_info = self._download_json(
- chapter_api_url, 'c' + chapter_id,
- note='Downloading chapter metadata',
- errnote='Download of chapter metadata failed')
-
- bracket_start = int(doc.find('.//bracket_start').text)
- bracket_end = int(doc.find('.//bracket_end').text)
-
- # TODO determine start (and probably fix up file)
- # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
- #video_url += '?start=' + TODO:start_timestamp
- # bracket_start is 13290, but we want 51670615
- self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
- 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
- info = {
- 'id': 'c' + chapter_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': chapter_info['title'],
- 'thumbnail': chapter_info['preview'],
- 'description': chapter_info['description'],
- 'uploader': chapter_info['channel']['display_name'],
- 'uploader_id': chapter_info['channel']['name'],
- }
- return info
- """
- elif mobj.group('videoid'):
- return self._extract_media('a', mobj.group('videoid'))
- elif mobj.group('vodid'):
- return self._extract_media('v', mobj.group('vodid'))
- elif mobj.group('channelid'):
- channel_id = mobj.group('channelid')
- info = self._download_json(
- '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
- channel_id, 'Downloading channel info JSON')
- channel_name = info.get('display_name') or info.get('name')
- entries = []
- offset = 0
- limit = self._PAGE_LIMIT
- for counter in itertools.count(1):
- response = self._download_json(
- '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
- % (self._API_BASE, channel_id, offset, limit),
- channel_id, 'Downloading channel videos JSON page %d' % counter)
- videos = response['videos']
- if not videos:
- break
- entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
- offset += limit
- return self.playlist_result(entries, channel_id, channel_name)
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ return {
+ 'id': compat_str(stream['_id']),
+ 'display_id': channel_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'uploader': channel.get('display_name'),
+ 'uploader_id': channel.get('name'),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'is_live': True,
+ }
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
+ ExtractorError,
remove_start,
)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id)
- escaped_data = self._search_regex(
- r'unescape\("([^"]+)"\)', webpage, 'escaped data')
+ try:
+ escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
+ except IndexError:
+ raise ExtractorError('Unable to extract escaped data')
+
playlist = compat_urllib_parse.unquote(escaped_data)
thumbnail = self._search_regex(
authentication.add_option(
'-p', '--password',
dest='password', metavar='PASSWORD',
- help='account password')
+ help='account password. If this option is left out, youtube-dl will ask interactively.')
authentication.add_option(
'-2', '--twofactor',
dest='twofactor', metavar='TWOFACTOR',
'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
'You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst". '
+ ' You can filter the video results by putting a condition in'
+ ' brackets, as in -f "best[height=720]"'
+ ' (or -f "[filesize>10M]"). '
+ ' This works for filesize, height, width, tbr, abr, and vbr'
+ ' and the comparisons <, <=, >, >=, =, != .'
+ ' Formats for which the value is not known are excluded unless you'
+ ' put a question mark (?) after the operator.'
+ ' You can combine format filters, so '
+ '-f "[height <=? 720][tbr>500]" '
+ 'selects up to 720p videos (or videos where the height is not '
+ 'known) with a bitrate of at least 500 KBit/s.'
' By default, youtube-dl will pick the best quality.'
' Use commas to download multiple audio formats, such as'
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
except AttributeError: # Python < 3
getheader = url_handle.info().getheader
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
return getheader('Content-Type').split("/")[1]
if content_limit is None:
return False # Content available for everyone
return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """Tell whether a file looks like HTML, judging by its first bytes.
+
+    A leading byte-order mark, if present, selects the codec and is skipped;
+    otherwise the bytes are decoded as UTF-8. Undecodable bytes are replaced
+    rather than raising. Returns the regex match object when the decoded text
+    starts with optional whitespace followed by '<', otherwise None.
+    """
+    # UTF-32 LE comes before UTF-16 LE because its mark begins with the same
+    # two bytes; the first matching entry wins.
+    known_marks = (
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    )
+
+    codec, skip = 'utf-8', 0
+    for mark, mark_codec in known_marks:
+        if first_bytes.startswith(mark):
+            codec, skip = mark_codec, len(mark)
+            break
+
+    text = first_bytes[skip:].decode(codec, 'replace')
+    return re.match(r'^\s*<', text)
from __future__ import unicode_literals
-__version__ = '2015.01.16'
+__version__ = '2015.01.23.1'