import sys
import time
-if os.name == 'nt':
- import ctypes
-
from .utils import (
compat_urllib_error,
compat_urllib_request,
def to_stderr(self, message):
self.ydl.to_screen(message)
- def to_cons_title(self, message):
- """Set console/terminal window title to message."""
- if not self.params.get('consoletitle', False):
- return
- if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self.to_screen('\033]0;%s\007' % message, skip_eol=True)
+ def to_console_title(self, message):
+ self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
- self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+ self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
import time
import traceback
-from .utils import *
+if os.name == 'nt':
+ import ctypes
+
+from .utils import (
+ compat_http_client,
+ compat_print,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_request,
+ ContentTooShortError,
+ date_from_str,
+ DateRange,
+ determine_ext,
+ DownloadError,
+ encodeFilename,
+ ExtractorError,
+ locked_file,
+ MaxDownloadsReached,
+ PostProcessingError,
+ preferredencoding,
+ SameFileError,
+ sanitize_filename,
+ subtitles_filename,
+ takewhile_inclusive,
+ UnavailableVideoError,
+ write_json_file,
+ write_string,
+)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
output = output.encode(preferredencoding())
sys.stderr.write(output)
+ def to_console_title(self, message):
+ if not self.params.get('consoletitle', False):
+ return
+ if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ elif 'TERM' in os.environ:
+ write_string(u'\033]0;%s\007' % message, self._screen_file)
+
+ def save_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if 'TERM' in os.environ:
+ # Save the title on stack
+ write_string(u'\033[22;0t', self._screen_file)
+
+ def restore_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if 'TERM' in os.environ:
+ # Restore the title from stack
+ write_string(u'\033[23;0t', self._screen_file)
+
+ def __enter__(self):
+ self.save_console_title()
+ return self
+
+ def __exit__(self, *args):
+ self.restore_console_title()
+
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
- except (UnicodeEncodeError) as err:
+ except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
- return self.process_video_result(ie_result)
+ return self.process_video_result(ie_result, download=download)
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat')
+ sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
return res
def list_formats(self, info_dict):
+ def format_note(fdict):
+ if fdict.get('format_note') is not None:
+ return fdict['format_note']
+ res = u''
+ if fdict.get('vcodec') is not None:
+ res += u'%-5s' % fdict['vcodec']
+ elif fdict.get('vbr') is not None:
+ res += u'video'
+ if fdict.get('vbr') is not None:
+ res += u'@%4dk' % fdict['vbr']
+ if fdict.get('acodec') is not None:
+ if res:
+ res += u', '
+ res += u'%-5s' % fdict['acodec']
+ elif fdict.get('abr') is not None:
+ if res:
+ res += u', '
+ res += 'audio'
+ if fdict.get('abr') is not None:
+ res += u'@%3dk' % fdict['abr']
+ return res
+
def line(format):
return (u'%-20s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
self.format_resolution(format),
- format.get('format_note', ''),
+ format_note(format),
)
)
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
if len(formats) > 1:
- formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
- formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
+ formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
+ formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'Ismael Mejía',
'Steffan \'Ruirize\' James',
'Andras Elso',
+ 'Jelle van der Waa',
+ 'Marcin Cieślak',
)
__license__ = 'Public Domain'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
- # YoutubeDL
- ydl = YoutubeDL({
+ ydl_opts = {
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
- })
+ }
- if opts.verbose:
- write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
- try:
- sp = subprocess.Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_string(u'[debug] Git HEAD: ' + out + u'\n')
- except:
+ with YoutubeDL(ydl_opts) as ydl:
+ if opts.verbose:
+ write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
- sys.exc_clear()
+ sp = subprocess.Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
- pass
- write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-
- proxy_map = {}
- for handler in opener.handlers:
- if hasattr(handler, 'proxies'):
- proxy_map.update(handler.proxies)
- write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
-
- ydl.add_default_info_extractors()
-
- # PostProcessors
- # Add the metadata pp first, the other pps will copy it
- if opts.addmetadata:
- ydl.add_post_processor(FFmpegMetadataPP())
- if opts.extractaudio:
- ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
- if opts.recodevideo:
- ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
- if opts.embedsubtitles:
- ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
-
- # Update version
- if opts.update_self:
- update_self(ydl.to_screen, opts.verbose)
-
- # Maybe do nothing
- if len(all_urls) < 1:
- if not opts.update_self:
- parser.error(u'you must provide at least one URL')
- else:
- sys.exit()
+ try:
+ sys.exc_clear()
+ except:
+ pass
+ write_string(u'[debug] Python version %s - %s' %
+ (platform.python_version(), platform_name()) + u'\n')
+
+ proxy_map = {}
+ for handler in opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
+
+ ydl.add_default_info_extractors()
+
+ # PostProcessors
+ # Add the metadata pp first, the other pps will copy it
+ if opts.addmetadata:
+ ydl.add_post_processor(FFmpegMetadataPP())
+ if opts.extractaudio:
+ ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
+ if opts.recodevideo:
+ ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
+ if opts.embedsubtitles:
+ ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
+
+ # Update version
+ if opts.update_self:
+ update_self(ydl.to_screen, opts.verbose)
+
+ # Maybe do nothing
+ if len(all_urls) < 1:
+ if not opts.update_self:
+ parser.error(u'you must provide at least one URL')
+ else:
+ sys.exit()
- try:
- retcode = ydl.download(all_urls)
- except MaxDownloadsReached:
- ydl.to_screen(u'--max-download limit reached, aborting.')
- retcode = 101
+ try:
+ retcode = ydl.download(all_urls)
+ except MaxDownloadsReached:
+ ydl.to_screen(u'--max-download limit reached, aborting.')
+ retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
+from .gamekings import GamekingsIE
from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE
from .generic import GenericIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE
+from .livestream import LivestreamIE, LivestreamOriginalIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
-from .southparkstudios import SouthParkStudiosIE
+from .southparkstudios import (
+ SouthParkStudiosIE,
+ SouthparkDeIE,
+)
from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
+from .tvp import TvpIE
from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
- if re.search(self._LIVE_URL, video_id) is not None:
+ if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
- url_node = video_doc.find('urlSd')
+ url_node = event_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,
-import os.path
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
- compat_urllib_parse_urlparse,
+ determine_ext,
+ ExtractorError,
)
class AUEngineIE(InfoExtractor):
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, u'title')
title = title.strip()
- links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
- links = [compat_urllib_parse.unquote(l) for l in links]
+ links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
+ links = map(compat_urllib_parse.unquote, links)
+
+ thumbnail = None
+ video_url = None
for link in links:
- root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
- if pathext == '.png':
+ if link.endswith('.png'):
thumbnail = link
- elif pathext == '.mp4':
- url = link
- ext = pathext
+ elif '/videos/' in link:
+ video_url = link
+ if not video_url:
+ raise ExtractorError(u'Could not find video URL')
+ ext = u'.' + determine_ext(video_url)
if ext == title[-len(ext):]:
title = title[:-len(ext)]
- ext = ext[1:]
- return [{
+
+ return {
'id': video_id,
- 'url': url,
- 'ext': ext,
+ 'url': video_url,
'title': title,
'thumbnail': thumbnail,
- }]
+ }
_TEST = {
u'url': u'http://bambuser.com/v/4050584',
- u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
+ # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
+ #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': {
u'id': u'4050584',
u'ext': u'flv',
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
+ * abr Average audio bitrate in KBit/s
+ * acodec Name of the audio codec in use
+ * vbr Average video bitrate in KBit/s
+ * vcodec Name of the video codec in use
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
# Helper functions for extracting OpenGraph info
@staticmethod
- def _og_regex(prop):
- return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+ def _og_regexes(prop):
+ content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+ property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+ template = r'<meta[^>]+?%s[^>]+?%s'
+ return [
+ template % (property_re, content_re),
+ template % (content_re, property_re),
+ ]
def _og_search_property(self, prop, html, name=None, **kargs):
if name is None:
name = 'OpenGraph %s' % prop
- escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+ escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None:
return None
return unescapeHTML(escaped)
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
- regexes = [self._og_regex('video')]
- if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+ regexes = self._og_regexes('video')
+ if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _rta_search(self, html):
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
- playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
+ playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
--- /dev/null
+import re
+
+from .common import InfoExtractor
+
+
+class GamekingsIE(InfoExtractor):
+ _VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
+ _TEST = {
+ u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
+ u'file': u'20130811.mp4',
+ # MD5 is flaky, seems to change regularly
+ #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
+ u'info_dict': {
+ u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
+ u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
+ }
+ }
+
+ def _real_extract(self, url):
+
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ video_url = self._og_search_video_url(webpage)
+
+ video = re.search(r'[0-9]+', video_url)
+ video_id = video.group(0)
+
+ # Todo: add medium format
+ video_url = video_url.replace(video_id, 'large/' + video_id)
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ }
raise ExtractorError(u'Failed to download URL: %s' % url)
self.report_extraction(video_id)
+
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ video_title = self._html_search_regex(r'<title>(.*)</title>',
+ webpage, u'video title', default=u'video', flags=re.DOTALL)
+
# Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is not None:
return self.url_result(surl, 'Vimeo')
# Look for embedded YouTube player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
- if mobj:
- surl = unescapeHTML(mobj.group(u'url'))
- return self.url_result(surl, 'Youtube')
+ matches = re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
+ if matches:
+ urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
+ for tuppl in matches]
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
- # it's tempting to parse this further, but you would
- # have to take into account all the variations like
- # Video Title - Site Name
- # Site Name | Video Title
- # Video Title - Tagline | Site Name
- # and so on and so forth; it's just not practical
- video_title = self._html_search_regex(r'<title>(.*)</title>',
- webpage, u'video title', default=u'video', flags=re.DOTALL)
-
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')
import re
import json
+import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
get_meta_content,
+ xpath_with_ns,
ExtractorError,
)
class LivestreamIE(InfoExtractor):
+ IE_NAME = u'livestream'
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
info = json.loads(self._download_webpage(api_url, video_id,
u'Downloading video info'))
return self._extract_video_info(info)
+
+
+# The original version of Livestream uses a different system
+class LivestreamOriginalIE(InfoExtractor):
+ IE_NAME = u'livestream:original'
+ _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+ _TEST = {
+ u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ u'info_dict': {
+ u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ u'ext': u'flv',
+ u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
+ },
+ u'params': {
+ # rtmp
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ user = mobj.group('user')
+ api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
+
+ api_response = self._download_webpage(api_url, video_id)
+ info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
+ item = info.find('channel').find('item')
+ ns = {'media': 'http://search.yahoo.com/mrss'}
+ thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
+ # Remove the extension and number from the path (like 1.jpg)
+ path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
+
+ return {
+ 'id': video_id,
+ 'title': item.find('title').text,
+ 'url': 'rtmp://extondemand.livestream.com/ondemand',
+ 'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
+ 'ext': 'flv',
+ 'thumbnail': thumbnail_url,
+ }
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m:
- raise ExtractorError(u'Cannot transform RTMP url')
+ return rtmp_video_url
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
_TEST = {
u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4',
- u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
+ # md5 varies from time to time, as in
+ # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
+ #u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
u'info_dict': {
u"title": u"Sucked on a toilet",
u"age_limit": 18,
u'skip_download': True,
},
},
- {
- u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
- u'file': u'129679.flv',
- u'info_dict': {
- u'upload_date': u'20131016',
- u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
- u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
- },
- u'params': {
- u'skip_download': True,
- },
- },
{
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
u'file': u'124903.flv',
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
- 'ext': u'mp3',
+ 'ext': info.get('original_format', u'mp3'),
'description': info['description'],
'thumbnail': thumbnail,
}
class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com'
- _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
+ _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
- _TEST = {
+ # Overwrite MTVIE properties we don't want
+ _TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
},
- }
-
- # Overwrite MTVIE properties we don't want
- _TESTS = []
+ }]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+ url = u'http://www.' + mobj.group(u'url')
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
+
+class SouthparkDeIE(SouthParkStudiosIE):
+ IE_NAME = u'southpark.de'
+ _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+ u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
+ u'info_dict': {
+ u'title': u'The Government Won\'t Respect My Privacy',
+ u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
+ },
+ }]
import xml.etree.ElementTree
from .common import InfoExtractor
+from ..utils import determine_ext
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
- _TEST = {
+ _TESTS = [{
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
- }
+ },
+ {
+ u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ u'file': u'1309159.mp4',
+ u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
+ u'info_dict': {
+ u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
+ }
+ }]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, video_id)
- video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
- webpage, u'title')
+ video_title = self._html_search_regex(
+ r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
- xml_code = self._download_webpage(xml_url, video_id,
- note=u'Downloading XML', errnote=u'Failed to download XML')
+ xml_code = self._download_webpage(
+ xml_url, video_id,
+ note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
- last_type = idoc[-1]
- filename = last_type.findall('./filename')[0].text
- duration = float(last_type.findall('./duration')[0].text)
- video_url = 'http://video2.spiegel.de/flash/' + filename
- video_ext = filename.rpartition('.')[2]
+ formats = [
+ {
+ 'format_id': n.tag.rpartition('type')[2],
+ 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+ 'width': int(n.find('./width').text),
+ 'height': int(n.find('./height').text),
+ 'abr': int(n.find('./audiobitrate').text),
+ 'vbr': int(n.find('./videobitrate').text),
+ 'vcodec': n.find('./codec').text,
+ 'acodec': 'MP4A',
+ }
+ for n in list(idoc)
+ # Blacklist type 6, it's extremely LQ and not available on the same server
+ if n.tag.startswith('type') and n.tag != 'type6'
+ ]
+ formats.sort(key=lambda f: f['vbr'])
+ duration = float(idoc[0].findall('./duration')[0].text)
+
info = {
'id': video_id,
- 'url': video_url,
- 'ext': video_ext,
'title': video_title,
'duration': duration,
+ 'formats': formats,
}
- return [info]
+ return info
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
return [self._playlist_videos_info(url,name,playlist_id)]
- def _playlist_videos_info(self,url,name,playlist_id=0):
+
+ def _playlist_videos_info(self, url, name, playlist_id):
'''Returns the videos of the playlist'''
- video_RE=r'''
- <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
- ([.\s]*?)data-playlist_item_id="(\d+)"
- ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
- '''
- video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
- webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
- m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
- m_names=re.finditer(video_name_RE,webpage)
+
+ webpage = self._download_webpage(
+ url, playlist_id, u'Downloading playlist webpage')
+ matches = re.finditer(
+ r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
+ webpage)
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
webpage, 'playlist title')
- playlist_entries = []
- for m_video, m_name in zip(m_videos,m_names):
- talk_url='http://www.ted.com%s' % m_name.group('talk_url')
- playlist_entries.append(self.url_result(talk_url, 'TED'))
- return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
+ playlist_entries = [
+ self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
+ for m in matches
+ ]
+ return self.playlist_result(
+ playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url"""
'ext': 'mp4',
'url': stream['file'],
'format': stream['id']
- } for stream in info['htmlStreams']]
+ } for stream in info['htmlStreams']]
video_id = info['id']
self._list_available_subtitles(video_id, webpage)
return
- info = {
+ return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
- # TODO: Remove when #980 has been merged
- info.update(info['formats'][-1])
-
- return info
-
def _get_available_subtitles(self, video_id, webpage):
try:
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
--- /dev/null
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class TvpIE(InfoExtractor):
+ IE_NAME = u'tvp.pl'
+ _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+ u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
+ u'file': u'12878238.wmv',
+ u'info_dict': {
+ u'title': u'31.10.2013 - Odcinek 2',
+ u'description': u'31.10.2013 - Odcinek 2',
+ },
+ u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
+ json_params = self._download_webpage(
+ json_url, video_id, u"Downloading video metadata")
+
+ params = json.loads(json_params)
+ self.report_extraction(video_id)
+ video_url = params['video_url']
+
+ title = self._og_search_title(webpage, fatal=True)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'ext': 'wmv',
+ 'url': video_url,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
continue
format_url = self._SMIL_BASE_URL + m.group('path')
- format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
- m.groupdict())
formats.append({
'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'),
- 'format_note': format_note,
+ 'vcodec': m.group('vcodec'),
+ 'acodec': m.group('acodec'),
+ 'vbr': int(m.group('vbr')),
+ 'abr': int(m.group('abr')),
'ext': m.group('ext'),
'width': int(m.group('width')),
'height': int(m.group('height')),
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com'
- _VALID_URL = r"""^
+ _VALID_URL = r"""(?x)^
(
- (?:https?://)? # http(s):// (optional)
- (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+ (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
u"uploader_id": u"justintimberlakeVEVO"
}
},
+ {
+ u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
+ u"file": u"yZIXLfi8CZQ.mp4",
+ u"note": u"Embed-only video (#1746)",
+ u"info_dict": {
+ u"upload_date": u"20120608",
+ u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
+ u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
+ u"uploader": u"SET India",
+ u"uploader_id": u"setindia"
+ }
+ },
]
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
if YoutubePlaylistIE.suitable(url): return False
- return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+ return re.match(cls._VALID_URL, url) is not None
def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs)
"""Turn the encrypted s field into a working signature"""
if player_url is not None:
+ if player_url.startswith(u'//'):
+ player_url = u'https:' + player_url
try:
player_id = (player_url, len(s))
if player_id not in self._player_cache:
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat'),
+ 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': l[0].encode('utf-8'),
})
url = u'http://www.youtube.com/api/timedtext?' + params
def _get_available_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
- sub_format = self._downloader.params.get('subtitlesformat')
+ sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
data = compat_urllib_parse.urlencode({'video_id': video_id,
- 'el': 'embedded',
+ 'el': 'player_embedded',
'gl': 'US',
'hl': 'en',
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
+ if 'view_count' in video_info:
+ view_count = int(video_info['view_count'][0])
+ else:
+ view_count = None
+
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError(u'"rental" videos not supported')
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+ 'view_count': view_count,
})
return results
# Download channel page
channel_id = mobj.group(1)
video_ids = []
+ url = 'https://www.youtube.com/channel/%s/videos' % channel_id
+ channel_page = self._download_webpage(url, channel_id)
+ if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
+ autogenerated = True
+ else:
+ autogenerated = False
- # Download all channel pages using the json-based channel_ajax query
- for pagenum in itertools.count(1):
- url = self._MORE_PAGES_URL % (pagenum, channel_id)
- page = self._download_webpage(url, channel_id,
- u'Downloading page #%s' % pagenum)
-
- page = json.loads(page)
-
- ids_in_page = self.extract_videos_from_page(page['content_html'])
- video_ids.extend(ids_in_page)
-
- if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
- break
+ if autogenerated:
+ # The videos are contained in a single page
+ # the ajax pages can't be used, they are empty
+ video_ids = self.extract_videos_from_page(channel_page)
+ else:
+ # Download all channel pages using the json-based channel_ajax query
+ for pagenum in itertools.count(1):
+ url = self._MORE_PAGES_URL % (pagenum, channel_id)
+ page = self._download_webpage(url, channel_id,
+ u'Downloading page #%s' % pagenum)
+
+ page = json.loads(page)
+
+ ids_in_page = self.extract_videos_from_page(page['content_html'])
+ video_ids.extend(ids_in_page)
+
+ if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+ break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
import json
import traceback
import hashlib
+import os
import subprocess
import sys
from zipimport import zipimporter
-from .utils import *
+from .utils import (
+ compat_str,
+ compat_urllib_request,
+)
from .version import __version__
def rsa_verify(message, signature, key):
-__version__ = '2013.11.11'
+__version__ = '2013.11.19'