ngld
nyuszika7h
Shaun Walbridge
+Lee Jenkins
+Anssi Hannula
+Lukáš Lalinský
- **GodTube**
- **GoldenMoustache**
- **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
- **Goshgay**
- **Groupon**
- **Hark**
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- **MusicPlayOn**
- - **MusicVault**
- **muzu.tv**
- **Mwave**
- **MySpace**
- **vine:user**
- **vk**: VK
- **vk:uservideos**: VK - User's Videos
+ - **vlive**
- **Vodlocker**
- **VoiceRepublic**
- **Vporn**
TEST_ID = 'gr51aVj-mLg'
-ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
+ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
ctx['dest_stream'].write(down.read())
+ down.close()
frags_filenames.append(frag_sanitized)
self._finish_frag_download(ctx)
)
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
-from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
from .mwave import MwaveIE
from .myspace import MySpaceIE, MySpaceAlbumIE
OoyalaIE,
OoyalaExternalIE,
)
-from .openfilm import OpenFilmIE
from .orf import (
ORFTVthekIE,
ORFOE1IE,
@staticmethod
def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {}
- for input in re.findall(r'<input([^>]+)>', html):
+ for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
- r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
)
-class CrunchyrollIE(InfoExtractor):
+class CrunchyrollBaseIE(InfoExtractor):
+ # Common base class of the Crunchyroll extractors. It overrides
+ # _download_webpage so that every webpage request carries an explicit
+ # Accept-Language header.
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+ # A plain URL is wrapped in a Request object so that a header can be attached to it.
+ request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ else compat_urllib_request.Request(url_or_request))
+ # Accept-Language must be set explicitly to accept any language to avoid issues
+ # similar to https://github.com/rg3/youtube-dl/issues/6797.
+ # Along with the IP address, Crunchyroll uses Accept-Language to guess whether georestriction
+ # should be imposed or not (from what I can see it just takes the first language,
+ # ignoring the priority, and requires it to correspond to the IP). By the way, this causes
+ # Crunchyroll to not work in georestriction cases in some browsers that don't place
+ # the locale lang first in the header. However, allowing any language seems to work around the issue.
+ request.add_header('Accept-Language', '*')
+ # All remaining arguments are forwarded unchanged to the parent implementation.
+ return super(CrunchyrollBaseIE, self)._download_webpage(
+ request, video_id, note, errnote, fatal, tries, timeout, encoding)
+
+
+class CrunchyrollIE(CrunchyrollBaseIE):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
- video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
+ video_upload_date = self._html_search_regex(
+ [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
+ webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
- video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
+ video_uploader = self._html_search_regex(
+ r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
+ 'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
}
-class CrunchyrollShowPlaylistIE(InfoExtractor):
+class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = "crunchyroll:playlist"
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class MusicVaultIE(InfoExtractor):
- _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
- _TEST = {
- 'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
- 'md5': '3adcbdb3dcc02d647539e53f284ba171',
- 'info_dict': {
- 'id': '1010863',
- 'ext': 'mp4',
- 'uploader_id': 'the-allman-brothers-band',
- 'title': 'Straight from the Heart',
- 'duration': 244,
- 'uploader': 'The Allman Brothers Band',
- 'thumbnail': 're:^https?://.*/thumbnail/.*',
- 'upload_date': '20131219',
- 'location': 'Capitol Theatre (Passaic, NJ)',
- 'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
- 'timestamp': int,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, display_id)
-
- thumbnail = self._search_regex(
- r'<meta itemprop="thumbnail" content="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
-
- data_div = self._search_regex(
- r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
- uploader = self._html_search_regex(
- r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
- title = self._html_search_regex(
- r'<h2.*?>(.*?)</h2>', data_div, 'title')
- location = self._html_search_regex(
- r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
-
- kaltura_id = self._search_regex(
- r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
- webpage, 'kaltura ID')
- wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
-
- return {
- 'id': mobj.group('id'),
- '_type': 'url_transparent',
- 'url': 'kaltura:%s:%s' % (wid, kaltura_id),
- 'ie_key': 'Kaltura',
- 'display_id': display_id,
- 'uploader_id': mobj.group('uploader_id'),
- 'thumbnail': thumbnail,
- 'description': self._html_search_meta('description', webpage),
- 'location': location,
- 'title': title,
- 'uploader': uploader,
- }
+++ /dev/null
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
-from ..utils import (
- parse_iso8601,
- parse_age_limit,
- int_or_none,
-)
-
-
-class OpenFilmIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)'
- _TEST = {
- 'url': 'http://www.openfilm.com/videos/human-resources-remastered',
- 'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
- 'info_dict': {
- 'id': '32736',
- 'display_id': 'human-resources-remastered',
- 'ext': 'mp4',
- 'title': 'Human Resources (Remastered)',
- 'description': 'Social Engineering in the 20th Century.',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'duration': 7164,
- 'timestamp': 1334756988,
- 'upload_date': '20120418',
- 'uploader_id': '41117',
- 'view_count': int,
- 'age_limit': 0,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- player = compat_urllib_parse_unquote_plus(
- self._og_search_video_url(webpage))
-
- video = json.loads(self._search_regex(
- r'\bp=({.+?})(?:&|$)', player, 'video JSON'))
-
- video_url = '%s1.mp4' % video['location']
- video_id = video.get('video_id')
- display_id = video.get('alias') or display_id
- title = video.get('title')
- description = video.get('description')
- thumbnail = video.get('main_thumb')
- duration = int_or_none(video.get('duration'))
- timestamp = parse_iso8601(video.get('dt_published'), ' ')
- uploader_id = video.get('user_id')
- view_count = int_or_none(video.get('views_count'))
- age_limit = parse_age_limit(video.get('age_limit'))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader_id': uploader_id,
- 'view_count': view_count,
- 'age_limit': age_limit,
- }
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
int_or_none,
float_or_none,
- str_to_int,
+ parse_iso8601,
)
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'https://vid.me/QNB',
- 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+ 'md5': 'c62f1156138dc3323902188c5b5a8bd6',
'info_dict': {
'id': 'QNB',
'ext': 'mp4',
'title': 'Fishing for piranha - the easy way',
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
- 'duration': 119.92,
+ 'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1406313244,
'upload_date': '20140725',
+ 'age_limit': 0,
+ 'duration': 119.92,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://vid.me/Gc6M',
+ 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+ 'info_dict': {
+ 'id': 'Gc6M',
+ 'ext': 'mp4',
+ 'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
'thumbnail': 're:^https?://.*\.jpg',
+ 'timestamp': 1441211642,
+ 'upload_date': '20150902',
+ 'uploader': 'SunshineM',
+ 'uploader_id': '3552827',
+ 'age_limit': 0,
+ 'duration': 223.72,
'view_count': int,
'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
# tests uploader field
'ext': 'mp4',
'title': 'The Carver',
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
- 'duration': 97.859999999999999,
+ 'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1433203629,
'upload_date': '20150602',
'uploader': 'Thomas',
- 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader_id': '109747',
+ 'age_limit': 0,
+ 'duration': 97.859999999999999,
'view_count': int,
'like_count': int,
+ 'comment_count': int,
},
'params': {
'skip_download': True,
},
}, {
- # From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
+ # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
'url': 'https://vid.me/e/Wmur',
- 'only_matching': True,
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'timestamp': 1430931613,
+ 'upload_date': '20150506',
+ 'uploader': 'naked-yogi',
+ 'uploader_id': '1638622',
+ 'age_limit': 18,
+ 'duration': 653.26999999999998,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
- url = url.replace('vid.me/e/', 'vid.me/')
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(
- r'<source src="([^"]+)"', webpage, 'video URL')
+ try:
+ response = self._download_json(
+ 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ response = self._parse_json(e.cause.read(), video_id)
+ else:
+ raise
+
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage, default='')
- thumbnail = self._og_search_thumbnail(webpage)
- timestamp = int_or_none(self._og_search_property(
- 'updated_time', webpage, fatal=False))
- width = int_or_none(self._og_search_property(
- 'video:width', webpage, fatal=False))
- height = int_or_none(self._og_search_property(
- 'video:height', webpage, fatal=False))
- duration = float_or_none(self._html_search_regex(
- r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
- view_count = str_to_int(self._html_search_regex(
- r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
- webpage, 'view count', fatal=False))
- like_count = str_to_int(self._html_search_regex(
- r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
- webpage, 'like count', fatal=False))
- uploader = self._html_search_regex(
- 'class="video_author_username"[^>]*>([^<]+)',
- webpage, 'uploader', default=None)
+ video = response['video']
+
+ formats = [{
+ 'format_id': f.get('type'),
+ 'url': f['uri'],
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ } for f in video.get('formats', []) if f.get('uri')]
+ self._sort_formats(formats)
+
+ title = video['title']
+ description = video.get('description')
+ thumbnail = video.get('thumbnail_url')
+ timestamp = parse_iso8601(video.get('date_created'), ' ')
+ uploader = video.get('user', {}).get('username')
+ uploader_id = video.get('user', {}).get('user_id')
+ age_limit = 18 if video.get('nsfw') is True else 0
+ duration = float_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('view_count'))
+ like_count = int_or_none(video.get('likes_count'))
+ comment_count = int_or_none(video.get('comment_count'))
return {
'id': video_id,
- 'url': video_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'age_limit': age_limit,
'timestamp': timestamp,
- 'width': width,
- 'height': height,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
- 'uploader': uploader,
+ 'comment_count': comment_count,
+ 'formats': formats,
}
from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
mobj = re.match(self._VALID_URL, url)
program = mobj.group('program')
- webpage = self._download_webpage(url, program)
-
page_id = mobj.group('page')
if page_id:
page_id = int(page_id)
start_page = page_id
- last_page = start_page + 1
playlist_id = '%s-page%d' % (program, page_id)
else:
start_page = 0
- last_page = int(self._search_regex(
- r'videos\?page=(\d+)">laatste</a>',
- webpage, 'last page', default=0)) + 1
playlist_id = program
entries = []
- for current_page_id in range(start_page, last_page):
+ for current_page_id in itertools.count(start_page):
current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
program,
- 'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+ 'Downloading page %d' % (current_page_id + 1))
page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier')
for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries)
+ if page_id or '>Meer<' not in current_page:
+ break
return self.playlist_result(entries, playlist_id)
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
- 'md5': '79132cc09ec5309fa590ae46e4cc31bc',
+ 'md5': 'b9be794ceb56c7267d410a13f99d801a',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'Breaking Points: The Paper Mine',
- 'duration': 1287,
+ 'duration': 1290,
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
'uploader': 'The Washington Post',
'timestamp': 1395527908,
'upload_date': '20140322',
},
}, {
- 'md5': 'e1d5734c06865cc504ad99dc2de0d443',
+ 'md5': '1fff6a689d8770966df78c8cb6c8c17c',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'The town bureaucracy sustains',
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
- 'duration': 2217,
+ 'duration': 2220,
'timestamp': 1395528005,
'upload_date': '20140322',
'uploader': 'The Washington Post',
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
+ _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/'
_TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
- 'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
+ 'md5': 'ee21217ffd66d058e8b16be340b74883',
'info_dict': {
'id': 'maruexhausted',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}, {
- # youtube video
'url': 'http://www.wimp.com/clowncar/',
+ 'md5': '4e2986c793694b55b37cf92521d12bb4',
'info_dict': {
- 'id': 'cG4CEr2aiSg',
+ 'id': 'clowncar',
'ext': 'mp4',
- 'title': 'Basset hound clown car...incredible!',
- 'description': 'md5:8d228485e0719898c017203f900b3a35',
- 'uploader': 'Gretchen Hoey',
- 'uploader_id': 'gretchenandjeff1',
- 'upload_date': '20140303',
+ 'title': 'It\'s like a clown car.',
+ 'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
},
- 'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
_TESTS = [{
# Audio
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
- 'md5': '63a42c705772aa53fd4c1a0027f86adf',
+ 'md5': 'e79284c87b371424885448d11f6398c8',
'info_dict': {
'id': '3860914',
'ext': 'mp3',
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
+ }, {
+ # The query result is embedded in the webpage, but an explicit request to the video API fails with a geo restriction
+ 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+ 'info_dict': {
+ 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+ 'ext': 'mp4',
+ 'title': 'Communitary - Community Episode 1: Ladders',
+ 'description': 'md5:8fc39608213295748e1e289807838c97',
+ 'duration': 1646,
+ },
}
]
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # The query result is often embedded in the webpage as JSON. An explicit request
+ # to the video API sometimes fails with a geo restriction error, so the embedded
+ # query result is used whenever it is present.
+ config_json = self._search_regex(
+ r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
+ webpage, 'videoplayer applet', default=None)
+ if config_json:
+ config = self._parse_json(config_json, display_id, fatal=False)
+ if config:
+ sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
+ if sapi:
+ return self._extract_info(display_id, sapi, webpage)
+
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
default=None)
video_id = info['id']
return self._get_info(video_id, display_id, webpage)
- def _get_info(self, video_id, display_id, webpage):
- region = self._search_regex(
- r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
- webpage, 'region', fatal=False, default='US')
- data = compat_urllib_parse.urlencode({
- 'protocol': 'http',
- 'region': region,
- })
- query_url = (
- 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
- '{id}?{data}'.format(id=video_id, data=data))
- query_result = self._download_json(
- query_url, display_id, 'Downloading video info')
-
- info = query_result['query']['results']['mediaObj'][0]
+ def _extract_info(self, display_id, query, webpage):
+ info = query['query']['results']['mediaObj'][0]
meta = info.get('meta')
+ video_id = info.get('id')
if not meta:
msg = info['status'].get('msg')
'ext': 'flv',
})
else:
+ if s.get('format') == 'm3u8_playlist':
+ format_info['protocol'] = 'm3u8_native'
+ format_info['ext'] = 'mp4'
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
formats.append(format_info)
'subtitles': subtitles,
}
+ def _get_info(self, video_id, display_id, webpage):
+ # Request the stream info for video_id from the video API and hand the
+ # JSON response to _extract_info, whose result is returned.
+ # The region is scraped from the webpage; 'US' is used when it is not found.
+ region = self._search_regex(
+ r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
+ webpage, 'region', fatal=False, default='US')
+ data = compat_urllib_parse.urlencode({
+ 'protocol': 'http',
+ 'region': region,
+ })
+ query_url = (
+ 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
+ '{id}?{data}'.format(id=video_id, data=data))
+ query_result = self._download_json(
+ query_url, display_id, 'Downloading video info')
+ return self._extract_info(display_id, query_result, webpage)
+
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
IE_NAME = 'youtube:show'
_TESTS = [{
- 'url': 'http://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 3,
+ 'url': 'https://www.youtube.com/show/airdisasters',
+ 'playlist_mincount': 5,
'info_dict': {
'id': 'airdisasters',
'title': 'Air Disasters',
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(
- url, playlist_id, 'Downloading show webpage')
+ 'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
# There's one playlist for each season of the show
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
from __future__ import unicode_literals
-__version__ = '2015.09.03'
+__version__ = '2015.09.09'