90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
+ (
+ u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+ u'js',
+ 84,
+ u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+ ),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
u'js',
'Ariset Llerena',
'Adam Malcontenti-Wilson',
'Tobias Bell',
+ 'Naglis Jonaitis',
)
__license__ = 'Public Domain'
from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
+from .firedrive import FiredriveIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
+from .reverbnation import ReverbNationIE
from .ringtv import RingTVIE
from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
+from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
SoundcloudPlaylistIE
)
from .soundgasm import SoundgasmIE
-from .southparkstudios import (
- SouthParkStudiosIE,
+from .southpark import (
+ SouthParkIE,
SouthparkDeIE,
)
from .space import SpaceIE
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_parse,
+ compat_urllib_request,
+ determine_ext,
+)
+
+
+class FiredriveIE(InfoExtractor):
+    """Extractor for files hosted on firedrive.com.
+
+    The hidden ``<input>`` fields found on the file page are posted back to
+    the same URL; the page returned by that request is then searched for
+    the title, thumbnail, media type and file URL.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
+        '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
+    # Pages matching this pattern are reported as non-existent videos.
+    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'
+
+    _TESTS = [{
+        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
+        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
+        'info_dict': {
+            'id': 'FEB892FA160EBD01',
+            'ext': 'flv',
+            'title': 'bbb_theora_486kbit.flv',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^http://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the Firedrive file addressed by *url*.
+
+        Raises ExtractorError (expected) when the page carries the
+        "removed file" marker.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # Both /file/ and /embed/ URLs are fetched through the /file/ page.
+        url = 'http://firedrive.com/file/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        # Collect name/value pairs of every hidden input on the page.
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        post = compat_urllib_parse.urlencode(fields)
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        # Apparently, this header is required for confirmation to work.
+        req.add_header('Host', 'www.firedrive.com')
+
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading video page')
+
+        title = self._search_regex(r'class="external_title_left">(.+)</div>',
+                                   webpage, 'title')
+        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
+                                       'thumbnail', fatal=False)
+        if thumbnail is not None:
+            # The page gives a protocol-relative URL; make it absolute.
+            thumbnail = 'http:' + thumbnail
+
+        # Optional: the search is non-fatal, so ext may end up as None.
+        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
+                                 webpage, 'extension', fatal=False)
+        video_url = self._search_regex(
+            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'ext': ext,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
if mgid.endswith('.swf'):
mgid = mgid[:-4]
except RegexNotFoundError:
+ mgid = None
+
+ if mgid is None or ':' not in mgid:
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, u'mgid')
_TESTS = [
{
- 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
- 'md5': 'e7a6079ca39d3568f4996cb858dd6708',
+ 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
+ 'md5': '4a4eeafd17c3058b65f0c8f091355855',
'note': 'Video file',
'info_dict': {
- 'id': '7959',
+ 'id': '325',
'ext': 'mp4',
- 'title': 'Markt - die ganze Sendung',
- 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
- 'duration': 2655,
+ 'title': 'Blaue Bohnen aus Blocken',
+ 'description': 'md5:190d71ba2ccddc805ed01547718963bc',
+ 'duration': 1715,
},
},
{
return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex(
- r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+ r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import strip_jsonp
+
+
+class ReverbNationIE(InfoExtractor):
+    """Extractor for single songs on reverbnation.com.
+
+    Song metadata is read from the JSONP response of
+    api.reverbnation.com/song/<id>.
+    """
+
+    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+    _TESTS = [{
+        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
+        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'info_dict': {
+            # 'id' and 'ext' replace the legacy top-level 'file' key.
+            "id": "16965047",
+            "ext": "mp3",
+            "title": "MONA LISA",
+            "uploader": "ALKILADOS",
+            "uploader_id": 216429,
+            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the song addressed by *url*."""
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group('id')
+
+        # The second query parameter is the current time in milliseconds.
+        api_res = self._download_json(
+            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
+            % (song_id, int(time.time() * 1000)),
+            song_id,
+            transform_source=strip_jsonp,
+            note='Downloading information of song %s' % song_id
+        )
+
+        # Look the artist record up once; it may be absent from the response.
+        artist = api_res.get('artist', {})
+
+        return {
+            'id': song_id,
+            'title': api_res.get('name'),
+            'url': api_res.get('url'),
+            'uploader': artist.get('name'),
+            'uploader_id': artist.get('id'),
+            # Prefer 'image', fall back to 'thumbnail'.
+            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'ext': 'mp3',
+            'vcodec': 'none',
+        }
--- /dev/null
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RUHDIE(InfoExtractor):
+    """Extractor for ruhd.ru ``play.php?vid=<id>`` pages.
+
+    The video URL and thumbnail are read from ``<param>`` elements of the
+    page, the title from its ``<title>`` element.
+    """
+
+    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.ruhd.ru/play.php?vid=207',
+        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
+        'info_dict': {
+            'id': '207',
+            'ext': 'divx',
+            'title': 'КОТ бааааам',
+            'description': 'классный кот)',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video addressed by *url*."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<param name="src" value="([^"]+)"', webpage, 'video url')
+        # The title is whatever precedes the fixed site suffix in <title>.
+        title = self._html_search_regex(
+            r'<title>([^<]+) RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            # The extracted value is prefixed with the site root.
+            thumbnail = 'http://www.ruhd.ru' + thumbnail
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
},
# downloadable song
{
- 'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
- 'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+ 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
+ 'md5': 'fee7b8747b09bb755cefd4b853e7249a',
'info_dict': {
- 'id': '105614606',
+ 'id': '128590877',
'ext': 'wav',
- 'title': 'Just Your Problem Baby (Acapella)',
- 'description': 'Vocals',
- 'uploader': 'Sim Gretina',
- 'upload_date': '20130815',
- #'duration': 42,
+ 'title': 'Bus Brakes',
+ 'description': 'md5:0170be75dd395c96025d210d261c784e',
+ 'uploader': 'oddsamples',
+ 'upload_date': '20140109',
+ 'duration': 17,
},
},
]
--- /dev/null
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class SouthParkIE(MTVServicesInfoExtractor):
+    """Extractor for clips and full episodes on southpark.cc.com.
+
+    Only declares the URL pattern, feed URL and test data; the extraction
+    logic is inherited from MTVServicesInfoExtractor.
+    """
+
+    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+    _VALID_URL = (
+        r'https?://(www\.)?'
+        r'(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+    )
+    IE_NAME = 'southpark.cc.com'
+
+    _TESTS = [
+        {
+            'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
+            'info_dict': {
+                'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
+                'ext': 'mp4',
+                'title': 'South Park|Bat Daded',
+                'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+            },
+        },
+    ]
+
+
+class SouthparkDeIE(SouthParkIE):
+    """Variant of SouthParkIE for southpark.de clips and episodes.
+
+    Overrides the extractor name, URL pattern, feed URL and test data.
+    """
+
+    _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
+    _VALID_URL = (
+        r'https?://(www\.)?'
+        r'(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+    )
+    IE_NAME = 'southpark.de'
+
+    _TESTS = [
+        {
+            'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+            'info_dict': {
+                'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
+                'ext': 'mp4',
+                'title': 'The Government Won\'t Respect My Privacy',
+                'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
+            },
+        },
+    ]
+++ /dev/null
-from __future__ import unicode_literals
-
-from .mtv import MTVServicesInfoExtractor
-
-
-class SouthParkStudiosIE(MTVServicesInfoExtractor):
- IE_NAME = 'southparkstudios.com'
- _VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
-
- _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
-
- _TESTS = [{
- 'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
- 'info_dict': {
- 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
- 'ext': 'mp4',
- 'title': 'Bat Daded',
- 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
- },
- }]
-
-
-class SouthparkDeIE(SouthParkStudiosIE):
- IE_NAME = 'southpark.de'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
- _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
-
- _TESTS = [{
- 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
- 'info_dict': {
- 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
- 'ext': 'mp4',
- 'title': 'The Government Won\'t Respect My Privacy',
- 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
- },
- }]
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE
+from ..utils import compat_urlparse
class TlcIE(DiscoveryIE):
# Otherwise we don't get the correct 'BrightcoveExperience' element,
# example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
iframe_url = iframe_url.replace('.htm?', '.php?')
+ url_fragment = compat_urlparse.urlparse(url).fragment
+ if url_fragment:
+ # Since the fragment is not sent to the server, we always get the same iframe
+ iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
iframe = self._download_webpage(iframe_url, title)
return {
from __future__ import unicode_literals
+
import base64
import re
from .common import InfoExtractor
-from ..utils import (
- compat_parse_qs,
-)
+from ..utils import compat_parse_qs
class TutvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
- 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
- 'file': '2742556.flv',
- 'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+ 'url': 'http://tu.tv/videos/robots-futbolistas',
+ 'md5': '627c7c124ac2a9b5ab6addb94e0e65f7',
'info_dict': {
- 'title': 'Noah en pabellon cuahutemoc',
+ 'id': '2973058',
+ 'ext': 'flv',
+ 'title': 'Robots futbolistas',
},
}
webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
- data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
- data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
- data = compat_parse_qs(data_content)
- video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
+ data_content = self._download_webpage(
+ 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
+ video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
return {
'id': internal_id,
def __init__(self, code):
self.code = code
self._functions = {}
+ self._objects = {}
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
if allow_recursion < 0:
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m:
member = m.group('member')
- val = local_vars[m.group('in')]
+ variable = m.group('in')
+
+ if variable not in local_vars:
+ if variable not in self._objects:
+ self._objects[variable] = self.extract_object(variable)
+ obj = self._objects[variable]
+ key, args = member.split('(', 1)
+ args = args.strip(')')
+ argvals = [int(v) if v.isdigit() else local_vars[v]
+ for v in args.split(',')]
+ return obj[key](argvals)
+
+ val = local_vars[variable]
if member == 'split("")':
return list(val)
if member == 'join("")':
return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr)
+ def extract_object(self, objname):
+     """Build a dict of callables from the JS object literal *objname*.
+
+     The object definition is searched for in ``self.code``.  Each
+     ``key: function(args){code}`` member becomes an entry mapping ``key``
+     to the callable produced by ``build_function``.
+
+     Raises ExtractorError if no definition of *objname* is found.
+     """
+     obj = {}
+     obj_m = re.search(
+         (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+         r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+         r'\}\s*;',
+         self.code)
+     if obj_m is None:
+         # Fail with the extractor's own error type instead of an
+         # AttributeError on None, as extract_function does.
+         raise ExtractorError('Could not find JS object %r' % objname)
+     fields = obj_m.group('fields')
+     # Currently, it only supports function definitions
+     fields_m = re.finditer(
+         r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+         r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+         fields)
+     for f in fields_m:
+         argnames = f.group('args').split(',')
+         obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+     return obj
+
def extract_function(self, funcname):
func_m = re.search(
(r'(?:function %s|[{;]%s\s*=\s*function)' % (
raise ExtractorError('Could not find JS function %r' % funcname)
argnames = func_m.group('args').split(',')
+ return self.build_function(argnames, func_m.group('code'))
+
+ def build_function(self, argnames, code):
def resf(args):
local_vars = dict(zip(argnames, args))
- for stmt in func_m.group('code').split(';'):
+ for stmt in code.split(';'):
res = self.interpret_statement(stmt, local_vars)
return res
return resf
-
def strip_jsonp(code):
- return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
+    """Return *code* with a surrounding JSONP ``callback(...)`` removed.
+
+    The callback name may contain letters, digits, ``_``, ``.`` and ``$``;
+    the trailing ``;`` is optional.  Input that is not wrapped this way is
+    returned unchanged.
+    """
+    return re.sub(r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*$', r'\1', code)
def qualities(quality_ids):
-__version__ = '2014.07.11.3'
+__version__ = '2014.07.15'