an empty string (--proxy "") for direct
connection
--no-check-certificate Suppress HTTPS certificate validation.
+ --prefer-insecure Use an unencrypted connection to retrieve
+ information about the video. (Currently
+ supported only for YouTube)
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information
permanently. By default $XDG_CACHE_HOME
preference using slashes: "-f 22/17/18".
"-f mp4" and "-f flv" are also supported.
You can also use the special names "best",
- "bestaudio", "worst", and "worstaudio". By
- default, youtube-dl will pick the best
- quality.
+ "bestvideo", "bestaudio", "worst",
+ "worstvideo" and "worstaudio". By default,
+ youtube-dl will pick the best quality.
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific
one is requested
git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
-for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
make pypi-files
echo "Uploading to PyPi ..."
-python setup.py sdist upload
+python setup.py sdist bdist_wheel upload
make clean
/bin/echo -e "\n### DONE!"
--- /dev/null
+[wheel]
+universal = True
import youtube_dl.extractor
from youtube_dl import YoutubeDL
-from youtube_dl.utils import preferredencoding
+from youtube_dl.utils import (
+ compat_str,
+ preferredencoding,
+)
def get_params(override=None):
old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self)
-def get_testcases():
+def gettestcases():
for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None)
if t:
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def expect_info_dict(self, expected_dict, got_dict):
+ for info_field, expected in expected_dict.items():
+ if isinstance(expected, compat_str) and expected.startswith('re:'):
+ got = got_dict.get(info_field)
+ match_str = expected[len('re:'):]
+ match_rex = re.compile(match_str)
+
+ self.assertTrue(
+ isinstance(got, compat_str) and match_rex.match(got),
+ u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+ elif isinstance(expected, type):
+ got = got_dict.get(info_field)
+ self.assertTrue(isinstance(got, expected),
+ u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
+ else:
+ if isinstance(expected, compat_str) and expected.startswith('md5:'):
+ got = 'md5:' + md5(got_dict.get(info_field))
+ else:
+ got = got_dict.get(info_field)
+ self.assertEqual(expected, got,
+ u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+
+ # Check for the presence of mandatory fields
+ for key in ('id', 'url', 'title', 'ext'):
+ self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+ # Are checkable fields missing from the test case definition?
+ test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+ for key, value in got_dict.items()
+ if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+ missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+ if missing_keys:
+ sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+ self.assertFalse(
+ missing_keys,
+ 'Missing keys in test definition: %s' % (
+ ', '.join(sorted(missing_keys))))
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import get_testcases
+from test.helper import gettestcases
from youtube_dl.extractor import (
FacebookIE,
def test_no_duplicates(self):
ies = gen_extractors()
- for tc in get_testcases():
+ for tc in gettestcases():
url = tc['url']
for ie in ies:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+ self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
if __name__ == '__main__':
unittest.main()
from test.helper import (
get_params,
- get_testcases,
- try_rm,
+ gettestcases,
+ expect_info_dict,
md5,
- report_warning
+ try_rm,
+ report_warning,
)
import hashlib
import io
import json
-import re
import socket
import youtube_dl.YoutubeDL
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
-defs = get_testcases()
+defs = gettestcases()
class TestDownload(unittest.TestCase):
self.assertEqual(md5_for_file, tc['md5'])
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
- for (info_field, expected) in tc.get('info_dict', {}).items():
- if isinstance(expected, compat_str) and expected.startswith('re:'):
- got = info_dict.get(info_field)
- match_str = expected[len('re:'):]
- match_rex = re.compile(match_str)
-
- self.assertTrue(
- isinstance(got, compat_str) and match_rex.match(got),
- u'field %s (value: %r) should match %r' % (info_field, got, match_str))
- elif isinstance(expected, type):
- got = info_dict.get(info_field)
- self.assertTrue(isinstance(got, expected),
- u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
- else:
- if isinstance(expected, compat_str) and expected.startswith('md5:'):
- got = 'md5:' + md5(info_dict.get(info_field))
- else:
- got = info_dict.get(info_field)
- self.assertEqual(expected, got,
- u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
-
- # Check for the presence of mandatory fields
- for key in ('id', 'url', 'title', 'ext'):
- self.assertTrue(key in info_dict.keys() and info_dict[key])
- # Check for mandatory fields that are automatically set by YoutubeDL
- for key in ['webpage_url', 'extractor', 'extractor_key']:
- self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
- # If checkable fields are missing from the test case, print the info_dict
- test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
- for key, value in info_dict.items()
- if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
- if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
- sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+
+ expect_info_dict(self, tc.get('info_dict', {}), info_dict)
finally:
try_rm_tcs_files()
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL
-
+from test.helper import (
+ expect_info_dict,
+ FakeYDL,
+)
from youtube_dl.extractor import (
AcademicEarthCourseIE,
GoogleSearchIE,
GenericIE,
TEDIE,
+ ToypicsUserIE,
+ XTubeUserIE,
+ InstagramUserIE,
)
self.assertEqual(result['title'], 'Who are the hackers?')
self.assertTrue(len(result['entries']) >= 6)
+ def test_toypics_user(self):
+ dl = FakeYDL()
+ ie = ToypicsUserIE(dl)
+ result = ie.extract('http://videos.toypics.net/Mikey')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'Mikey')
+ self.assertTrue(len(result['entries']) >= 17)
+
+ def test_xtube_user(self):
+ dl = FakeYDL()
+ ie = XTubeUserIE(dl)
+ result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'greenshowers')
+ self.assertTrue(len(result['entries']) >= 155)
+
+ def test_InstagramUser(self):
+ dl = FakeYDL()
+ ie = InstagramUserIE(dl)
+ result = ie.extract('http://instagram.com/porsche')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'porsche')
+ self.assertTrue(len(result['entries']) >= 2)
+ test_video = next(
+ e for e in result['entries']
+ if e['id'] == '614605558512799803_462752227')
+ dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+ dl.process_video_result(test_video, download=False)
+ EXPECTED = {
+ 'id': '614605558512799803_462752227',
+ 'ext': 'mp4',
+ 'title': '#Porsche Intelligent Performance.',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader': 'Porsche',
+ 'uploader_id': 'porsche',
+ }
+ expect_info_dict(self, EXPECTED, test_video)
+
+
if __name__ == '__main__':
unittest.main()
+++ /dev/null
-# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
-
-from .extractor.common import InfoExtractor, SearchInfoExtractor
-from .extractor import gen_extractors, get_info_extractor
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
'_type': 'compat_list',
'entries': ie_result,
}
- self.add_extra_info(ie_result,
- {
- 'extractor': ie.IE_NAME,
- 'webpage_url': url,
- 'webpage_url_basename': url_basename(url),
- 'extractor_key': ie.ie_key(),
- })
+ self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
else:
raise
else:
- self.report_error('no suitable InfoExtractor: %s' % url)
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def add_default_extra_info(self, ie_result, ie, url):
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
+ 'extractor_key': ie.ie_key(),
+ })
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
__license__ = 'Public Domain'
import codecs
-import getpass
import io
import locale
import optparse
from .utils import (
+ compat_getpass,
compat_print,
DateRange,
decodeOption,
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+ general.add_option(
+ '--prefer-insecure', action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
-
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
if opts.usetitle and opts.useid:
parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
- opts.password = getpass.getpass(u'Type account password and press return:')
+ opts.password = compat_getpass(u'Type account password and press [Return]: ')
if opts.ratelimit is not None:
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
if numeric_limit is None:
'download_archive': download_archive_fn,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
+ 'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,
from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
+from .aol import AolIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
ArteTvIE,
ArteTVPlus7IE,
ArteTVCreativeIE,
+ ArteTVConcertIE,
ArteTVFutureIE,
ArteTVDDCIE,
)
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .elpais import ElPaisIE
+from .engadget import EngadgetIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .faz import FazIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
from .fktv import (
FKTVIE,
FKTVPosteckeIE,
)
from .ina import InaIE
from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import (
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
RutubeMovieIE,
RutubePersonIE,
)
+from .rutv import RUTVIE
from .savefrom import SaveFromIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trutube import TruTubeIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vesti import VestiIE
from .vevo import VevoIE
-from .vgtrk import VGTRKIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import (
from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
+from .xbef import XBefIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
-from .xtube import XTubeIE
+from .xtube import XTubeUserIE, XTubeIE
from .yahoo import (
YahooIE,
YahooNewsIE,
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+
+
+class AolIE(InfoExtractor):
+ IE_NAME = 'on.aol.com'
+ _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+
+ _TEST = {
+ 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+ 'md5': '18ef68f48740e86ae94b98da815eec42',
+ 'info_dict': {
+ 'id': '518167793',
+ 'ext': 'mp4',
+ 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+ },
+ 'add_ie': ['FiveMin'],
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ self.to_screen('Downloading 5min.com video %s' % video_id)
+ return FiveMinIE._build_result(video_id)
class ArteTVPlus7IE(InfoExtractor):
IE_NAME = 'arte.tv:+7'
- _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod
def _extract_url_info(cls, url):
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+ # Prefer http downloads over m3u8
+ 0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
_TEST = {
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
- 'file': '050489-002.mp4',
'info_dict': {
+ 'id': '050489-002',
+ 'ext': 'mp4',
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
},
}
_TEST = {
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
- 'file': '050940-003.mp4',
'info_dict': {
+ 'id': '050940-003',
+ 'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
},
}
class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:ddc'
- _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+ _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+ IE_NAME = 'arte.tv:concert'
+ _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+ _TEST = {
+ 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+ 'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+ 'info_dict': {
+ 'id': '186',
+ 'ext': 'mp4',
+ 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+ 'upload_date': '20140128',
+ 'description': 'md5:486eb08f991552ade77439fe6d82c305',
+ },
+ }
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
class CSpanIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
+ _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
IE_DESC = 'C-SPAN'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
'md5': '8e44ce11f0f725527daccc453f553eb0',
'info_dict': {
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
},
'skip': 'Regularly fails on travis, for unknown reasons',
- }
+ }, {
+ 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+ # For whatever reason, the served video alternates between
+ # two different ones
+ #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
+ 'info_dict': {
+ 'id': '340723',
+ 'ext': 'mp4',
+ 'title': 'International Health Care Models',
+ 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+ }
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
webpage = self._download_webpage(url, page_id)
- video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
+ video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
description = self._html_search_regex(
[
# encoding: utf-8
+
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
- determine_ext,
)
class DaumIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
- IE_NAME = u'daum.net'
+ IE_NAME = 'daum.net'
_TEST = {
- u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
- u'file': u'52554690.mp4',
- u'info_dict': {
- u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
- u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
- u'upload_date': u'20130831',
- u'duration': 3868,
+ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ 'info_dict': {
+ 'id': '52554690',
+ 'ext': 'mp4',
+ 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'upload_date': '20130831',
+ 'duration': 3868,
},
}
webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
- webpage, u'full id')
+ webpage, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
- u'Downloading video info')
+ 'Downloading video info')
urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
- video_id, u'Downloading video formats info')
+ video_id, 'Downloading video formats info')
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,
- 'ext': determine_ext(format_url),
'format_id': profile,
})
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+from ..utils import (
+ url_basename,
+)
+
+
+class EngadgetIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://www.engadget.com/
+ (?:video/5min/(?P<id>\d+)|
+ [\d/]+/.*?)
+ '''
+
+ _TEST = {
+ 'url': 'http://www.engadget.com/video/5min/518153925/',
+ 'md5': 'c6820d4828a5064447a4d9fc73f312c9',
+ 'info_dict': {
+ 'id': '518153925',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy Tab Pro 8.4 Review',
+ },
+ 'add_ie': ['FiveMin'],
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ if video_id is not None:
+ return FiveMinIE._build_result(video_id)
+ else:
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
+ return {
+ '_type': 'playlist',
+ 'title': title,
+ 'entries': [FiveMinIE._build_result(id) for id in ids]
+ }
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_str,
+)
+
+
+class FiveMinIE(InfoExtractor):
+ IE_NAME = '5min'
+ _VALID_URL = r'''(?x)
+ (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+ 5min:)
+ (?P<id>\d+)
+ '''
+
+ _TEST = {
+ # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+ 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+ 'md5': '4f7b0b79bf1a470e5004f7112385941d',
+ 'info_dict': {
+ 'id': '518013791',
+ 'ext': 'mp4',
+ 'title': 'iPad Mini with Retina Display Review',
+ },
+ }
+
+ @classmethod
+ def _build_result(cls, video_id):
+ return cls.url_result('5min:%s' % video_id, cls.ie_key())
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ info = self._download_json(
+ 'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
+ 'playlist=%s&url=https' % video_id,
+ video_id)['binding'][0]
+
+ second_id = compat_str(int(video_id[:-2]) + 1)
+ formats = []
+ for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
+ if any(r['ID'] == quality for r in info['Renditions']):
+ formats.append({
+ 'format_id': compat_str(quality),
+ 'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
+ 'height': height,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': info['Title'],
+ 'formats': formats,
+ }
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
+from .rutv import RUTVIE
class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get
},
},
+ # second style of embedded ooyala videos
+ {
+ 'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
+ 'info_dict': {
+ 'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
+ 'ext': 'mp4',
+ 'title': 'Behind-the-scenes: Financial Review Sunday ',
+ 'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
'ext': 'mp4',
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ },
+ },
+ # RUTV embed
+ {
+ 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+ 'info_dict': {
+ 'id': '776940',
+ 'ext': 'mp4',
+ 'title': 'Охотское море стало целиком российским',
+ 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Embedded TED video
+ {
+ 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+ 'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+ 'info_dict': {
+ 'id': '981',
+ 'ext': 'mp4',
+ 'title': 'My web playroom',
+ 'uploader': 'Ze Frank',
+ 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
},
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
]
def report_download_webpage(self, video_id):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
+ try:
+            # This function was deprecated in Python 3.3 and removed in 3.4
+ origin_req_host = req.get_origin_req_host()
+ except AttributeError:
+ origin_req_host = req.origin_req_host
return HEADRequest(newurl,
headers=newheaders,
- origin_req_host=req.get_origin_req_host(),
+ origin_req_host=origin_req_host,
unverifiable=True)
else:
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
except compat_xml_parse_error:
pass
+        # Sometimes the embedded video player is hidden behind percent encoding
+        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+        # Unescaping the whole page allows handling those cases in a generic way
+ webpage = compat_urllib_parse.unquote(webpage)
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# Look for embedded (iframe) Vimeo player
mobj = re.search(
- r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
if mobj:
- player_url = unescapeHTML(mobj.group(1))
+ player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl, 'Vimeo')
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
- mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+ mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group(1))
+ return OoyalaIE._build_url_result(mobj.group('ec'))
# Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
import re
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+)
class InstagramIE(InfoExtractor):
'uploader_id': uploader_id,
'description': desc,
}
+
+
+class InstagramUserIE(InfoExtractor):
+ _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ IE_DESC = 'Instagram user profile'
+ IE_NAME = 'instagram:user'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader_id = mobj.group('username')
+
+ entries = []
+ page_count = 0
+ media_url = 'http://instagram.com/%s/media' % uploader_id
+ while True:
+ page = self._download_json(
+ media_url, uploader_id,
+ note='Downloading page %d ' % (page_count + 1),
+ )
+ page_count += 1
+
+ for it in page['items']:
+ if it.get('type') != 'video':
+ continue
+ like_count = int_or_none(it.get('likes', {}).get('count'))
+ user = it.get('user', {})
+
+ formats = [{
+ 'format_id': k,
+ 'height': v.get('height'),
+ 'width': v.get('width'),
+ 'url': v['url'],
+ } for k, v in it['videos'].items()]
+ self._sort_formats(formats)
+
+ thumbnails_el = it.get('images', {})
+ thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
+
+ title = it.get('caption', {}).get('text', it['id'])
+
+ entries.append({
+ 'id': it['id'],
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'webpage_url': it.get('link'),
+ 'uploader': user.get('full_name'),
+ 'uploader_id': user.get('username'),
+ 'like_count': like_count,
+ 'upload_timestamp': int_or_none(it.get('created_time')),
+ })
+
+ if not page['items']:
+ break
+ max_id = page['items'][-1]['id']
+ media_url = (
+ 'http://instagram.com/%s/media?max_id=%s' % (
+ uploader_id, max_id))
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': uploader_id,
+ 'title': uploader_id,
+ }
webpage = self._download_webpage(url, video_id)
- if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
+ if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
raise ExtractorError(
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
import re
from .common import InfoExtractor
+from ..utils import int_or_none
class KontrTubeIE(InfoExtractor):
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
- title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
- 'video title')
+ title = self._html_search_regex(
+ r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description')
- mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
- webpage)
+ mobj = re.search(
+ r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
- view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
- 'view count', fatal=False)
- view_count = int(view_count) if view_count is not None else None
+ view_count = self._html_search_regex(
+ r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
comment_count = None
- comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
- fatal=False)
+ comment_str = self._html_search_regex(
+ r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'):
comment_count = 0
else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj:
- comment_count = int(mobj.group('total'))
+ comment_count = mobj.group('total')
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
+ 'view_count': int_or_none(view_count),
+ 'comment_count': int_or_none(comment_count),
}
\ No newline at end of file
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
ExtractorError,
)
-class MetacafeIE(InfoExtractor):
- """Information Extractor for metacafe.com."""
- _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+class MetacafeIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
- IE_NAME = u'metacafe'
+ IE_NAME = 'metacafe'
_TESTS = [
- # Youtube video
- {
- u"add_ie": ["Youtube"],
- u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
- u"file": u"_aUehQsCQtM.mp4",
- u"info_dict": {
- u"upload_date": u"20090102",
- u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
- u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
- u"uploader": u"PBS",
- u"uploader_id": u"PBS"
- }
- },
- # Normal metacafe video
- {
- u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
- u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
- u'info_dict': {
- u'id': u'11121940',
- u'ext': u'mp4',
- u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
- u'uploader': u'ign',
- u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+ # Youtube video
+ {
+ 'add_ie': ['Youtube'],
+ 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+ 'info_dict': {
+ 'id': '_aUehQsCQtM',
+ 'ext': 'mp4',
+ 'upload_date': '20090102',
+ 'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
+ 'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
+ 'uploader': 'PBS',
+ 'uploader_id': 'PBS'
+ }
},
- },
- # AnyClip video
- {
- u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
- u"file": u"an-dVVXnuY7Jh77J.mp4",
- u"info_dict": {
- u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
- u"uploader": u"anyclip",
- u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+ # Normal metacafe video
+ {
+ 'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+ 'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
+ 'info_dict': {
+ 'id': '11121940',
+ 'ext': 'mp4',
+ 'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
+ 'uploader': 'ign',
+ 'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+ },
},
- },
- # age-restricted video
- {
- u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
- u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
- u'info_dict': {
- u'id': u'5186653',
- u'ext': u'mp4',
- u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
- u'uploader': u'Dwayne Pipe',
- u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
- u'age_limit': 18,
+ # AnyClip video
+ {
+ 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
+ 'info_dict': {
+ 'id': 'an-dVVXnuY7Jh77J',
+ 'ext': 'mp4',
+ 'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
+ 'uploader': 'anyclip',
+ 'description': 'md5:38c711dd98f5bb87acf973d573442e67',
+ },
},
- },
- # cbs video
- {
- u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
- u'info_dict': {
- u'id': u'0rOxMBabDXN6',
- u'ext': u'flv',
- u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
- u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
- u'duration': 129,
+ # age-restricted video
+ {
+ 'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+ 'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
+ 'info_dict': {
+ 'id': '5186653',
+ 'ext': 'mp4',
+ 'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+ 'uploader': 'Dwayne Pipe',
+ 'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
+ 'age_limit': 18,
+ },
},
- u'params': {
- # rtmp download
- u'skip_download': True,
+ # cbs video
+ {
+ 'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
+ 'info_dict': {
+ 'id': '8VD4r_Zws8VP',
+ 'ext': 'flv',
+ 'title': 'Open: This is Face the Nation, February 9',
+ 'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
+ 'duration': 96,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
},
- },
]
-
def report_disclaimer(self):
- """Report disclaimer retrieval."""
- self.to_screen(u'Retrieving disclaimer')
+ self.to_screen('Retrieving disclaimer')
def _real_initialize(self):
# Retrieve disclaimer
self.report_disclaimer()
- self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
+ self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
# Confirm age
disclaimer_form = {
'filters': '0',
'submit': "Continue - I'm over 18",
- }
+ }
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.report_age_confirmation()
- self._download_webpage(request, None, False, u'Unable to confirm age')
+ self._download_webpage(request, None, False, 'Unable to confirm age')
def _real_extract(self, url):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError('Invalid URL: %s' % url)
video_id = mobj.group(1)
else:
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
if mobj is None:
- raise ExtractorError(u'Unable to extract media URL')
+ raise ExtractorError('Unable to extract media URL')
vardict = compat_parse_qs(mobj.group(1))
if 'mediaData' not in vardict:
- raise ExtractorError(u'Unable to extract media URL')
- mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
+ raise ExtractorError('Unable to extract media URL')
+ mobj = re.search(
+ r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
if mobj is None:
- raise ExtractorError(u'Unable to extract media URL')
+ raise ExtractorError('Unable to extract media URL')
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
video_ext = determine_ext(video_url)
- video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
+ video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
- webpage, u'uploader nickname', fatal=False)
+ webpage, 'uploader nickname', fatal=False)
if re.search(r'"contentRating":"restricted"', webpage) is not None:
age_limit = 18
age_limit = 0
return {
- '_type': 'video',
- 'id': video_id,
- 'url': video_url,
+ 'id': video_id,
+ 'url': video_url,
'description': description,
'uploader': video_uploader,
- 'upload_date': None,
- 'title': video_title,
+ 'title': video_title,
'thumbnail':thumbnail,
- 'ext': video_ext,
+ 'ext': video_ext,
'age_limit': age_limit,
}
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
_TEST = {
"url": "http://9gag.tv/v/1912",
- "file": "1912.mp4",
"info_dict": {
+ "id": "1912",
+ "ext": "mp4",
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
"view_count": int,
+from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import unescapeHTML
+
class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
- u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
- u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
- u'info_dict': {
- u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
- u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+ 'info_dict': {
+ 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'ext': 'mp4',
+ 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+ 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
},
}
ie=cls.ie_key())
def _extract_result(self, info, more_info):
- return {'id': info['embedCode'],
- 'ext': 'mp4',
- 'title': unescapeHTML(info['title']),
- 'url': info.get('ipad_url') or info['url'],
- 'description': unescapeHTML(more_info['description']),
- 'thumbnail': more_info['promo'],
- }
+ return {
+ 'id': info['embedCode'],
+ 'ext': 'mp4',
+ 'title': unescapeHTML(info['title']),
+ 'url': info.get('ipad_url') or info['url'],
+ 'description': unescapeHTML(more_info['description']),
+ 'thumbnail': more_info['promo'],
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
- player, u'mobile player url')
+ player, 'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode)
videos_info = self._search_regex(
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
- mobile_player, u'info').replace('\\"','"')
- videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
+ mobile_player, 'info').replace('\\"','"')
+ videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
- return {'_type': 'playlist',
- 'id': embedCode,
- 'title': unescapeHTML(videos_more_info['title']),
- 'entries': videos,
- }
+ return {
+ '_type': 'playlist',
+ 'id': embedCode,
+ 'title': unescapeHTML(videos_more_info['title']),
+ 'entries': videos,
+ }
else:
return self._extract_result(videos_info[0], videos_more_info)
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+ IE_NAME = 'parliamentlive.tv'
+ IE_DESC = 'UK parliament videos'
+ _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+ 'info_dict': {
+ 'id': '15121',
+ 'ext': 'asf',
+ 'title': 'hoc home affairs committee, 18 mar 2014.pm',
+ 'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+ },
+ 'params': {
+ 'skip_download': True, # Requires mplayer (mms)
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+
+ asx_url = self._html_search_regex(
+ r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
+ 'metadata URL')
+ asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
+ video_url = asx.find('.//REF').attrib['HREF']
+
+ title = self._search_regex(
+ r'''(?x)player\.setClipDetails\(
+ (?:(?:[0-9]+|"[^"]+"),\s*){2}
+ "([^"]+",\s*"[^"]+)"
+ ''',
+ webpage, 'title').replace('", "', ', ')
+ description = self._html_search_regex(
+ r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
+ webpage, 'description')
+
+ return {
+ 'id': video_id,
+ 'ext': 'asf',
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ }
import re
from .common import InfoExtractor
+from ..utils import (
+ US_RATINGS,
+)
class PBSIE(InfoExtractor):
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player
- video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+ video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
+ rating_str = info.get('rating')
+ if rating_str is not None:
+ rating_str = rating_str.rpartition('-')[2]
+ age_limit = US_RATINGS.get(rating_str)
+
return {
'id': video_id,
'title': info['title'],
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
+ 'age_limit': age_limit,
}
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
+ str_to_int,
)
from ..aes import (
aes_decrypt_text
}
}
+ def _extract_count(self, pattern, webpage, name):
+ count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
+ if count:
+ count = str_to_int(count)
+ return count
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
- video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
+ video_uploader = self._html_search_regex(
+ r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+ webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
+ view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ comment_count = self._extract_count(
+ r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
}
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none
+)
+
+
+class RUTVIE(InfoExtractor):
+ IE_DESC = 'RUTV.RU'
+ _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
+ 'info_dict': {
+ 'id': '774471',
+ 'ext': 'mp4',
+ 'title': 'Монологи на все времена',
+ 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+ 'duration': 2906,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
+ 'info_dict': {
+ 'id': '774016',
+ 'ext': 'mp4',
+ 'title': 'Чужой в семье Сталина',
+ 'description': '',
+ 'duration': 2539,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
+ 'info_dict': {
+ 'id': '771852',
+ 'ext': 'mp4',
+ 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+ 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+ 'duration': 3096,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Translation has finished',
+ },
+ ]
+
+ @classmethod
+ def _extract_url(cls, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ mobj = re.search(
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ video_type = mobj.group('type')
+
+ if not video_type or video_type == 'swf':
+ video_type = 'video'
+
+ json_data = self._download_json(
+ 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
+ video_id, 'Downloading JSON')
+
+ if json_data['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
+
+ playlist = json_data['data']['playlist']
+ medialist = playlist['medialist']
+ media = medialist[0]
+
+ if media['errors']:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
+
+ view_count = playlist.get('count_views')
+ priority_transport = playlist['priority_transport']
+
+ thumbnail = media['picture']
+ width = int_or_none(media['width'])
+ height = int_or_none(media['height'])
+ description = media['anons']
+ title = media['title']
+ duration = int_or_none(media.get('duration'))
+
+ formats = []
+
+ for transport, links in media['sources'].items():
+ for quality, url in links.items():
+ if transport == 'rtmp':
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
+ if not mobj:
+ continue
+ fmt = {
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': 'http://player.rutv.ru',
+ 'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
+ 'rtmp_live': True,
+ 'ext': 'flv',
+ 'vbr': int(quality),
+ }
+ elif transport == 'm3u8':
+ fmt = {
+ 'url': url,
+ 'ext': 'mp4',
+ }
+ else:
+ fmt = {
+ 'url': url
+ }
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'format_id': '%s-%s' % (transport, quality),
+ 'preference': -1 if priority_transport == transport else -2,
+ })
+ formats.append(fmt)
+
+ if not formats:
+ raise ExtractorError('No media links available for %s' % video_id)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'duration': duration,
+ 'formats': formats,
+ }
\ No newline at end of file
class TEDIE(SubtitlesInfoExtractor):
- _VALID_URL = r'''(?x)http://www\.ted\.com/
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?://)
+ (?P<type>www|embed)(?P<urlmain>\.ted\.com/
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
)
(/lang/(.*?))? # The url may contain the language
/(?P<name>\w+) # Here goes the name and then ".html"
+ .*)$
'''
_TEST = {
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
+ if m.group('type') == 'embed':
+ desktop_url = m.group('proto') + 'www' + m.group('urlmain')
+ return self.url_result(desktop_url, 'TED')
name = m.group('name')
if m.group('type_talk'):
return self._talk_info(url, name)
self._list_available_subtitles(video_id, talk_info)
return
+ thumbnail = talk_info['thumb']
+ if not thumbnail.startswith('http'):
+ thumbnail = 'http://' + thumbnail
return {
'id': video_id,
'title': talk_info['title'],
'uploader': talk_info['speaker'],
- 'thumbnail': talk_info['thumb'],
+ 'thumbnail': thumbnail,
'description': self._og_search_description(webpage),
'subtitles': video_subtitles,
'formats': formats,
--- /dev/null
+from .common import InfoExtractor
+import re
+
+
+class ToypicsIE(InfoExtractor):
+    IE_DESC = 'Toypics video'
+ _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
+ 'md5': '16e806ad6d6f58079d210fe30985e08b',
+ 'info_dict': {
+ 'id': '514',
+ 'ext': 'mp4',
+ 'title': 'Chance-Bulge\'d, 2',
+ 'age_limit': 18,
+ 'uploader': 'kidsune',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ page = self._download_webpage(url, video_id)
+ video_url = self._html_search_regex(
+ r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
+ title = self._html_search_regex(
+ r'<title>Toypics - ([^<]+)</title>', page, 'title')
+ username = self._html_search_regex(
+            r'toypics\.net/([^/"]+)" class="user-name">', page, 'username')
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'uploader': username,
+ 'age_limit': 18,
+ }
+
+
+class ToypicsUserIE(InfoExtractor):
+ IE_DESC = 'Toypics user profile'
+ _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ username = mobj.group('username')
+
+ profile_page = self._download_webpage(
+ url, username, note='Retrieving profile page')
+
+ video_count = int(self._search_regex(
+ r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+ 'video count'))
+
+ PAGE_SIZE = 8
+ urls = []
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+ for n in range(1, page_count + 1):
+ lpage_url = url + '/public/%d' % n
+ lpage = self._download_webpage(
+ lpage_url, username,
+ note='Downloading page %d/%d' % (n, page_count))
+ urls.extend(
+ re.findall(
+                    r'<p class="video-entry-title">\n\s*<a href="(http://videos\.toypics\.net/view/[^"]+)">',
+ lpage))
+
+ return {
+ '_type': 'playlist',
+ 'id': username,
+ 'entries': [{
+ '_type': 'url',
+ 'url': eurl,
+ 'ie_key': 'Toypics',
+ } for eurl in urls]
+ }
_LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
_NETRC_MACHINE = 'udemy'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
'md5': '98eda5b657e752cf945d8445e261b5c5',
'info_dict': {
'duration': 579.29,
},
'skip': 'Requires udemy account credentials',
- }
+ }]
def _handle_error(self, response):
if not isinstance(response, dict):
_VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
_SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
_ALREADY_ENROLLED = '>You are already taking this course.<'
+ _TESTS = []
@classmethod
def suitable(cls, url):
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .rutv import RUTVIE
+
+
+class VestiIE(InfoExtractor):
+ IE_DESC = 'Вести.Ru'
+ _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+ 'info_dict': {
+ 'id': '765035',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: биткоины в России не являются законными',
+ 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
+ 'duration': 302,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/doc.html?id=1349233',
+ 'info_dict': {
+ 'id': '773865',
+ 'ext': 'mp4',
+ 'title': 'Участники митинга штурмуют Донецкую областную администрацию',
+ 'description': 'md5:1a160e98b3195379b4c849f2f4958009',
+ 'duration': 210,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+ 'info_dict': {
+ 'id': '766048',
+ 'ext': 'mp4',
+ 'title': 'США заморозило, Британию затопило',
+ 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
+ 'duration': 87,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://hitech.vesti.ru/news/view/id/4000',
+ 'info_dict': {
+ 'id': '766888',
+ 'ext': 'mp4',
+ 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+ 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+ 'duration': 279,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+ 'info_dict': {
+ 'id': '766403',
+ 'ext': 'mp4',
+ 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+ 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+ 'duration': 271,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked outside Russia',
+ },
+ {
+ 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+ 'info_dict': {
+ 'id': '51499',
+ 'ext': 'flv',
+ 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+ 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Translation has finished'
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ page = self._download_webpage(url, video_id, 'Downloading page')
+
+ mobj = re.search(
+ r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
+ page)
+ if mobj:
+ video_id = mobj.group('id')
+ page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
+ 'Downloading video page')
+
+ rutv_url = RUTVIE._extract_url(page)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ raise ExtractorError('No video found', expected=True)
\ No newline at end of file
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:)
(?P<id>[^&?#]+)'''
+
_TESTS = [{
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
"duration": 230.12,
"width": 1920,
"height": 1080,
- 'timestamp': 1372057200,
+ # timestamp and upload_date are often incorrect; seem to change randomly
+ 'timestamp': int,
}
}, {
'note': 'v3 SMIL format',
'title': 'I Wish I Could Break Your Heart',
'duration': 226.101,
'age_limit': 0,
- 'timestamp': 1392796919,
+ 'timestamp': int,
}
}, {
'note': 'Age-limited video',
'age_limit': 18,
'title': 'Tunnel Vision (Explicit)',
'uploader': 'Justin Timberlake',
- # timestamp and upload_date are often incorrect; seem to change randomly
'upload_date': 're:2013070[34]',
'timestamp': int,
},
+++ /dev/null
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none
-)
-
-
-class VGTRKIE(InfoExtractor):
- IE_DESC = 'ВГТРК'
- _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
-
- _TESTS = [
- {
- 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
- 'info_dict': {
- 'id': '765035',
- 'ext': 'mp4',
- 'title': 'Вести.net: биткоины в России не являются законными',
- 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
- 'duration': 302,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.vesti.ru/doc.html?id=1349233',
- 'info_dict': {
- 'id': '773865',
- 'ext': 'mp4',
- 'title': 'Участники митинга штурмуют Донецкую областную администрацию',
- 'description': 'md5:1a160e98b3195379b4c849f2f4958009',
- 'duration': 210,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.vesti.ru/only_video.html?vid=576180',
- 'info_dict': {
- 'id': '766048',
- 'ext': 'mp4',
- 'title': 'США заморозило, Британию затопило',
- 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
- 'duration': 87,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://hitech.vesti.ru/news/view/id/4000',
- 'info_dict': {
- 'id': '766888',
- 'ext': 'mp4',
- 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
- 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
- 'duration': 279,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
- 'info_dict': {
- 'id': '766403',
- 'ext': 'mp4',
- 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
- 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
- 'duration': 271,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Blocked outside Russia',
- },
- {
- 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
- 'info_dict': {
- 'id': '51499',
- 'ext': 'flv',
- 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
- 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Translation has finished'
- },
- {
- 'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
- 'info_dict': {
- 'id': '771852',
- 'ext': 'mp4',
- 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
- 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
- 'duration': 3096,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://russia.tv/brand/show/brand_id/57638',
- 'info_dict': {
- 'id': '774016',
- 'ext': 'mp4',
- 'title': 'Чужой в семье Сталина',
- 'description': '',
- 'duration': 2539,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
- 'info_dict': {
- 'id': '775081',
- 'ext': 'mp4',
- 'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
- 'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
- 'duration': 101,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Blocked outside Russia',
- },
- {
- 'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
- 'info_dict': {
- 'id': '774471',
- 'ext': 'mp4',
- 'title': 'Монологи на все времена',
- 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
- 'duration': 2906,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
- 'info_dict': {
- 'id': '125521',
- 'ext': 'mp4',
- 'title': 'Грустная дама червей. Х/ф',
- 'description': '',
- 'duration': 4882,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id, 'Downloading page')
-
- mobj = re.search(
- r'<meta property="og:video" content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
- page)
- if mobj:
- video_id = mobj.group('id')
- page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
- 'Downloading video page')
-
- mobj = re.search(
- r'<meta property="og:video" content="http://player\.rutv\.ru/flash2v/container\.swf\?id=(?P<id>\d+)', page)
- if mobj:
- video_type = 'video'
- video_id = mobj.group('id')
- else:
- mobj = re.search(
- r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>',
- page)
-
- if not mobj:
- raise ExtractorError('No media found', expected=True)
-
- video_type = mobj.group('type')
- video_id = mobj.group('id')
-
- json_data = self._download_json(
- 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
- video_id, 'Downloading JSON')
-
- if json_data['errors']:
- raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
-
- playlist = json_data['data']['playlist']
- medialist = playlist['medialist']
- media = medialist[0]
-
- if media['errors']:
- raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
-
- view_count = playlist.get('count_views')
- priority_transport = playlist['priority_transport']
-
- thumbnail = media['picture']
- width = int_or_none(media['width'])
- height = int_or_none(media['height'])
- description = media['anons']
- title = media['title']
- duration = int_or_none(media.get('duration'))
-
- formats = []
-
- for transport, links in media['sources'].items():
- for quality, url in links.items():
- if transport == 'rtmp':
- mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
- if not mobj:
- continue
- fmt = {
- 'url': mobj.group('url'),
- 'play_path': mobj.group('playpath'),
- 'app': mobj.group('app'),
- 'page_url': 'http://player.rutv.ru',
- 'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
- 'rtmp_live': True,
- 'ext': 'flv',
- 'vbr': int(quality),
- }
- elif transport == 'm3u8':
- fmt = {
- 'url': url,
- 'ext': 'mp4',
- }
- else:
- fmt = {
- 'url': url
- }
- fmt.update({
- 'width': width,
- 'height': height,
- 'format_id': '%s-%s' % (transport, quality),
- 'preference': -1 if priority_transport == transport else -2,
- })
- formats.append(fmt)
-
- if not formats:
- raise ExtractorError('No media links available for %s' % video_id)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'view_count': view_count,
- 'duration': duration,
- 'formats': formats,
- }
\ No newline at end of file
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ find_xpath_attr,
+ int_or_none,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class VideoLecturesNetIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+ IE_NAME = 'videolectures.net'
+
+ _TEST = {
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+ 'info_dict': {
+ 'id': 'promogram_igor_mekjavic_eng',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
+ 'duration': 565,
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
+ smil = self._download_xml(smil_url, video_id)
+
+ title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
+ description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
+ description = (
+ None if description_el is None
+ else description_el.attrib['content'])
+ upload_date = unified_strdate(
+ find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+
+ switch = smil.find('.//switch')
+ duration = parse_duration(switch.attrib.get('dur'))
+ thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
+ thumbnail = (
+ None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+
+ formats = [{
+ 'url': v.attrib['src'],
+ 'width': int_or_none(v.attrib.get('width')),
+ 'height': int_or_none(v.attrib.get('height')),
+ 'filesize': int_or_none(v.attrib.get('size')),
+ 'tbr': int(v.attrib['systemBitrate']) / 1000.0 if v.attrib.get('systemBitrate') else None,
+ 'ext': v.attrib.get('ext'),
+ } for v in switch.findall('./video')
+ if v.attrib.get('proto') == 'http']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+from __future__ import unicode_literals
+
import re
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
+ US_RATINGS,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
- IE_NAME = u'viki'
+ IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
- u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
- u'file': u'1023585v.mp4',
- u'md5': u'a21454021c2646f5433514177e2caa5f',
- u'info_dict': {
- u'title': u'Heirs Episode 14',
- u'uploader': u'SBS',
- u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
- u'upload_date': u'20131121',
- u'age_limit': 13,
+ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+ 'md5': 'a21454021c2646f5433514177e2caa5f',
+ 'info_dict': {
+ 'id': '1023585v',
+ 'ext': 'mp4',
+ 'title': 'Heirs Episode 14',
+ 'uploader': 'SBS',
+ 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+ 'upload_date': '20131121',
+ 'age_limit': 13,
},
- u'skip': u'Blocked in the US',
+ 'skip': 'Blocked in the US',
}
def _real_extract(self, url):
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
- u'rating information', default='').strip()
- RATINGS = {
- 'G': 0,
- 'PG': 10,
- 'PG-13': 13,
- 'R': 16,
- 'NC': 18,
- }
- age_limit = RATINGS.get(rating_str)
+ 'rating information', default='').strip()
+ age_limit = US_RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(
- info_url, video_id, note=u'Downloading info page')
+ info_url, video_id, note='Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError(
- u'Video %s is blocked from your location.' % video_id,
+ 'Video %s is blocked from your location.' % video_id,
expected=True)
video_url = self._html_search_regex(
- r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
+ r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
upload_date_str = self._html_search_regex(
- r'"created_at":"([^"]+)"', info_webpage, u'upload date')
+ r'"created_at":"([^"]+)"', info_webpage, 'upload date')
upload_date = (
unified_strdate(upload_date_str)
if upload_date_str is not None
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
_TEST = {
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
- "file": "wshh6a7q1ny0G34ZwuIO.mp4",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
+ "id": "wshh6a7q1ny0G34ZwuIO",
+ "ext": "mp4",
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
}
-
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
m_vevo_id = re.search(r'videoId=(.*?)&?',
webpage_src)
-
if m_vevo_id is not None:
- self.to_screen(u'Vevo video detected:')
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
- video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
- webpage_src, u'video URL')
+ video_url = self._search_regex(
+ r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')
if 'youtube' in video_url:
- self.to_screen(u'Youtube video detected:')
return self.url_result(video_url, ie='Youtube')
- if 'mp4' in video_url:
- ext = 'mp4'
- else:
- ext = 'flv'
-
- video_title = self._html_search_regex(r"<title>(.*)</title>",
- webpage_src, u'title')
+ video_title = self._html_search_regex(
+ r"<title>(.*)</title>", webpage_src, 'title')
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
- thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
- webpage_src, u'thumbnail', fatal=False)
-
+ thumbnail = self._html_search_regex(
+ r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
+ fatal=False)
if not thumbnail:
_title = r"""candytitles.*>(.*)</span>"""
mobj = re.search(_title, webpage_src)
if mobj is not None:
video_title = mobj.group(1)
- results = [{
- 'id': video_id,
- 'url' : video_url,
- 'title' : video_title,
- 'thumbnail' : thumbnail,
- 'ext' : ext,
- }]
- return results
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ }
+
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+)
+
+
+class XBefIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
+ 'md5': 'a478b565baff61634a98f5e5338be995',
+ 'info_dict': {
+ 'id': '5119',
+ 'ext': 'mp4',
+ 'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
+ 'age_limit': 18,
+ 'thumbnail': 're:^http://.*\.jpg',
+ }
+ }
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(
+ r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
+
+ config_url_enc = self._download_webpage(
+ 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
+ note='Retrieving config URL')
+ config_url = compat_urllib_parse.unquote(config_url_enc)
+ config = self._download_xml(
+ config_url, video_id, note='Retrieving config')
+
+ video_url = config.find('./file').text
+ thumbnail = config.find('./image').text
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'age_limit': 18,
+ }
+
from __future__ import unicode_literals
-import os
import re
+import json
from .common import InfoExtractor
from ..utils import (
- compat_urllib_parse_urlparse,
compat_urllib_request,
parse_duration,
str_to_int,
r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
video_description = self._html_search_regex(
r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
- video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
duration = parse_duration(self._html_search_regex(
r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
view_count = self._html_search_regex(
if comment_count:
comment_count = str_to_int(comment_count)
- path = compat_urllib_parse_urlparse(video_url).path
- extension = os.path.splitext(path)[1][1:]
- format = path.split('/')[5].split('_')[:2]
- format[0] += 'p'
- format[1] += 'k'
- format = "-".join(format)
+ player_quality_option = json.loads(self._html_search_regex(
+ r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
+
+ QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
+ formats = [
+ {
+ 'url': furl,
+ 'format_id': format_id,
+ 'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
+ } for format_id, furl in player_quality_option.items()
+ ]
+ self._sort_formats(formats)
return {
'id': video_id,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
- 'url': video_url,
- 'ext': extension,
- 'format': format,
- 'format_id': format,
+ 'formats': formats,
'age_limit': 18,
}
+
+class XTubeUserIE(InfoExtractor):
+ IE_DESC = 'XTube user profile'
+ _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ username = mobj.group('username')
+
+ profile_page = self._download_webpage(
+ url, username, note='Retrieving profile page')
+
+ video_count = int(self._search_regex(
+ r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % re.escape(username), profile_page,
+ 'video count'))
+
+ PAGE_SIZE = 25
+ urls = []
+ page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+ for n in range(1, page_count + 1):
+ lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
+ lpage = self._download_webpage(
+ lpage_url, username,
+ note='Downloading page %d/%d' % (n, page_count))
+ urls.extend(
+ re.findall(r'addthis:url="([^"]+)"', lpage))
+
+ return {
+ '_type': 'playlist',
+ 'id': username,
+ 'entries': [{
+ '_type': 'url',
+ 'url': eurl,
+ 'ie_key': 'XTube',
+ } for eurl in urls]
+ }
+from __future__ import unicode_literals
+
+
import json
import re
import sys
class YouPornIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
+ _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = {
- u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
- u'file': u'505835.mp4',
- u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
- u'info_dict': {
- u"upload_date": u"20101221",
- u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
- u"uploader": u"Ask Dan And Jennifer",
- u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
- u"age_limit": 18,
+ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
+ 'info_dict': {
+ 'id': '505835',
+ 'ext': 'mp4',
+ 'upload_date': '20101221',
+ 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+ 'uploader': 'Ask Dan And Jennifer',
+ 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+ 'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
- url = 'http://www.' + mobj.group('url')
+ url = mobj.group('proto') + 'www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
age_limit = self._rta_search(webpage)
# Get JSON parameters
- json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+ json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
try:
params = json.loads(json_params)
except:
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
- webpage, u'download list').strip()
+ webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)
resolution = format_parts[0]
height = int(resolution[:-len('p')])
bitrate = int(format_parts[1][:-len('k')])
- format = u'-'.join(format_parts) + u'-' + dn
+ format = '-'.join(format_parts) + '-' + dn
formats.append({
'url': video_url,
# 3d videos
- '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
# Apple HTTP Live Streaming
- '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
# DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
- '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
- '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
- '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
- '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
- '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
- '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
- '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
def _real_extract(self, url):
+ proto = (
+ u'http' if self._downloader.params.get('prefer_insecure', False)
+ else u'https')
+
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
- url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+ url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url)
# Get video webpage
- url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+ url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
'asv': 3,
'sts':'1588',
})
- video_info_url = 'https://www.youtube.com/get_video_info?' + data
+ video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
errnote='unable to download video info webpage')
else:
age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+ video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
- 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+ 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
import datetime
import email.utils
import errno
+import getpass
import gzip
import itertools
import io
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+
upload_date = None
#Replace commas
date_str = date_str.replace(',', ' ')
libc = ctypes.cdll.LoadLibrary("libc.so.6")
except OSError:
return
- title = title
- buf = ctypes.create_string_buffer(len(title) + 1)
- buf.value = title.encode('utf-8')
+ title_bytes = title.encode('utf-8')
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
try:
- libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
+ libc.prctl(15, buf, 0, 0, 0)
except AttributeError:
return # Strange libc, just skip this
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+ def compat_getpass(prompt, *args, **kwargs):
+ if isinstance(prompt, compat_str):
+ prompt = prompt.encode(preferredencoding())
+ return getpass.getpass(prompt, *args, **kwargs)
+else:
+ compat_getpass = getpass.getpass
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
-__version__ = '2014.03.12'
+__version__ = '2014.03.23'