self.assertTrue(
isinstance(got, compat_str),
- 'Expected a %r object, but got %r' % (compat_str, type(got)))
+ u'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
match_rex.match(got),
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
"rejecttitle": null,
"retries": 10,
"simulate": false,
- "skip_download": false,
"subtitleslang": null,
"subtitlesformat": "srt",
"test": true,
# Return the filename expected for test case `tc`: its explicit 'file' entry
# when that is present and truthy, otherwise whatever ydl.prepare_filename()
# derives from the case's 'info_dict' (an empty dict when the case has none).
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
- def try_rm_tcs_files():
- for tc in test_cases:
+ res_dict = None
+ def try_rm_tcs_files(tcs=None):
+ if tcs is None:
+ tcs = test_cases
+ for tc in tcs:
tc_filename = get_tc_filename(tc)
try_rm(tc_filename)
try_rm(tc_filename + '.part')
self.assertEqual(
len(res_dict['entries']),
test_case['playlist_count'],
- 'Expected at %d in playlist %s, but got %d.')
+ 'Expected %d entries in playlist %s, but got %d.' % (
+ test_case['playlist_count'],
+ test_case['url'],
+ len(res_dict['entries']),
+ ))
+ if 'playlist_duration_sum' in test_case:
+ got_duration = sum(e['duration'] for e in res_dict['entries'])
+ self.assertEqual(
+ test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
tc_filename = get_tc_filename(tc)
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
finally:
try_rm_tcs_files()
+ if is_playlist and res_dict is not None:
+ # Remove all other files that may have been extracted if the
+ # extractor returns full results even with extract_flat
+ res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+ try_rm_tcs_files(res_tcs)
return test_template
+++ /dev/null
-#!/usr/bin/env python
-# encoding: utf-8
-
-## DEPRECATED FILE!
-# Add new tests to the extractors themselves, like this:
-# _TEST = {
-# 'url': 'http://example.com/playlist/42',
-# 'playlist_mincount': 99,
-# 'info_dict': {
-# 'id': '42',
-# 'title': 'Playlist number forty-two',
-# }
-# }
-
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import (
- assertRegexpMatches,
- assertGreaterEqual,
- expect_info_dict,
- FakeYDL,
-)
-
-from youtube_dl.extractor import (
- AcademicEarthCourseIE,
- DailymotionPlaylistIE,
- DailymotionUserIE,
- VimeoChannelIE,
- VimeoUserIE,
- VimeoAlbumIE,
- VimeoGroupsIE,
- VineUserIE,
- UstreamChannelIE,
- SoundcloudSetIE,
- SoundcloudUserIE,
- SoundcloudPlaylistIE,
- TeacherTubeUserIE,
- LivestreamIE,
- LivestreamOriginalIE,
- NHLVideocenterIE,
- BambuserChannelIE,
- BandcampAlbumIE,
- SmotriCommunityIE,
- SmotriUserIE,
- IviCompilationIE,
- ImdbListIE,
- KhanAcademyIE,
- EveryonesMixtapeIE,
- RutubeChannelIE,
- RutubePersonIE,
- GoogleSearchIE,
- GenericIE,
- TEDIE,
- ToypicsUserIE,
- XTubeUserIE,
- InstagramUserIE,
- CSpanIE,
- AolIE,
- GameOnePlaylistIE,
-)
-
-
-class TestPlaylists(unittest.TestCase):
- def assertIsPlaylist(self, info):
- """Make sure the info has '_type' set to 'playlist'"""
- self.assertEqual(info['_type'], 'playlist')
-
- def test_dailymotion_playlist(self):
- dl = FakeYDL()
- ie = DailymotionPlaylistIE(dl)
- result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
- self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'SPORT')
- self.assertTrue(len(result['entries']) > 20)
-
- def test_dailymotion_user(self):
- dl = FakeYDL()
- ie = DailymotionUserIE(dl)
- result = ie.extract('https://www.dailymotion.com/user/nqtv')
- self.assertIsPlaylist(result)
- assertGreaterEqual(self, len(result['entries']), 100)
- self.assertEqual(result['title'], 'Rémi Gaillard')
-
- def test_vine_user(self):
- dl = FakeYDL()
- ie = VineUserIE(dl)
- result = ie.extract('https://vine.co/Visa')
- self.assertIsPlaylist(result)
- assertGreaterEqual(self, len(result['entries']), 47)
-
- def test_ustream_channel(self):
- dl = FakeYDL()
- ie = UstreamChannelIE(dl)
- result = ie.extract('http://www.ustream.tv/channel/channeljapan')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '10874166')
- assertGreaterEqual(self, len(result['entries']), 54)
-
- def test_soundcloud_set(self):
- dl = FakeYDL()
- ie = SoundcloudSetIE(dl)
- result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
- self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'The Royal Concept EP')
- assertGreaterEqual(self, len(result['entries']), 6)
-
- def test_soundcloud_user(self):
- dl = FakeYDL()
- ie = SoundcloudUserIE(dl)
- result = ie.extract('https://soundcloud.com/the-concept-band')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '9615865')
- assertGreaterEqual(self, len(result['entries']), 12)
-
- def test_soundcloud_likes(self):
- dl = FakeYDL()
- ie = SoundcloudUserIE(dl)
- result = ie.extract('https://soundcloud.com/the-concept-band/likes')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '9615865')
- assertGreaterEqual(self, len(result['entries']), 1)
-
- def test_soundcloud_playlist(self):
- dl = FakeYDL()
- ie = SoundcloudPlaylistIE(dl)
- result = ie.extract('http://api.soundcloud.com/playlists/4110309')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '4110309')
- self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
- assertRegexpMatches(
- self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
- self.assertEqual(len(result['entries']), 6)
-
- def test_livestream_event(self):
- dl = FakeYDL()
- ie = LivestreamIE(dl)
- result = ie.extract('http://new.livestream.com/tedx/cityenglish')
- self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'TEDCity2.0 (English)')
- assertGreaterEqual(self, len(result['entries']), 4)
-
- def test_livestreamoriginal_folder(self):
- dl = FakeYDL()
- ie = LivestreamOriginalIE(dl)
- result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
- assertGreaterEqual(self, len(result['entries']), 28)
-
- def test_nhl_videocenter(self):
- dl = FakeYDL()
- ie = NHLVideocenterIE(dl)
- result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '999')
- self.assertEqual(result['title'], 'Highlights')
- self.assertEqual(len(result['entries']), 12)
-
- def test_bambuser_channel(self):
- dl = FakeYDL()
- ie = BambuserChannelIE(dl)
- result = ie.extract('http://bambuser.com/channel/pixelversity')
- self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'pixelversity')
- assertGreaterEqual(self, len(result['entries']), 60)
-
- def test_bandcamp_album(self):
- dl = FakeYDL()
- ie = BandcampAlbumIE(dl)
- result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
- self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'Hierophany of the Open Grave')
- assertGreaterEqual(self, len(result['entries']), 9)
-
- def test_smotri_community(self):
- dl = FakeYDL()
- ie = SmotriCommunityIE(dl)
- result = ie.extract('http://smotri.com/community/video/kommuna')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'kommuna')
- self.assertEqual(result['title'], 'КПРФ')
- assertGreaterEqual(self, len(result['entries']), 4)
-
- def test_smotri_user(self):
- dl = FakeYDL()
- ie = SmotriUserIE(dl)
- result = ie.extract('http://smotri.com/user/inspector')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'inspector')
- self.assertEqual(result['title'], 'Inspector')
- assertGreaterEqual(self, len(result['entries']), 9)
-
- def test_AcademicEarthCourse(self):
- dl = FakeYDL()
- ie = AcademicEarthCourseIE(dl)
- result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'laws-of-nature')
- self.assertEqual(result['title'], 'Laws of Nature')
- self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
- self.assertEqual(len(result['entries']), 4)
-
- def test_ivi_compilation(self):
- dl = FakeYDL()
- ie = IviCompilationIE(dl)
- result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'dvoe_iz_lartsa')
- self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
- assertGreaterEqual(self, len(result['entries']), 24)
-
- def test_ivi_compilation_season(self):
- dl = FakeYDL()
- ie = IviCompilationIE(dl)
- result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
- self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
- assertGreaterEqual(self, len(result['entries']), 12)
-
- def test_imdb_list(self):
- dl = FakeYDL()
- ie = ImdbListIE(dl)
- result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'JFs9NWw6XI0')
- self.assertEqual(result['title'], 'March 23, 2012 Releases')
- self.assertEqual(len(result['entries']), 7)
-
- def test_khanacademy_topic(self):
- dl = FakeYDL()
- ie = KhanAcademyIE(dl)
- result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'cryptography')
- self.assertEqual(result['title'], 'Journey into cryptography')
- self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
- assertGreaterEqual(self, len(result['entries']), 3)
-
- def test_EveryonesMixtape(self):
- dl = FakeYDL()
- ie = EveryonesMixtapeIE(dl)
- result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'm7m0jJAbMQi')
- self.assertEqual(result['title'], 'Driving')
- self.assertEqual(len(result['entries']), 24)
-
- def test_rutube_channel(self):
- dl = FakeYDL()
- ie = RutubeChannelIE(dl)
- result = ie.extract('http://rutube.ru/tags/video/1800/')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '1800')
- assertGreaterEqual(self, len(result['entries']), 68)
-
- def test_rutube_person(self):
- dl = FakeYDL()
- ie = RutubePersonIE(dl)
- result = ie.extract('http://rutube.ru/video/person/313878/')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '313878')
- assertGreaterEqual(self, len(result['entries']), 37)
-
- def test_multiple_brightcove_videos(self):
- # https://github.com/rg3/youtube-dl/issues/2283
- dl = FakeYDL()
- ie = GenericIE(dl)
- result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
- self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
- self.assertEqual(len(result['entries']), 3)
-
- def test_ted_playlist(self):
- dl = FakeYDL()
- ie = TEDIE(dl)
- result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '10')
- self.assertEqual(result['title'], 'Who are the hackers?')
- assertGreaterEqual(self, len(result['entries']), 6)
-
- def test_toypics_user(self):
- dl = FakeYDL()
- ie = ToypicsUserIE(dl)
- result = ie.extract('http://videos.toypics.net/Mikey')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'Mikey')
- assertGreaterEqual(self, len(result['entries']), 17)
-
- def test_xtube_user(self):
- dl = FakeYDL()
- ie = XTubeUserIE(dl)
- result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'greenshowers')
- assertGreaterEqual(self, len(result['entries']), 155)
-
- def test_InstagramUser(self):
- dl = FakeYDL()
- ie = InstagramUserIE(dl)
- result = ie.extract('http://instagram.com/porsche')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'porsche')
- assertGreaterEqual(self, len(result['entries']), 2)
- test_video = next(
- e for e in result['entries']
- if e['id'] == '614605558512799803_462752227')
- dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
- dl.process_video_result(test_video, download=False)
- EXPECTED = {
- 'id': '614605558512799803_462752227',
- 'ext': 'mp4',
- 'title': '#Porsche Intelligent Performance.',
- 'thumbnail': 're:^https?://.*\.jpg',
- 'uploader': 'Porsche',
- 'uploader_id': 'porsche',
- 'timestamp': 1387486713,
- 'upload_date': '20131219',
- }
- expect_info_dict(self, EXPECTED, test_video)
-
- def test_CSpan_playlist(self):
- dl = FakeYDL()
- ie = CSpanIE(dl)
- result = ie.extract(
- 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '342759')
- self.assertEqual(
- result['title'], 'General Motors Ignition Switch Recall')
- whole_duration = sum(e['duration'] for e in result['entries'])
- self.assertEqual(whole_duration, 14855)
-
- def test_aol_playlist(self):
- dl = FakeYDL()
- ie = AolIE(dl)
- result = ie.extract(
- 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '152147')
- self.assertEqual(
- result['title'], 'Brace Yourself - Today\'s Weirdest News')
- assertGreaterEqual(self, len(result['entries']), 10)
-
- def test_TeacherTubeUser(self):
- dl = FakeYDL()
- ie = TeacherTubeUserIE(dl)
- result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
- self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'rbhagwati2')
- assertGreaterEqual(self, len(result['entries']), 179)
-
-
-if __name__ == '__main__':
- unittest.main()
#!/usr/bin/env python
# coding: utf-8
+from __future__ import unicode_literals
+
# Allow direct execution
import os
import sys
import json
import xml.etree.ElementTree
-#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
DateRange,
encodeFilename,
uppercase_escape,
)
-if sys.version_info < (3, 0):
- _compat_str = lambda b: b.decode('unicode-escape')
-else:
- _compat_str = lambda s: s
-
class TestUtil(unittest.TestCase):
def test_timeconvert(self):
self.assertEqual('this - that', sanitize_filename('this: that'))
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
- aumlaut = _compat_str('\xe4')
+ aumlaut = 'ä'
self.assertEqual(sanitize_filename(aumlaut), aumlaut)
- tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430')
+ tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
self.assertEqual(sanitize_filename(tests), tests)
forbidden = '"\0\\/'
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
- tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c')
+ tests = 'a\xe4b\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
- self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename
+ self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
for fc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
# Handle a common case more neatly
- self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song')
- self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech')
+ self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
+ self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
# .. but make sure the file name is never empty
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
def test_unescape_html(self):
- self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
+ self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(
+ unescapeHTML('é'), 'é')
def test_daterange(self):
_20century = DateRange("19000101","20000101")
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
def test_find_xpath_attr(self):
- testxml = u'''<root>
+ testxml = '''<root>
<node/>
<node x="a"/>
<node x="a" y="c" />
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
def test_meta_parser(self):
- testhtml = u'''
+ testhtml = '''
<head>
<meta name="description" content="foo & bar">
<meta content='Plato' name='author'/>
</head>
'''
get_meta = lambda name: get_meta_content(name, testhtml)
- self.assertEqual(get_meta('description'), u'foo & bar')
+ self.assertEqual(get_meta('description'), 'foo & bar')
self.assertEqual(get_meta('author'), 'Plato')
def test_xpath_with_ns(self):
- testxml = u'''<root xmlns:media="http://example.com/">
+ testxml = '''<root xmlns:media="http://example.com/">
<media:song>
<media:author>The Author</media:author>
<url>http://server.com/download.mp3</url>
doc = xml.etree.ElementTree.fromstring(testxml)
find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
self.assertTrue(find('media:song') is not None)
- self.assertEqual(find('media:song/media:author').text, u'The Author')
- self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+ self.assertEqual(find('media:song/media:author').text, 'The Author')
+ self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_smuggle_url(self):
data = {u"ö": u"ö", u"abc": [3]}
self.assertEqual(res_data, None)
def test_shell_quote(self):
- args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
- self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+ args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+ self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
def test_url_basename(self):
- self.assertEqual(url_basename(u'http://foo.de/'), u'')
- self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
- self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
- self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
- self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
+ self.assertEqual(url_basename('http://foo.de/'), '')
+ self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
+ self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
self.assertEqual(
- url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
- u'trailer.mp4')
+ url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
+ 'trailer.mp4')
def test_parse_duration(self):
self.assertEqual(parse_duration(None), None)
testPL(5, 2, (20, 99), [])
def test_struct_unpack(self):
- self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
+ self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
def test_read_batch_urls(self):
- f = io.StringIO(u'''\xef\xbb\xbf foo
+ f = io.StringIO('''\xef\xbb\xbf foo
bar\r
baz
# More after this line\r
; or after this
bam''')
- self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+ self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])
def test_urlencode_postdata(self):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escape(self):
- self.assertEqual(uppercase_escape(u'aä'), u'aä')
- self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
+ self.assertEqual(uppercase_escape('aä'), 'aä')
+ self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
if __name__ == '__main__':
unittest.main()
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
+ _TEST = {
+ 'url': 'http://academicearth.org/playlists/laws-of-nature/',
+ 'info_dict': {
+ 'id': 'laws-of-nature',
+ 'title': 'Laws of Nature',
+ 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+ },
+ 'playlist_count': 4,
+ }
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
(?:$|\?)
'''
- _TEST = {
+ _TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
},
'add_ie': ['FiveMin'],
- }
+ }, {
+ 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+ 'info_dict': {
+ 'id': '152147',
+ 'title': 'Brace Yourself - Today\'s Weirdest News',
+ },
+ 'playlist_mincount': 10,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
+ _TEST = {
+ 'url': 'http://bambuser.com/channel/pixelversity',
+ 'info_dict': {
+ 'title': 'pixelversity',
+ },
+ 'playlist_mincount': 60,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
- info_json = self._download_webpage(req, user,
- 'Downloading page %d' % i)
- results = json.loads(info_json)['result']
- if len(results) == 0:
+ data = self._download_json(
+ req, user, 'Downloading page %d' % i)
+ results = data['result']
+ if not results:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
- _TEST = {
+ _TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist': [
{
'playlistend': 2
},
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
- }
+ }, {
+ 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+ 'info_dict': {
+ 'title': 'Hierophany of the Open Grave',
+ },
+ 'playlist_mincount': 9,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
+ }, {
+ 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+ 'info_dict': {
+ 'id': '342759',
+ 'title': 'General Motors Ignition Switch Recall',
+ },
+ 'playlist_duration_sum': 14855,
}]
def _real_extract(self, url):
+# coding: utf-8
+from __future__ import unicode_literals
+
import re
import json
import itertools
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
- IE_NAME = u'dailymotion'
+ IE_NAME = 'dailymotion'
_FORMATS = [
- (u'stream_h264_ld_url', u'ld'),
- (u'stream_h264_url', u'standard'),
- (u'stream_h264_hq_url', u'hq'),
- (u'stream_h264_hd_url', u'hd'),
- (u'stream_h264_hd1080_url', u'hd180'),
+ ('stream_h264_ld_url', 'ld'),
+ ('stream_h264_url', 'standard'),
+ ('stream_h264_hq_url', 'hq'),
+ ('stream_h264_hd_url', 'hd'),
+ ('stream_h264_hd1080_url', 'hd180'),
]
_TESTS = [
{
- u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
- u'file': u'x33vw9.mp4',
- u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
- u'info_dict': {
- u"uploader": u"Amphora Alex and Van .",
- u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
+ 'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
+ 'md5': '392c4b85a60a90dc4792da41ce3144eb',
+ 'info_dict': {
+ 'id': 'x33vw9',
+ 'ext': 'mp4',
+ 'uploader': 'Amphora Alex and Van .',
+ 'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
}
},
# Vevo video
{
- u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
- u'file': u'USUV71301934.mp4',
- u'info_dict': {
- u'title': u'Roar (Official)',
- u'uploader': u'Katy Perry',
- u'upload_date': u'20130905',
+ 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+ 'info_dict': {
+ 'title': 'Roar (Official)',
+ 'id': 'USUV71301934',
+ 'ext': 'mp4',
+ 'uploader': 'Katy Perry',
+ 'upload_date': '20130905',
},
- u'params': {
- u'skip_download': True,
+ 'params': {
+ 'skip_download': True,
},
- u'skip': u'VEVO is only available in some countries',
+ 'skip': 'VEVO is only available in some countries',
},
# age-restricted video
{
- u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
- u'file': u'xyh2zz.mp4',
- u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
- u'info_dict': {
- u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
- u'uploader': 'HotWaves1012',
- u'age_limit': 18,
+ 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+ 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+ 'info_dict': {
+ 'id': 'xyh2zz',
+ 'ext': 'mp4',
+ 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+ 'uploader': 'HotWaves1012',
+ 'age_limit': 18,
}
-
}
]
webpage)
if m_vevo is not None:
vevo_id = m_vevo.group('id')
- self.to_screen(u'Vevo video detected: %s' % vevo_id)
- return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
age_limit = self._rta_search(webpage)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
- u'Downloading embed page')
+ 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
'height': height,
})
if not formats:
- raise ExtractorError(u'Unable to extract video URL')
+ raise ExtractorError('Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
return
view_count = self._search_regex(
- r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+ r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
if view_count is not None:
view_count = str_to_int(view_count)
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
- self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+ self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
return sub_lang_list
- self._downloader.report_warning(u'video doesn\'t have subtitles')
+ self._downloader.report_warning('video doesn\'t have subtitles')
return {}
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
- IE_NAME = u'dailymotion:playlist'
+ IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+ 'info_dict': {
+ 'title': 'SPORT',
+ },
+ 'playlist_mincount': 20,
+ }]
def _extract_entries(self, id):
video_ids = []
for pagenum in itertools.count(1):
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
webpage = self._download_webpage(request,
- id, u'Downloading page %s' % pagenum)
+ id, 'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
class DailymotionUserIE(DailymotionPlaylistIE):
- IE_NAME = u'dailymotion:user'
+ IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+ _TESTS = [{
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ 'title': 'Rémi Gaillard',
+ },
+ 'playlist_mincount': 100,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, user)
full_user = unescapeHTML(self._html_search_regex(
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
- webpage, u'user', flags=re.DOTALL))
+ webpage, 'user'))
return {
'_type': 'playlist',
class EveryonesMixtapeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
- _TEST = {
+ _TESTS = [{
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
- 'file': '5bfseWNmlds.mp4',
"info_dict": {
+ 'id': '5bfseWNmlds',
+ 'ext': 'mp4',
"title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
"uploader": "FKR.TV",
"uploader_id": "frenchkissrecords",
'params': {
'skip_download': True, # This is simply YouTube
}
- }
+ }, {
+ 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+ 'info_dict': {
+ 'id': 'm7m0jJAbMQi',
+ 'title': 'Driving',
+ },
+ 'playlist_count': 24
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
compat_urlparse,
compat_xml_parse_error,
+ determine_ext,
ExtractorError,
float_or_none,
HEADRequest,
'description': 're:'
},
'playlist_mincount': 11,
+ },
+ # Multiple brightcove videos
+ # https://github.com/rg3/youtube-dl/issues/2283
+ {
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ }
}
]
IE_NAME = 'imdb:list'
IE_DESC = 'Internet Movie Database lists'
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+ _TEST = {
+ 'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+ 'info_dict': {
+ 'id': 'JFs9NWw6XI0',
+ 'title': 'March 23, 2012 Releases',
+ },
+ 'playlist_count': 7,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
+ _TEST = {
+ 'url': 'http://instagram.com/porsche',
+ 'info_dict': {
+ 'id': 'porsche',
+ 'title': 'porsche',
+ },
+ 'playlist_mincount': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '614605558512799803_462752227',
+ 'ext': 'mp4',
+ 'title': '#Porsche Intelligent Performance.',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader': 'Porsche',
+ 'uploader_id': 'porsche',
+ 'timestamp': 1387486713,
+ 'upload_date': '20131219',
+ },
+ }],
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ }
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+ _TESTS = [{
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa',
+ 'title': 'Двое из ларца (2006 - 2008)',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa/season1',
+ 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+ },
+ 'playlist_mincount': 12,
+ }]
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.khanacademy.org/video/one-time-pad',
- 'file': 'one-time-pad.mp4',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'info_dict': {
+ 'id': 'one-time-pad',
+ 'ext': 'mp4',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'upload_date': '20120411',
}
- }
+ }, {
+ 'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+ 'info_dict': {
+ 'id': 'cryptography',
+ 'title': 'Journey into cryptography',
+ 'description': 'How have humans protected their secret messages through history? What has changed today?',
+ },
+ 'playlist_mincount': 3,
+ }]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
- _TEST = {
+ _TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
'info_dict': {
'view_count': int,
'thumbnail': 're:^http://.*\.jpg$'
}
- }
+ }, {
+ 'url': 'http://new.livestream.com/tedx/cityenglish',
+ 'info_dict': {
+ 'title': 'TEDCity2.0 (English)',
+ },
+ 'playlist_mincount': 4,
+ }]
def _parse_smil(self, video_id, smil_url):
formats = []
event_name = mobj.group('event_name')
webpage = self._download_webpage(url, video_id or event_name)
- og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
- if og_video is None:
- config_json = self._search_regex(
- r'window.config = ({.*?});', webpage, 'window config')
- info = json.loads(config_json)['event']
-
- def is_relevant(vdata, vid):
- result = vdata['type'] == 'video'
- if video_id is not None:
- result = result and compat_str(vdata['data']['id']) == vid
- return result
-
- videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data']
- if is_relevant(video_data, video_id)]
- if video_id is None:
- # This is an event page:
- return self.playlist_result(videos, info['id'], info['full_name'])
- else:
- if videos:
- return videos[0]
- else:
+ og_video = self._og_search_video_url(
+ webpage, 'player url', fatal=False, default=None)
+ if og_video is not None:
query_str = compat_urllib_parse_urlparse(og_video).query
query = compat_urlparse.parse_qs(query_str)
- api_url = query['play_url'][0].replace('.smil', '')
- info = json.loads(self._download_webpage(
- api_url, video_id, 'Downloading video info'))
- return self._extract_video_info(info)
+ if 'play_url' in query:
+ api_url = query['play_url'][0].replace('.smil', '')
+ info = json.loads(self._download_webpage(
+ api_url, video_id, 'Downloading video info'))
+ return self._extract_video_info(info)
+
+ config_json = self._search_regex(
+ r'window.config = ({.*?});', webpage, 'window config')
+ info = json.loads(config_json)['event']
+
+ def is_relevant(vdata, vid):
+ result = vdata['type'] == 'video'
+ if video_id is not None:
+ result = result and compat_str(vdata['data']['id']) == vid
+ return result
+
+ videos = [self._extract_video_info(video_data['data'])
+ for video_data in info['feed']['data']
+ if is_relevant(video_data, video_id)]
+ if video_id is None:
+ # This is an event page:
+ return self.playlist_result(videos, info['id'], info['full_name'])
+ else:
+ if not videos:
+ raise ExtractorError('Cannot find video %s' % video_id)
+ return videos[0]
# The original version of Livestream uses a different system
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
- _TEST = {
+ _TESTS = [{
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
# rtmp
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ 'info_dict': {
+ 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ },
+ 'playlist_mincount': 4,
+ }]
def _extract_video(self, user, video_id):
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
- urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+ paths = orderedSet(re.findall(
+ r'''(?x)(?:
+ <li\s+class="folder">\s*<a\s+href="|
+ <a\s+href="(?=https?://livestre\.am/)
+ )([^"]+)"''', webpage))
return {
'_type': 'playlist',
'id': folder_id,
'entries': [{
'_type': 'url',
- 'url': video_url,
- } for video_url in urls],
+ 'url': compat_urlparse.urljoin(url, p),
+ } for p in paths],
}
def _real_extract(self, url):
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
- strip_jsonp,
parse_duration,
unified_strdate,
)
+from __future__ import unicode_literals
+
import re
import json
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
- path_doc = self._download_xml(path_url, video_id,
- u'Downloading final video url')
+ path_doc = self._download_xml(
+ path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin
class NHLIE(NHLBaseInfoExtractor):
- IE_NAME = u'nhl.com'
- _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+ IE_NAME = 'nhl.com'
+ _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
_TEST = {
- u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
- u'file': u'453614.mp4',
- u'info_dict': {
- u'title': u'Quick clip: Weise 4-3 goal vs Flames',
- u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
- u'duration': 18,
- u'upload_date': u'20131006',
+ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+ 'info_dict': {
+ 'id': '453614',
+ 'ext': 'mp4',
+ 'title': 'Quick clip: Weise 4-3 goal vs Flames',
+ 'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+ 'duration': 18,
+ 'upload_date': '20131006',
},
}
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
- info_json = self._download_webpage(json_url, video_id,
- u'Downloading info json')
- info_json = self._fix_json(info_json)
- info = json.loads(info_json)[0]
- return self._extract_video(info)
+ data = self._download_json(
+ json_url, video_id, transform_source=self._fix_json)
+ return self._extract_video(data[0])
class NHLVideocenterIE(NHLBaseInfoExtractor):
- IE_NAME = u'nhl.com:videocenter'
- IE_DESC = u'NHL videocenter category'
- _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
-
- @classmethod
- def suitable(cls, url):
- if NHLIE.suitable(url):
- return False
- return super(NHLVideocenterIE, cls).suitable(url)
+ IE_NAME = 'nhl.com:videocenter'
+ IE_DESC = 'NHL videocenter category'
+ _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+ _TEST = {
+ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
+ 'info_dict': {
+ 'id': '999',
+ 'title': 'Highlights',
+ },
+ 'playlist_count': 12,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
cat_id = self._search_regex(
[r'var defaultCatId = "(.+?)";',
r'{statusIndex:0,index:0,.*?id:(.*?),'],
- webpage, u'category id')
+ webpage, 'category id')
playlist_title = self._html_search_regex(
r'tab0"[^>]*?>(.*?)</td>',
- webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
+ webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
data = compat_urllib_parse.urlencode({
'cid': cat_id,
response = self._fix_json(response)
if not response.strip():
self._downloader.report_warning(u'Got an empty reponse, trying '
- u'adding the "newvideos" parameter')
+ 'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)
response = self._fix_json(response)
'_type': 'playlist',
'title': playlist_title,
'id': cat_id,
- 'entries': [self._extract_video(i) for i in videos],
+ 'entries': [self._extract_video(v) for v in videos],
}
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'
_VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/tags/video/1800/',
+ 'info_dict': {
+ 'id': '1800',
+ },
+ 'playlist_mincount': 68,
+ }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
IE_NAME = 'rutube:movie'
IE_DESC = 'Rutube movies'
_VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+ _TESTS = []
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
IE_NAME = 'rutube:person'
IE_DESC = 'Rutube person videos'
_VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://rutube.ru/video/person/313878/',
+ 'info_dict': {
+ 'id': '313878',
+ },
+ 'playlist_mincount': 37,
+ }]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
IE_DESC = 'Smotri.com community videos'
IE_NAME = 'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+ _TEST = {
+ 'url': 'http://smotri.com/community/video/kommuna',
+ 'info_dict': {
+ 'id': 'kommuna',
+ 'title': 'КПРФ',
+ },
+ 'playlist_mincount': 4,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
IE_DESC = 'Smotri.com user videos'
IE_NAME = 'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+ _TESTS = [{
+ 'url': 'http://smotri.com/user/inspector',
+ 'info_dict': {
+ 'id': 'inspector',
+ 'title': 'Inspector',
+ },
+ 'playlist_mincount': 9,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
- (?!sets/)(?P<title>[\w\d-]+)/?
+ (?!sets/|likes/?(?:$|[?#]))
+ (?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
IE_NAME = 'soundcloud:set'
- # it's in tests/test_playlists.py
- _TESTS = []
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'title': 'The Royal Concept EP',
+ },
+ 'playlist_mincount': 6,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
# extract uploader (which is in the url)
uploader = mobj.group(1)
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
return
- self.report_extraction(full_title)
- return {'_type': 'playlist',
- 'entries': [self._extract_info_dict(track) for track in info['tracks']],
- 'id': info['id'],
- 'title': info['title'],
- }
+ return {
+ '_type': 'playlist',
+ 'entries': [self._extract_info_dict(track) for track in info['tracks']],
+ 'id': info['id'],
+ 'title': info['title'],
+ }
class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
-
- # it's in tests/test_playlists.py
- _TESTS = []
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/the-concept-band',
+ 'info_dict': {
+ 'id': '9615865',
+ 'title': 'The Royal Concept',
+ },
+ 'playlist_mincount': 12
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/likes',
+ 'info_dict': {
+ 'id': '9615865',
+ 'title': 'The Royal Concept',
+ },
+ 'playlist_mincount': 1,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
class SoundcloudPlaylistIE(SoundcloudIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
IE_NAME = 'soundcloud:playlist'
+ _TESTS = [
- # it's in tests/test_playlists.py
- _TESTS = []
+ {
+ 'url': 'http://api.soundcloud.com/playlists/4110309',
+ 'info_dict': {
+ 'id': '4110309',
+ 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+ 'description': 're:.*?TILT Brass - Bowery Poetry Club',
+ },
+ 'playlist_count': 6,
+ }
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
\s*
<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
'''
+ _TEST = {
+ 'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+ 'info_dict': {
+ 'id': 'rbhagwati2'
+ },
+ 'playlist_mincount': 179,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
}
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
- 'md5': '49144e345a899b8cb34d315f3b9cfeeb',
'info_dict': {
'id': '1972',
'ext': 'mp4',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
},
+ }, {
+ 'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+ 'info_dict': {
+ 'id': '10',
+ 'title': 'Who are the hackers?',
+ },
+ 'playlist_mincount': 6,
}]
_NATIVE_FORMATS = {
class ToypicsUserIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
_VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+ _TEST = {
+ 'url': 'http://videos.toypics.net/Mikey',
+ 'info_dict': {
+ 'id': 'Mikey',
+ },
+ 'playlist_mincount': 9917,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
IE_NAME = 'ustream:channel'
+ _TEST = {
+ 'url': 'http://www.ustream.tv/channel/channeljapan',
+ 'info_dict': {
+ 'id': '10874166',
+ },
+ 'playlist_mincount': 54,
+ }
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
- slug = m.group('slug')
- webpage = self._download_webpage(url, slug)
+ display_id = m.group('slug')
+ webpage = self._download_webpage(url, display_id)
channel_id = get_meta_content('ustream:channel_id', webpage)
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
video_ids = []
while next_url:
- reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
+ reply = self._download_json(
+ compat_urlparse.urljoin(BASE, next_url), display_id,
+ note='Downloading video information (next: %d)' % (len(video_ids) + 1))
video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
next_url = reply['nextUrl']
- urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
- url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
- return self.playlist_result(url_entries, channel_id)
+ entries = [
+ self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+ for vid in video_ids]
+ return {
+ '_type': 'playlist',
+ 'id': channel_id,
+ 'display_id': display_id,
+ 'entries': entries,
+ }
IE_NAME = 'vine:user'
_VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
_VINE_BASE_URL = "https://vine.co/"
+ _TEST = {
+ 'url': 'https://vine.co/Visa',
+ 'info_dict': {
+ 'id': 'Visa',
+ },
+ 'playlist_mincount': 47,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
'age_limit': 18,
}
+
class XTubeUserIE(InfoExtractor):
IE_DESC = 'XTube user profile'
_VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+ _TEST = {
+ 'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
+ 'info_dict': {
+ 'id': 'greenshowers',
+ },
+ 'playlist_mincount': 155,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
replaced.append('{%s}%s' % (ns_map[ns], tag))
return '/'.join(replaced)
-def htmlentity_transform(matchobj):
- """Transforms an HTML entity to a character.
-
- This function receives a match object and is intended to be used with
- the re.sub() function.
- """
- entity = matchobj.group(1)
-
- # Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
-
- mobj = re.match(u'(?u)#(x?\\d+)', entity)
- if mobj is not None:
- numstr = mobj.group(1)
- if numstr.startswith(u'x'):
- base = 16
- numstr = u'0%s' % numstr
- else:
- base = 10
- return compat_chr(int(numstr, base))
-
- # Unknown entity in name, return its literal representation
- return (u'&%s;' % entity)
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
return res
+def _htmlentity_transform(entity):
+    """Transform the body of an HTML entity into the character it denotes.
+
+    entity is the text found between '&' and ';' (for example 'amp',
+    '#38' or '#x26').  Named entities are looked up in
+    compat_html_entities.name2codepoint; numeric references may be decimal
+    ('#NNN') or hexadecimal ('#xHHH' / '#XHHH').
+
+    Returns the decoded character, or the literal '&<entity>;' text when the
+    entity is unknown, malformed, or names a code point that cannot be
+    represented.
+    """
+    # Known non-numeric HTML entity
+    if entity in compat_html_entities.name2codepoint:
+        return compat_chr(compat_html_entities.name2codepoint[entity])
+
+    # Numeric character reference.  The whole entity must match (\Z) so that
+    # trailing garbage is never silently dropped, and hexadecimal references
+    # must accept the digits a-f, which a bare [0-9]+ would truncate.
+    mobj = re.match(r'#(?:[xX]([0-9a-fA-F]+)|([0-9]+))\Z', entity)
+    if mobj is not None:
+        hexstr, decstr = mobj.groups()
+        try:
+            if hexstr is not None:
+                return compat_chr(int(hexstr, 16))
+            return compat_chr(int(decstr, 10))
+        except (ValueError, OverflowError):
+            # Code point outside the range supported by this interpreter:
+            # fall through and keep the entity as literal text.
+            pass
+
+    # Unknown entity in name, return its literal representation
+    return (u'&%s;' % entity)
+
+
def unescapeHTML(s):
if s is None:
return None
assert type(s) == compat_str
- result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
- return result
+ return re.sub(
+ r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
def encodeFilename(s, for_subprocess=False):