From: Philipp Hagemeister Date: Sun, 19 Jan 2014 04:42:51 +0000 (-0800) Subject: Merge pull request #2153 from jaimeMF/ffmpeg-merger-check-install X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=dfa50793d8541ff2c5603f7c3b727c0f6e551d8d;hp=58c3c7ae38b9c0f5837595eb4027bd2650ae4e09;p=youtube-dl Merge pull request #2153 from jaimeMF/ffmpeg-merger-check-install Don’t try to merge the formats if ffmpeg or avconv are not installed --- diff --git a/README.md b/README.md index bc7dfac69..cf0bb7b65 100644 --- a/README.md +++ b/README.md @@ -93,13 +93,13 @@ which means you can modify it, redistribute it or use it however you like. different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description - (like "22 - 1280x720" or "HD"),%(format_id)s for + (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's - itags: "137"),%(upload_date)s for the upload date - (YYYYMMDD), %(extractor)s for the provider - (youtube, metacafe, etc), %(id)s for the video id - , %(playlist)s for the playlist the video is in, - %(playlist_index)s for the position in the + itags: "137"), %(upload_date)s for the upload + date (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the video + id, %(playlist)s for the playlist the video is + in, %(playlist_index)s for the position in the playlist and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '/my/d @@ -111,7 +111,7 @@ which means you can modify it, redistribute it or use it however you like. avoid "&" and spaces in filenames -a, --batch-file FILE file containing URLs to download ('-' for stdin) --load-info FILE json file containing the video information - (created with the "--write-json" option + (created with the "--write-json" option) -w, --no-overwrites do not overwrite files -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if @@ -145,7 +145,7 @@ which means you can modify it, redistribute it or use it however you like. --no-progress do not print progress bar --console-title display progress in console titlebar -v, --verbose print various debugging information - --dump-intermediate-pages print downloaded pages to debug problems(very + --dump-intermediate-pages print downloaded pages to debug problems (very verbose) --write-pages Write downloaded intermediary pages to files in the current directory to debug problems @@ -158,8 +158,7 @@ which means you can modify it, redistribute it or use it however you like. --prefer-free-formats prefer free video formats unless a specific one is requested --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats (currently youtube - only) + -F, --list-formats list all available formats ## Subtitle Options: --write-sub write subtitle file @@ -177,7 +176,7 @@ which means you can modify it, redistribute it or use it however you like. 
-u, --username USERNAME account username -p, --password PASSWORD account password -n, --netrc use .netrc authentication data - --video-password PASSWORD video password (vimeo only) + --video-password PASSWORD video password (vimeo, smotri) ## Post-processing Options: -x, --extract-audio convert video files to audio-only files (requires diff --git a/test/test_download.py b/test/test_download.py index d0be8d27c..0d925ae69 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -148,7 +148,7 @@ def generator(test_case): for key, value in info_dict.items() if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): - sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n') + sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') # Check for the presence of mandatory fields for key in ('id', 'url', 'title', 'ext'): diff --git a/test/test_playlists.py b/test/test_playlists.py index b3bfbd923..5eeba091e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -32,6 +32,7 @@ from youtube_dl.extractor import ( IviCompilationIE, ImdbListIE, KhanAcademyIE, + EveryonesMixtapeIE, ) @@ -210,6 +211,15 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') self.assertTrue(len(result['entries']) >= 3) + def test_EveryonesMixtape(self): + dl = FakeYDL() + ie = EveryonesMixtapeIE(dl) + result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'm7m0jJAbMQi') + self.assertEqual(result['title'], 'Driving') + self.assertEqual(len(result['entries']), 24) + if __name__ == '__main__': unittest.main() diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 263b5ac69..1e4e62faa 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -167,13 +167,13 @@ class TestTedSubtitles(BaseTestSubtitles): def test_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d') + self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') def test_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['fr'] subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6') + self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') def test_allsubtitles(self): self.DL.params['writesubtitles'] = True diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 056700614..a3fc53047 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -27,12 +27,6 @@ _TESTS = [ 85, u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', ), - ( - u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf', - u'swf', - 82, - u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321' - ), ] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 8f783a86c..82b1ff4f4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -39,6 +39,7 @@ __authors__ = ( 'Sergey M.', 'Michael Orlitzky', 'Chris Gahan', + 'Saimadhav Heblikar', ) __license__ = 'Public Domain' diff 
--git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b887c7f10..d66f7b026 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -52,6 +52,7 @@ from .ehow import EHowIE from .eighttracks import EightTracksIE from .eitb import EitbIE from .escapist import EscapistIE +from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .extremetube import ExtremeTubeIE from .facebook import FacebookIE @@ -61,6 +62,7 @@ from .fktv import ( FKTVPosteckeIE, ) from .flickr import FlickrIE +from .franceinter import FranceInterIE from .francetv import ( PluzzIE, FranceTvInfoIE, diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index c9e7cc561..80bf59ade 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -25,12 +25,13 @@ class CNNIE(InfoExtractor): }, }, { - u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", - u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", - u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e", - u"info_dict": { - u"title": "Student's epic speech stuns new freshmen", - u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"" + "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", + "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", + "md5": "b5cc60c60a3477d185af8f19a2a26f4e", + "info_dict": { + "title": "Student's epic speech stuns new freshmen", + "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + "upload_date": "20130821", } }] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ce3d16903..692d828da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,4 +1,5 @@ import base64 +import hashlib import json import os import re @@ -234,6 +235,9 @@ class InfoExtractor(object): url = url_or_request.get_full_url() except AttributeError: url = url_or_request + if len(url) > 200: + h = hashlib.md5(url).hexdigest() + url = url[:200 - len(h)] + h raw_filename = ('%s_%s.dump' % (video_id, url)) filename = sanitize_filename(raw_filename, restricted=True) self.to_screen(u'Saving request to ' + filename) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index f336a3c62..03b75b80d 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -1,4 +1,5 @@ # coding: utf-8 +from __future__ import unicode_literals import re import json @@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor): # The keys are the supported sites and the values are the name to be shown # to the user and in the extractor description. 
- _SITES = {'wired': u'WIRED', - 'gq': u'GQ', - 'vogue': u'Vogue', - 'glamour': u'Glamour', - 'wmagazine': u'W Magazine', - 'vanityfair': u'Vanity Fair', - } + _SITES = { + 'wired': 'WIRED', + 'gq': 'GQ', + 'vogue': 'Vogue', + 'glamour': 'Glamour', + 'wmagazine': 'W Magazine', + 'vanityfair': 'Vanity Fair', + } _VALID_URL = r'http://(video|www).(?P%s).com/(?Pwatch|series|video)/(?P.+)' % '|'.join(_SITES.keys()) - IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) + IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) _TEST = { - u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', - u'file': u'5171b343c2b4c00dd0c1ccb3.mp4', - u'md5': u'1921f713ed48aabd715691f774c451f7', - u'info_dict': { - u'title': u'3D Printed Speakers Lit With LED', - u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', + 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', + 'file': '5171b343c2b4c00dd0c1ccb3.mp4', + 'md5': '1921f713ed48aabd715691f774c451f7', + 'info_dict': { + 'title': '3D Printed Speakers Lit With LED', + 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', } } def _extract_series(self, url, webpage): title = self._html_search_regex(r'
.*?(.+?)
', - webpage, u'series title', flags=re.DOTALL) + webpage, 'series title', flags=re.DOTALL) url_object = compat_urllib_parse_urlparse(url) base_url = '%s://%s' % (url_object.scheme, url_object.netloc) m_paths = re.finditer(r'

.*?(.+?)', r'(.+?)
', ], - webpage, u'description', + webpage, 'description', fatal=False, flags=re.DOTALL) params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, - u'player params', flags=re.DOTALL) - video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id') - player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id') - target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target') + 'player params', flags=re.DOTALL) + video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') + player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') + target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') data = compat_urllib_parse.urlencode({'videoId': video_id, 'playerId': player_id, 'target': target, }) base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', - webpage, u'base info url', + webpage, 'base info url', default='http://player.cnevids.com/player/loader.js?') info_url = base_info_url + data info_page = self._download_webpage(info_url, video_id, - u'Downloading video info') - video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info') + 'Downloading video info') + video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') video_info = json.loads(video_info) - def _formats_sort_key(f): - type_ord = 1 if f['type'] == 'video/mp4' else 0 - quality_ord = 1 if f['quality'] == 'high' else 0 - return (quality_ord, type_ord) - best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1] + formats = [{ + 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), + 'url': fdata['src'], + 'ext': fdata['type'].split('/')[-1], + 'quality': 1 if fdata['quality'] == 'high' else 0, + } for fdata in video_info['sources'][0]] + self._sort_formats(formats) - return {'id': video_id, - 'url': best_format['src'], - 'ext': best_format['type'].split('/')[-1], - 'title': video_info['title'], - 'thumbnail': video_info['poster_frame'], - 'description': description, - } + return { + 'id': video_id, + 'formats': formats, + 'title': video_info['title'], + 'thumbnail': video_info['poster_frame'], + 'description': description, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py new file mode 100644 index 000000000..12829cbcc --- /dev/null +++ b/youtube_dl/extractor/everyonesmixtape.py @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + ExtractorError, +) + + +class EveryonesMixtapeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P[0-9a-zA-Z]+)(?:/(?P[0-9]))?$' + + _TEST = { + 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', + 'file': '5bfseWNmlds.mp4', + "info_dict": { + "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)", + "uploader": "FKR.TV", + "uploader_id": "frenchkissrecords", + "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", + "upload_date": "20081015" + }, + 'params': { + 'skip_download': True, # This is simply YouTube + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = 
mobj.group('id') + + pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id + pllist_req = compat_urllib_request.Request(pllist_url) + pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') + + playlist_list = self._download_json( + pllist_req, playlist_id, note='Downloading playlist metadata') + try: + playlist_no = next(playlist['id'] + for playlist in playlist_list + if playlist['code'] == playlist_id) + except StopIteration: + raise ExtractorError('Playlist id not found') + + pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no + pl_req = compat_urllib_request.Request(pl_url) + pl_req.add_header('X-Requested-With', 'XMLHttpRequest') + playlist = self._download_json( + pl_req, playlist_id, note='Downloading playlist info') + + entries = [{ + '_type': 'url', + 'url': t['url'], + 'title': t['title'], + } for t in playlist['tracks']] + + if mobj.group('songnr'): + songnr = int(mobj.group('songnr')) - 1 + return entries[songnr] + + playlist_title = playlist['mixData']['name'] + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index e1d2f0526..21ea5ec2b 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -11,13 +13,13 @@ class FlickrIE(InfoExtractor): """Information Extractor for Flickr videos""" _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P[\w\-_@]+)/(?P\d+).*' _TEST = { - u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', - u'file': u'5645318632.mp4', - u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b', - u'info_dict': { - u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", - u"uploader_id": u"forestwander-nature-pictures", - u"title": u"Dark Hollow Waterfalls" + 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + 'file': '5645318632.mp4', + 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', + 'info_dict': { + "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. 
They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + "uploader_id": "forestwander-nature-pictures", + "title": "Dark Hollow Waterfalls" } } @@ -29,13 +31,13 @@ class FlickrIE(InfoExtractor): webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id webpage = self._download_webpage(webpage_url, video_id) - secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret') + secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret') first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') node_id = self._html_search_regex(r'(\d+-\d+)', - first_xml, u'node_id') + first_xml, 'node_id') second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') @@ -44,7 +46,7 @@ class FlickrIE(InfoExtractor): mobj = re.search(r'[0-9]{6})' + _TEST = { + 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'file': '793962.mp3', + 'md5': '4764932e466e6f6c79c317d2e74f6884', + "info_dict": { + "title": "L’Histoire dans les jeux vidéo", + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex( + r'(.*?)', webpage, 'title') + path = self._search_regex( + r'&urlAOD=(.*?)&startTime', webpage, 'video url') + video_url = 'http://www.franceinter.fr/' + path + + return { + 'id': video_id, + 'formats': [{ + 'url': video_url, + 'vcodec': 'none', + }], + 'title': title, + } diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 26b7d2ae5..380ebbe55 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -13,12 +15,12 @@ from ..utils import ( class GameSpotIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P\d+)/?' 
_TEST = { - u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", - u"file": u"gs-2300-6410818.mp4", - u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", - u"info_dict": { - u"title": u"Arma 3 - Community Guide: SITREP I", - u'description': u'Check out this video where some of the basics of Arma 3 is explained.', + "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", + "file": "gs-2300-6410818.mp4", + "md5": "b2a30deaa8654fcccd43713a6b6a4825", + "info_dict": { + "title": "Arma 3 - Community Guide: SITREP I", + 'description': 'Check out this video where some of the basics of Arma 3 is explained.', } } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a9023f38d..839530982 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -328,7 +328,7 @@ class GenericIE(InfoExtractor): mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) if mobj is None: # Broaden the search a little bit: JWPlayer JS loader - mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage) + mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) if mobj is None: # Try to find twitter cards info mobj = re.search(r'\d+)\.shtml' _TEST = { - u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml', - u'file': u'48863.flv', - u'md5': u'29aca1e47ae68fc28804aca89f29507e', - u'info_dict': { - u'title': u'Ready To Go', + 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', + 'file': '48863.flv', + 'md5': '29aca1e47ae68fc28804aca89f29507e', + 'info_dict': { + 'title': 'Ready To Go', }, + 'skip': 'Only available from China', } def _real_extract(self, url): @@ -23,22 +26,23 @@ class KankanIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title') + title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0) gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls) gcid = gcids[-1] - video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid, - video_id, u'Downloading video url info') - ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip') - path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path') - param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1') - param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2') + info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid + video_info_page = self._download_webpage( + info_url, video_id, 'Downloading video url info') + ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip') + path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path') + param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1') + param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2') key = _md5('xl_mp43651' + param1 + param2) video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2) - return {'id': video_id, - 'title': title, - 'url': video_url, - 'ext': determine_ext(video_url), - } + return { + 'id': video_id, + 'title': title, + 'url': video_url, + } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 
7c54ea0f4..f3356db50 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,4 +1,5 @@ -import json +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -10,17 +11,17 @@ from ..utils import ( class MixcloudIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' - IE_NAME = u'mixcloud' + IE_NAME = 'mixcloud' _TEST = { - u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', - u'file': u'dholbach-cryptkeeper.mp3', - u'info_dict': { - u'title': u'Cryptkeeper', - u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', - u'uploader': u'Daniel Holbach', - u'uploader_id': u'dholbach', - u'upload_date': u'20111115', + 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', + 'file': 'dholbach-cryptkeeper.mp3', + 'info_dict': { + 'title': 'Cryptkeeper', + 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', + 'uploader': 'Daniel Holbach', + 'uploader_id': 'dholbach', + 'upload_date': '20111115', }, } @@ -42,17 +43,18 @@ class MixcloudIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - uploader = mobj.group(1) cloudcast_name = mobj.group(2) track_id = '-'.join((uploader, cloudcast_name)) - api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) + webpage = self._download_webpage(url, track_id) - json_data = self._download_webpage(api_url, track_id, - u'Downloading cloudcast info') - info = json.loads(json_data) - preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') + api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) + info = self._download_json( + api_url, track_id, 'Downloading cloudcast info') + + preview_url = self._search_regex( + r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') song_url = preview_url.replace('/previews/', '/c/originals/') template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) final_song_url = self._get_url(template_url) diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 0836243ea..6a8e2cc44 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -34,7 +34,7 @@ class MporaIE(InfoExtractor): data = json.loads(data_json) - uploader = data['info_overlay']['name'] + uploader = data['info_overlay'].get('username') duration = data['video']['duration'] // 1000 thumbnail = data['video']['encodings']['sd']['poster'] title = data['info_overlay']['title'] diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index c2254ae8a..5c4cd2068 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class RedTubeIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P[0-9]+)' + _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P[0-9]+)' _TEST = { u'url': u'http://www.redtube.com/66418', u'file': u'66418.mp4', diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 951e977bd..393b5f17c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import json import re import itertools @@ -32,58 +34,58 @@ class SoundcloudIE(InfoExtractor): 
|(?P(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' - IE_NAME = u'soundcloud' + IE_NAME = 'soundcloud' _TESTS = [ { - u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', - u'file': u'62986583.mp3', - u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', - u'info_dict': { - u"upload_date": u"20121011", - u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", - u"uploader": u"E.T. ExTerrestrial Music", - u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', + 'file': '62986583.mp3', + 'md5': 'ebef0a451b909710ed1d7787dddbf0d7', + 'info_dict': { + "upload_date": "20121011", + "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", + "uploader": "E.T. ExTerrestrial Music", + "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" } }, # not streamable song { - u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', - u'info_dict': { - u'id': u'47127627', - u'ext': u'mp3', - u'title': u'Goldrushed', - u'uploader': u'The Royal Concept', - u'upload_date': u'20120521', + 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', + 'info_dict': { + 'id': '47127627', + 'ext': 'mp3', + 'title': 'Goldrushed', + 'uploader': 'The Royal Concept', + 'upload_date': '20120521', }, - u'params': { + 'params': { # rtmp - u'skip_download': True, + 'skip_download': True, }, }, # private link { - u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', - u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604', - u'info_dict': { - u'id': u'123998367', - u'ext': u'mp3', - u'title': u'Youtube - Dl Test Video \'\' Ä↭', - u'uploader': u'jaimeMF', - u'description': u'test chars: \"\'/\\ä↭', - u'upload_date': u'20131209', + 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', + 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', + 'info_dict': { + 'id': '123998367', + 'ext': 'mp3', + 'title': 'Youtube - Dl Test Video \'\' Ä↭', + 'uploader': 'jaimeMF', + 'description': 'test chars: \"\'/\\ä↭', + 'upload_date': '20131209', }, }, # downloadable song { - u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1', - u'md5': u'56a8b69568acaa967b4c49f9d1d52d19', - u'info_dict': { - u'id': u'105614606', - u'ext': u'wav', - u'title': u'Just Your Problem Baby (Acapella)', - u'description': u'Vocals', - u'uploader': u'Sim Gretina', - u'upload_date': u'20130815', + 'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1', + 'md5': '56a8b69568acaa967b4c49f9d1d52d19', + 'info_dict': { + 'id': '105614606', + 'ext': 'wav', + 'title': 'Just Your Problem Baby (Acapella)', + 'description': 'Vocals', + 'uploader': 'Sim Gretina', + 'upload_date': '20130815', }, }, ] @@ -112,7 +114,7 @@ class SoundcloudIE(InfoExtractor): thumbnail = info['artwork_url'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') - ext = u'mp3' + ext = 'mp3' result = { 'id': track_id, 'uploader': info['user']['username'], @@ -124,11 +126,11 @@ class SoundcloudIE(InfoExtractor): if info.get('downloadable', False): # We can build a direct link to the song format_url = ( - 
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( + 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( track_id, self._CLIENT_ID)) result['formats'] = [{ 'format_id': 'download', - 'ext': info.get('original_format', u'mp3'), + 'ext': info.get('original_format', 'mp3'), 'url': format_url, 'vcodec': 'none', }] @@ -138,7 +140,7 @@ class SoundcloudIE(InfoExtractor): 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) stream_json = self._download_webpage( streams_url, - track_id, u'Downloading track url') + track_id, 'Downloading track url') formats = [] format_dict = json.loads(stream_json) @@ -165,20 +167,19 @@ class SoundcloudIE(InfoExtractor): # We fallback to the stream_url in the original info, this # cannot be always used, sometimes it can give an HTTP 404 error formats.append({ - 'format_id': u'fallback', + 'format_id': 'fallback', 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, 'ext': ext, 'vcodec': 'none', }) - def format_pref(f): + for f in formats: if f['format_id'].startswith('http'): - return 2 + f['protocol'] = 'http' if f['format_id'].startswith('rtmp'): - return 1 - return 0 + f['protocol'] = 'rtmp' - formats.sort(key=format_pref) + self._sort_formats(formats) result['formats'] = formats return result @@ -210,14 +211,14 @@ class SoundcloudIE(InfoExtractor): url = 'http://soundcloud.com/%s' % resolve_title info_json_url = self._resolv_url(url) - info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON') + info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON') info = json.loads(info_json) return self._extract_info_dict(info, full_title, secret_token=token) class SoundcloudSetIE(SoundcloudIE): _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' - IE_NAME = u'soundcloud:set' + IE_NAME = 'soundcloud:set' # it's in tests/test_playlists.py _TESTS = [] @@ -254,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' - IE_NAME = u'soundcloud:user' + IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py _TESTS = [] @@ -266,7 +267,7 @@ class SoundcloudUserIE(SoundcloudIE): url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user_json = self._download_webpage(resolv_url, uploader, - u'Downloading user info') + 'Downloading user info') user = json.loads(user_json) tracks = [] @@ -276,7 +277,7 @@ class SoundcloudUserIE(SoundcloudIE): }) tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' 
% user['id'] + data response = self._download_webpage(tracks_url, uploader, - u'Downloading tracks page %s' % (i+1)) + 'Downloading tracks page %s' % (i+1)) new_tracks = json.loads(response) tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) if len(new_tracks) < 50: diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 9e2ad0d99..3362b3db8 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import os import re @@ -11,17 +13,18 @@ from ..aes import ( aes_decrypt_text ) + class SpankwireIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' _TEST = { - u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - u'file': u'103545.mp4', - u'md5': u'1b3f55e345500552dbc252a3e9c1af43', - u'info_dict': { - u"uploader": u"oreusz", - u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch", - u"description": u"Crazy Bitch X rated music video.", - u"age_limit": 18, + 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', + 'file': '103545.mp4', + 'md5': '1b3f55e345500552dbc252a3e9c1af43', + 'info_dict': { + "uploader": "oreusz", + "title": "Buckcherry`s X Rated Music Video Crazy Bitch", + "description": "Crazy Bitch X rated music video.", + "age_limit": 18, } } @@ -34,17 +37,17 @@ class SpankwireIE(InfoExtractor): req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) - video_title = self._html_search_regex(r'

([^<]+)', webpage, u'title') + video_title = self._html_search_regex(r'
([^<]+)', webpage, 'title') video_uploader = self._html_search_regex( - r'by:\s*]*>(.+?)', webpage, u'uploader', fatal=False) + r'by:\s*]*>(.+?)', webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex( - r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) + r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False) description = self._html_search_regex( - r'([^<]+)<', webpage, u'description', fatal=False) + r'([^<]+)<', webpage, 'description', fatal=False) video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) if webpage.find('flashvars\.encrypted = "true"') != -1: - password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ') + password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ') video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) formats = [] @@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor): path = compat_urllib_parse_urlparse(video_url).path extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] + resolution, bitrate_str = format format = "-".join(format) + height = int(resolution.rstrip('P')) + tbr = int(bitrate_str.rstrip('K')) + formats.append({ 'url': video_url, 'ext': extension, + 'resolution': resolution, 'format': format, + 'tbr': tbr, + 'height': height, 'format_id': format, }) - formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) + self._sort_formats(formats) age_limit = self._rta_search(webpage) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 2bf26d056..9dcffead0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -9,61 +11,66 @@ from ..utils import ( class TeamcocoIE(InfoExtractor): _VALID_URL = r'http://teamcoco\.com/video/(?P.*)' _TEST = { - u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush', - u'file': u'19705.mp4', - u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a', - u'info_dict': { - u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", - u"title": u"Louis C.K. Interview Pt. 1 11/3/11" + 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', + 'file': '19705.mp4', + 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', + 'info_dict': { + "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", + "title": "Louis C.K. Interview Pt. 1 11/3/11" } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) url_title = mobj.group('url_title') webpage = self._download_webpage(url, url_title) - video_id = self._html_search_regex(r'
\w+) # Here goes the name and then ".html" ''' _TEST = { - u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - u'file': u'102.mp4', - u'md5': u'2d76ee1576672e0bd8f187513267adf6', - u'info_dict': { - u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922", - u"title": u"Dan Dennett: The illusion of consciousness" + 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', + 'file': '102.mp4', + 'md5': '4ea1dada91e4174b53dac2bb8ace429d', + 'info_dict': { + "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922", + "title": "Dan Dennett: The illusion of consciousness" } } @@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor): '''Returns the videos of the playlist''' webpage = self._download_webpage( - url, playlist_id, u'Downloading playlist webpage') + url, playlist_id, 'Downloading playlist webpage') matches = re.finditer( r'/talks/[^"]+\.html)">[^<]*

', webpage) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 28c88ffc7..bf3fde610 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -131,6 +131,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ( (?:https?://|//)? # http(s):// or protocol-independent URL (optional) (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| + (?:www\.)?deturl\.com/www\.youtube\.com/| + (?:www\.)?pwnyoutube\.com| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 8c19ed7fa..c22f2cdc6 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -479,6 +479,7 @@ class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] args = ['-c', 'copy'] + self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d1233be65..b9c25c4a9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.01.08' +__version__ = '2014.01.17.2'
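
Note on the headline change (#2153): FFmpegMergerPP now announces the merge, and formats are only merged when ffmpeg or avconv is actually installed. The youtube-dl check itself is not part of the hunks shown above; the snippet below is only a minimal standalone sketch of the idea, assuming a probe via each program's -version flag rather than youtube-dl's real helper.

import subprocess

def _program_available(name):
    # Try to run "<name> -version"; both ffmpeg and avconv accept this flag.
    try:
        subprocess.check_output([name, '-version'], stderr=subprocess.STDOUT)
        return True
    except (OSError, subprocess.CalledProcessError):
        return False

def can_merge_formats():
    # The merger accepts either ffmpeg or its avconv fork.
    return any(_program_available(prog) for prog in ('ffmpeg', 'avconv'))

if __name__ == '__main__':
    if can_merge_formats():
        print('ffmpeg/avconv found: bestvideo+bestaudio can be merged')
    else:
        print('no merger found: fall back to a single pre-muxed format')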
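
The common.py hunk (the hashlib import plus the len(url) > 200 branch) keeps --write-pages / --dump-intermediate-pages dump filenames within filesystem limits while staying unique per URL. A self-contained sketch of the same idea; the function name and the .encode() call are mine, not youtube-dl's:

import hashlib

def shorten_url_for_filename(url, max_len=200):
    # Overly long URLs are cut and suffixed with an md5 digest, so two
    # different long URLs still map to distinct .dump filenames.
    if len(url) > max_len:
        digest = hashlib.md5(url.encode('utf-8')).hexdigest()
        url = url[:max_len - len(digest)] + digest
    return url

print(len(shorten_url_for_filename('http://example.com/?' + 'a' * 500)))  # -> 200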
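
Several extractors touched here (CondeNast, Spankwire, SoundCloud) drop hand-rolled sort lambdas and instead describe each format with comparable fields (height, tbr, quality, protocol) before calling self._sort_formats(formats). A rough stand-in for that pattern outside youtube-dl; the real _sort_formats weighs more fields than this sketch does:

def sort_formats(formats):
    # Worst format first, best format last, as youtube-dl expects.
    formats.sort(key=lambda f: (f.get('quality', 0), f.get('height', 0), f.get('tbr', 0)))

formats = [
    {'format_id': 'mp4-high', 'quality': 1, 'height': 720, 'tbr': 2000},
    {'format_id': 'mp4-low', 'quality': 0, 'height': 360, 'tbr': 500},
]
sort_formats(formats)
print(formats[-1]['format_id'])  # -> mp4-high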