From: Philipp Hagemeister Date: Mon, 27 Jan 2014 02:03:26 +0000 (-0800) Subject: Merge pull request #2221 from Rudloff/master X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=0f2999fe2b352795d54e6fcc4027e6a64ce5bc1d;hp=67ccb7719715d8edaee291f7ab4f5d5caad3d48f;p=youtube-dl Merge pull request #2221 from Rudloff/master Removed websurg extractor --- diff --git a/README.md b/README.md index 54d59ea3e..d795ef6f2 100644 --- a/README.md +++ b/README.md @@ -325,11 +325,27 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). -# COPYRIGHT +# BUILD INSTRUCTIONS -youtube-dl is released into the public domain by the copyright holders. +Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. -This README file was originally written by Daniel Bolton () and is likewise released into the public domain. +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make +* pandoc +* zip +* nosetests # BUGS @@ -388,3 +404,9 @@ Only post features that you (or an incapicated friend you can personally talk to ### Is your question about youtube-dl? It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. 
If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. + +# COPYRIGHT + +youtube-dl is released into the public domain by the copyright holders. + +This README file was originally written by Daniel Bolton () and is likewise released into the public domain. diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index d9fe5af4e..de157f657 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -120,7 +120,7 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_toplist(self): dl = FakeYDL() ie = YoutubeTopListIE(dl) - result = ie.extract('yttoplist:music:Top Tracks') + result = ie.extract('yttoplist:music:Trending') entries = result['entries'] self.assertTrue(len(entries) >= 5) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f202ba4f0..42cbcf699 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -331,7 +331,7 @@ class YoutubeDL(object): def __exit__(self, *args): self.restore_console_title() - + if self.params.get('cookiefile') is not None: self.cookiejar.save() @@ -710,10 +710,10 @@ class YoutubeDL(object): # TODO Central sorting goes here - if formats[0] is not info_dict: + if formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) - # element in the 'formats' field in info_dict is info_dict itself, + # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats if self.params.get('listformats', None): @@ -1094,9 +1094,15 @@ class YoutubeDL(object): res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: res += '%4dk ' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', 
' + res += '%s container' % fdict['container'] if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): - res += '%-5s' % fdict['vcodec'] + if res: + res += ', ' + res += fdict['vcodec'] if fdict.get('vbr') is not None: res += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: @@ -1106,7 +1112,10 @@ class YoutubeDL(object): if fdict.get('acodec') is not None: if res: res += ', ' - res += '%-5s' % fdict['acodec'] + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: res += ', ' diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f19b490f1..0d9eb0001 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, ) + def get_suitable_downloader(info_dict): """Get the downloader class that can handle the info dict.""" url = info_dict['url'] @@ -20,4 +21,3 @@ def get_suitable_downloader(info_dict): return MplayerFD else: return HttpFD - diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 10143d56a..5a068aa8b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -314,4 +314,3 @@ class FileDownloader(object): if the download is successful. 
""" self._progress_hooks.append(ph) - diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 8407727ba..748f9f3ad 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -27,7 +27,7 @@ class HttpFD(FileDownloader): request = compat_urllib_request.Request(url, None, headers) if self.params.get('test', False): - request.add_header('Range','bytes=0-10240') + request.add_header('Range', 'bytes=0-10240') # Establish possible resume length if os.path.isfile(encodeFilename(tmpfilename)): @@ -39,7 +39,7 @@ class HttpFD(FileDownloader): if resume_len != 0: if self.params.get('continuedl', False): self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' else: resume_len = 0 @@ -100,7 +100,7 @@ class HttpFD(FileDownloader): if data_len is not None: data_len = int(data_len) + resume_len min_data_len = self.params.get("min_filesize", None) - max_data_len = self.params.get("max_filesize", None) + max_data_len = self.params.get("max_filesize", None) if min_data_len is not None and data_len < min_data_len: self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) return False diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 67e0e4189..4de7f15f4 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -18,10 +18,10 @@ class MplayerFD(FileDownloader): try: subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) except (OSError, IOError): - self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0]) return False - # Download using mplayer. + # Download using mplayer. 
retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 90c6a8fdb..8daf995b9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .francetv import ( CultureboxIE, ) from .freesound import FreesoundIE +from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE from .gamespot import GameSpotIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 02a82dc57..3cf742a3b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -66,6 +66,7 @@ class InfoExtractor(object): * asr Audio sampling rate in Hertz * vbr Average video bitrate in KBit/s * vcodec Name of the video codec in use + * container Name of the container format * filesize The number of bytes, if known in advance * player_url SWF Player URL (used for rtmpdump). 
* protocol The protocol that will be used for the actual @@ -239,7 +240,7 @@ class InfoExtractor(object): except AttributeError: url = url_or_request if len(url) > 200: - h = u'___' + hashlib.md5(url).hexdigest() + h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() url = url[:200 - len(h)] + h raw_filename = ('%s_%s.dump' % (video_id, url)) filename = sanitize_filename(raw_filename, restricted=True) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 03b75b80d..91c1c1348 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -30,7 +30,7 @@ class CondeNastIE(InfoExtractor): 'vanityfair': 'Vanity Fair', } - _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) _TEST = { diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py new file mode 100644 index 000000000..c210177f7 --- /dev/null +++ b/youtube_dl/extractor/freespeech.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class FreespeechIE(InfoExtractor): + IE_NAME = 'freespeech.org' + _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)' + _TEST = { + 'add_ie': ['Youtube'], + 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'info_dict': { + 'id': 'poKsVCZ64uU', + 'ext': 'mp4', + 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'uploader': 'freespeechtv', + 'uploader_id': 'freespeechtv', + 'upload_date': '20121002', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) +
info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info = json.loads(info_json) + + return { + '_type': 'url', + 'url': info['jw_player']['basic_video_node_player']['file'], + 'ie_key': 'Youtube', + } diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index a106f81d2..80b48b1b3 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -13,7 +13,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'file': '1435540.mp3', diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index a43d6ced5..cd50f708d 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -1,4 +1,7 @@ # encoding: utf-8 + +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,78 +15,77 @@ class RTLnowIE(InfoExtractor): """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ - u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', - u'file': u'90419.flv', - u'info_dict': { - u'upload_date': u'20070416', - u'title': u'Ahornallee - Folge 1 - Der Einzug', - u'description': u'Folge 1 - Der Einzug', + 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + 'file': '90419.flv', + 'info_dict': { + 'upload_date': '20070416', + 'title': 'Ahornallee - Folge 1 
- Der Einzug', + 'description': 'Folge 1 - Der Einzug', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }, { - u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', - u'file': u'69756.flv', - u'info_dict': { - u'upload_date': u'20120519', - u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', - u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', - u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + 'file': '69756.flv', + 'info_dict': { + 'upload_date': '20120519', + 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + 'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }, { - u'url': u'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', - u'file': u'13883.flv', - u'info_dict': { - u'upload_date': u'20090627', - u'title': u'Voxtours - Südafrika-Reporter II', - u'description': u'Südafrika-Reporter II', + 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + 'file': '13883.flv', + 'info_dict': { + 'upload_date': '20090627', + 'title': 'Voxtours - Südafrika-Reporter II', + 'description': 'Südafrika-Reporter II', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, }, { - u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', - u'file': u'99205.flv', - 
u'info_dict': { - u'upload_date': u'20080928', - u'title': u'Medicopter 117 - Angst!', - u'description': u'Angst!', - u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' + 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', + 'file': '99205.flv', + 'info_dict': { + 'upload_date': '20080928', + 'title': 'Medicopter 117 - Angst!', + 'description': 'Angst!', + 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, }, { - u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', - u'file': u'124903.flv', - u'info_dict': { - u'upload_date': u'20130101', - u'title': u'Top Gear vom 01.01.2013', - u'description': u'Episode 1', + 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', + 'file': '124903.flv', + 'info_dict': { + 'upload_date': '20130101', + 'title': 'Top Gear vom 01.01.2013', + 'description': 'Episode 1', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }] - - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - webpage_url = u'http://' + mobj.group('url') - video_page_url = u'http://' + mobj.group('domain') + u'/' - video_id = mobj.group(u'video_id') + webpage_url = 'http://' + mobj.group('url') + video_page_url = 'http://' + mobj.group('domain') + '/' + video_id = mobj.group('video_id') webpage = self._download_webpage(webpage_url, video_id) @@ -94,51 +96,53 @@ class RTLnowIE(InfoExtractor): msg = clean_html(note_m.group(1)) raise ExtractorError(msg) - video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?', - webpage, u'title') - playerdata_url = self._html_search_regex(r'\'playerdata\': 
\'(?P[^\']+)\'', - webpage, u'playerdata_url') + video_title = self._html_search_regex( + r'(?P<title>[^<]+?)( \| [^<]*)?', + webpage, 'title') + playerdata_url = self._html_search_regex( + r'\'playerdata\': \'(?P[^\']+)\'', + webpage, 'playerdata_url') playerdata = self._download_webpage(playerdata_url, video_id) mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]>', playerdata) if mobj: - video_description = mobj.group(u'description') + video_description = mobj.group('description') if mobj.group('upload_date_Y'): video_upload_date = mobj.group('upload_date_Y') elif mobj.group('upload_date_y'): - video_upload_date = u'20' + mobj.group('upload_date_y') + video_upload_date = '20' + mobj.group('upload_date_y') else: video_upload_date = None if video_upload_date: - video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') else: video_description = None video_upload_date = None - self._downloader.report_warning(u'Unable to extract description and upload date') + self._downloader.report_warning('Unable to extract description and upload date') # Thumbnail: not every video has an thumbnail mobj = re.search(r'', webpage) if mobj: - video_thumbnail = mobj.group(u'thumbnail') + video_thumbnail = mobj.group('thumbnail') else: video_thumbnail = None mobj = re.search(r']+>rtmpe://(?:[^/]+/){2})(?P[^\]]+)\]\]>', playerdata) if mobj is None: - raise ExtractorError(u'Unable to extract media URL') - video_url = mobj.group(u'url') - video_play_path = u'mp4:' + mobj.group(u'play_path') - video_player_url = video_page_url + u'includes/vodplayer.swf' + raise ExtractorError('Unable to extract media URL') + video_url = mobj.group('url') + video_play_path = 'mp4:' + mobj.group('play_path') + video_player_url = video_page_url 
+ 'includes/vodplayer.swf' - return [{ - 'id': video_id, - 'url': video_url, - 'play_path': video_play_path, - 'page_url': video_page_url, - 'player_url': video_player_url, - 'ext': 'flv', - 'title': video_title, + return { + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, 'description': video_description, 'upload_date': video_upload_date, - 'thumbnail': video_thumbnail, - }] + 'thumbnail': video_thumbnail, + } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 193675549..a50170ce7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -291,7 +291,7 @@ class VimeoIE(InfoExtractor): class VimeoChannelIE(InfoExtractor): IE_NAME = 'vimeo:channel' - _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P[^/]+)' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' @@ -327,7 +327,7 @@ class VimeoChannelIE(InfoExtractor): class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'(?:https?://)?vimeo.\com/(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'(?:https?://)?vimeo\.com/(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' @classmethod @@ -344,7 +344,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P\d+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P\d+)' _TITLE_RE = r'