From: Philipp Hagemeister Date: Thu, 13 Nov 2014 14:53:16 +0000 (+0100) Subject: Merge remote-tracking branch 'yaccz/add-extractor/freevideo' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=11b28e93d3a5496916e55a717c362fe7a6a1c7e7;hp=3898c8a7b2835b1632ef0e34481bdf5e006cee2b;p=youtube-dl Merge remote-tracking branch 'yaccz/add-extractor/freevideo' --- diff --git a/AUTHORS b/AUTHORS index a574949b3..48189e26c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -79,4 +79,5 @@ Dennis Scheiba Damon Timm winwon Xavier Beynon -Gabriel Schubiner \ No newline at end of file +Gabriel Schubiner +xantares diff --git a/README.md b/README.md index 933411fa2..c284db495 100644 --- a/README.md +++ b/README.md @@ -131,17 +131,19 @@ which means you can modify it, redistribute it or use it however you like. %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the - video id, %(playlist)s for the playlist the + video id, %(playlist_title)s, + %(playlist_id)s, or %(playlist)s (=title if + present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the - position in the playlist and %% for a - literal percent. %(height)s and %(width)s - for the width and height of the video - format. %(resolution)s for a textual + position in the playlist. %(height)s and + %(width)s for the width and height of the + video format. %(resolution)s for a textual description of the resolution of the video - format. Use - to output to stdout. Can also - be used to download to a different - directory, for example with -o '/my/downloa - ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + format. %% for a literal percent. Use - to + output to stdout. Can also be used to + download to a different directory, for + example with -o '/my/downloads/%(uploader)s + /%(title)s-%(id)s.%(ext)s' . --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option @@ -239,8 +241,13 @@ which means you can modify it, redistribute it or use it however you like. "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple - audio formats, such as -f - 136/137/mp4/bestvideo,140/m4a/bestaudio + audio formats, such as -f + 136/137/mp4/bestvideo,140/m4a/bestaudio. + You can merge the video and audio of two + formats into a single file using -f + (requires ffmpeg or + avconv), for example -f + bestvideo+bestaudio. --all-formats download all available video formats --prefer-free-formats prefer free video formats unless a specific one is requested @@ -500,7 +507,7 @@ If you want to add support for a new site, you can follow this quick list (assum 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). -9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this: +9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/__init__.py $ git add youtube_dl/extractor/yourextractor.py diff --git a/test/helper.py b/test/helper.py index 325f72f0a..8be37a183 100644 --- a/test/helper.py +++ b/test/helper.py @@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict): info_dict_str = ''.join( ' %s: %s,\n' % (_repr(k), _repr(v)) for k, v in test_info_dict.items()) - write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr) + write_string( + '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( diff --git a/test/test_utils.py b/test/test_utils.py index e59547784..a9649397f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import json import xml.etree.ElementTree from youtube_dl.utils import ( + clean_html, DateRange, encodeFilename, find_xpath_attr, @@ -45,6 +46,7 @@ from youtube_dl.utils import ( escape_url, js_to_json, get_filesystem_encoding, + intlist_to_bytes, ) @@ -345,5 +347,14 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) + def test_clean_html(self): + self.assertEqual(clean_html('a:\nb'), 'a: b') + self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + + def test_intlist_to_bytes(self): + self.assertEqual( + intlist_to_bytes([0, 1, 127, 128, 255]), + b'\x00\x01\x7f\x80\xff') + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index df2cb09f2..13d228cd8 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -14,7 +14,7 @@ import re import string from youtube_dl.extractor import YoutubeIE -from youtube_dl.utils import compat_str, compat_urlretrieve +from youtube_dl.compat import compat_str, compat_urlretrieve _TESTS = [ ( diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8732f3db4..5c875b497 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -658,6 +658,8 @@ class YoutubeDL(object): extra = { 'n_entries': n_entries, 'playlist': playlist, + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], @@ -836,6 +838,13 @@ class YoutubeDL(object): formats_info = (self.select_format(format_1, formats), self.select_format(format_2, formats)) if all(formats_info): + # The first format must contain the video and the + # second the audio + if formats_info[0].get('vcodec') == 'none': + self.report_error('The first format must ' + 'contain the video, try using ' + '"-f %s+%s"' % (format_2, format_1)) + return selected_format = { 'requested_formats': formats_info, 'format': rf, @@ -1297,11 +1306,13 @@ class YoutubeDL(object): self.report_warning( 'Your Python is broken! Update to a newer and supported version') + stdout_encoding = getattr( + sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__) encoding_str = ( '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), - sys.stdout.encoding, + stdout_encoding, self.get_encoding())) write_string(encoding_str, encoding=None) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db76e8870..f45ce05ab 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,7 @@ from .generic import GenericIE from .glide import GlideIE from .globo import GloboIE from .godtube import GodTubeIE +from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE @@ -324,6 +325,7 @@ from .sbs import SBSIE from .scivee import SciVeeIE from .screencast import ScreencastIE from .servingsys import ServingSysIE +from .sexu import SexuIE from .sexykarma import SexyKarmaIE from .shared import SharedIE from .sharesix import ShareSixIE diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 69f89320c..dc0fb85d6 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -11,13 +11,13 @@ class ABCIE(InfoExtractor): _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P\d+)' _TEST = { - 'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716', - 'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742', + 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', + 'md5': 'cb3dd03b18455a661071ee1e28344d9f', 'info_dict': { - 'id': '5624716', + 'id': '5868334', 'ext': 'mp4', - 'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor', - 'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af', + 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', + 'description': 'md5:809ad29c67a05f54eb41f2a105693a67', }, } diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7bd797884..398e93bfb 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor): 'id': '19546517', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', 'thumbnail': 're:http://.*\.jpg', }, }, { diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index c13446665..1b8da43ca 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor): 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'playlist': [ { - 'file': '1353101989.mp3', 'md5': '39bc1eded3476e927c724321ddf116cf', 'info_dict': { + 'id': '1353101989', + 'ext': 'mp3', 'title': 'Intro', } }, { - 'file': '38097443.mp3', 'md5': '1a2c32e2691474643e912cc6cd4bffaa', 'info_dict': { + 'id': '38097443', + 'ext': 'mp3', 'title': 'Kero One - Keep It Alive (Blazo remix)', } }, ], + 'info_dict': { + 'title': 'Jazz Format Mixtape vol.1', + }, 'params': { 'playlistend': 2 }, diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index cf19b7b0c..6252be05b 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -10,12 +10,12 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' _TEST = { - 'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking', + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { - 'id': 'granite-flats-talking', + 'id': 'studio-c-season-5-episode-5', 'ext': 'mp4', - 'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c', - 'title': 'Talking', + 'description': 'md5:5438d33774b6bdc662f9485a340401cc', + 'title': 'Season 5 Episode 5', 'thumbnail': 're:^https?://.*promo.*' }, 'params': { diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index dbcf5d6a7..fd4bc75b2 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # It may just embed a vevo video: m_vevo = re.search( - r'[\w]*)', + r'[\w]*)', webpage) if m_vevo is not None: vevo_id = m_vevo.group('id') diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 487d5d1b4..a49fc1151 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor): }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', - 'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad', 'info_dict': { 'id': 'e402820827', 'ext': 'mp4', diff --git a/youtube_dl/extractor/goldenmoustache.py b/youtube_dl/extractor/goldenmoustache.py new file mode 100644 index 000000000..f2e231bf4 --- /dev/null +++ b/youtube_dl/extractor/goldenmoustache.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, +) + + +class GoldenMoustacheIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P[\w-]+)-(?P\d+)' + _TEST = { + 'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/', + 'md5': '0f904432fa07da5054d6c8beb5efb51a', + 'info_dict': { + 'id': '3700', + 'ext': 'mp4', + 'title': 'Suricate - Le Poker', + 'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9', + 'thumbnail': 're:^https?://.*\.jpg$', + 'view_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL') + title = self._html_search_regex( + r'(.*?) - Golden Moustache', webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + view_count = int_or_none(self._html_search_regex( + r'([0-9]+)\s*VUES', + webpage, 'view count', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'view_count': view_count, + } diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py index 726adff77..fff74a70a 100644 --- a/youtube_dl/extractor/grooveshark.py +++ b/youtube_dl/extractor/grooveshark.py @@ -8,12 +8,13 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError, compat_urllib_request, compat_html_parser - -from ..utils import ( +from ..compat import ( + compat_html_parser, compat_urllib_parse, + compat_urllib_request, compat_urlparse, ) +from ..utils import ExtractorError class GroovesharkHtmlParser(compat_html_parser.HTMLParser): diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 228b42d2b..f0e28e66a 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor): m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) if not m: return rtmp_video_url - base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' + base = 'http://viacommtvstrmfs.fplive.net/' return base + m.group('finalid') def _get_feed_url(self, uri): diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 4fa0575f8..51e540814 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor): _VALID_URL = r'http://www\.myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', - 'file': '11741.mp4', 'md5': '0b49f4844a068f8b33f4b7c88405862b', 'info_dict': { + 'id': '11741', + 'ext': 'mp4', "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2", }, diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index add4b3e5d..c31ff8160 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -67,7 +67,7 @@ class NDRIE(InfoExtractor): thumbnail = None - video_url = re.search(r'''3: \{src:'(?P