Merge remote-tracking branch 'yaccz/add-extractor/freevideo'
author Philipp Hagemeister <phihag@phihag.de>
Thu, 13 Nov 2014 14:53:16 +0000 (15:53 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 13 Nov 2014 14:53:16 +0000 (15:53 +0100)
32 files changed:
AUTHORS
README.md
test/helper.py
test/test_utils.py
test/test_youtube_signature.py
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/allocine.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/goldenmoustache.py [new file with mode: 0644]
youtube_dl/extractor/grooveshark.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/sexu.py [new file with mode: 0644]
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/tapely.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/wrzuta.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index a574949b3105a6c69a875d281e67eb9bd65944ed..48189e26c6518781b924a3791964464721c3c214 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -79,4 +79,5 @@ Dennis Scheiba
 Damon Timm
 winwon
 Xavier Beynon
-Gabriel Schubiner
\ No newline at end of file
+Gabriel Schubiner
+xantares
index 933411fa2f4fd01b3195f7834d35e08ddfa45672..c284db4955130065504aabe046ac4c0526917ba4 100644 (file)
--- a/README.md
+++ b/README.md
@@ -131,17 +131,19 @@ which means you can modify it, redistribute it or use it however you like.
                                      %(upload_date)s for the upload date
                                      (YYYYMMDD), %(extractor)s for the provider
                                      (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist)s for the playlist the
+                                     video id, %(playlist_title)s,
+                                     %(playlist_id)s, or %(playlist)s (=title if
+                                     present, ID otherwise) for the playlist the
                                      video is in, %(playlist_index)s for the
-                                     position in the playlist and %% for a
-                                     literal percent. %(height)s and %(width)s
-                                     for the width and height of the video
-                                     format. %(resolution)s for a textual
+                                     position in the playlist. %(height)s and
+                                     %(width)s for the width and height of the
+                                     video format. %(resolution)s for a textual
                                      description of the resolution of the video
-                                     format. Use - to output to stdout. Can also
-                                     be used to download to a different
-                                     directory, for example with -o '/my/downloa
-                                     ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+                                     format. %% for a literal percent. Use - to
+                                     output to stdout. Can also be used to
+                                     download to a different directory, for
+                                     example with -o '/my/downloads/%(uploader)s
+                                     /%(title)s-%(id)s.%(ext)s' .
     --autonumber-size NUMBER         Specifies the number of digits in
                                      %(autonumber)s when it is present in output
                                      filename template or --auto-number option
@@ -239,8 +241,13 @@ which means you can modify it, redistribute it or use it however you like.
                                      "worst", "worstvideo" and "worstaudio". By
                                      default, youtube-dl will pick the best
                                      quality. Use commas to download multiple
-                                     audio formats, such as  -f
-                                     136/137/mp4/bestvideo,140/m4a/bestaudio
+                                     audio formats, such as -f
+                                     136/137/mp4/bestvideo,140/m4a/bestaudio.
+                                     You can merge the video and audio of two
+                                     formats into a single file using -f <video-
+                                     format>+<audio-format> (requires ffmpeg or
+                                     avconv), for example -f
+                                     bestvideo+bestaudio.
     --all-formats                    download all available video formats
     --prefer-free-formats            prefer free video formats unless a specific
                                      one is requested
@@ -500,7 +507,7 @@ If you want to add support for a new site, you can follow this quick list (assum
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
-9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
+9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/__init__.py
         $ git add youtube_dl/extractor/yourextractor.py
index 325f72f0a35c5c4fe6ac3a1464858b200c152aad..8be37a183abcedc80945d5bb663ed000dc33f738 100644 (file)
@@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
         info_dict_str = ''.join(
             '    %s: %s,\n' % (_repr(k), _repr(v))
             for k, v in test_info_dict.items())
-        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)
+        write_string(
+            '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
         self.assertFalse(
             missing_keys,
             'Missing keys in test definition: %s' % (
index e59547784578e8ed7eef6b88c571629c44ccec43..a9649397f3734db8903c5a943a7470cd6328f271 100644 (file)
@@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    clean_html,
     DateRange,
     encodeFilename,
     find_xpath_attr,
@@ -45,6 +46,7 @@ from youtube_dl.utils import (
     escape_url,
     js_to_json,
     get_filesystem_encoding,
+    intlist_to_bytes,
 )
 
 
@@ -345,5 +347,14 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})
 
+    def test_clean_html(self):
+        self.assertEqual(clean_html('a:\nb'), 'a: b')
+        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
+
+    def test_intlist_to_bytes(self):
+        self.assertEqual(
+            intlist_to_bytes([0, 1, 127, 128, 255]),
+            b'\x00\x01\x7f\x80\xff')
+
 if __name__ == '__main__':
     unittest.main()
index df2cb09f2a87dcacbb97de9193265f9bf1e852af..13d228cd85e9e260942635a36652c4cdc010dc8d 100644 (file)
@@ -14,7 +14,7 @@ import re
 import string
 
 from youtube_dl.extractor import YoutubeIE
-from youtube_dl.utils import compat_str, compat_urlretrieve
+from youtube_dl.compat import compat_str, compat_urlretrieve
 
 _TESTS = [
     (
index 8732f3db4e3a7c0eb91182954f0c3a7801dc681c..5c875b4972f61e2ce925c40d13a94fa53fb059d3 100755 (executable)
@@ -658,6 +658,8 @@ class YoutubeDL(object):
                 extra = {
                     'n_entries': n_entries,
                     'playlist': playlist,
+                    'playlist_id': ie_result.get('id'),
+                    'playlist_title': ie_result.get('title'),
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
                     'webpage_url': ie_result['webpage_url'],
@@ -836,6 +838,13 @@ class YoutubeDL(object):
                         formats_info = (self.select_format(format_1, formats),
                             self.select_format(format_2, formats))
                         if all(formats_info):
+                            # The first format must contain the video and the
+                            # second the audio
+                            if formats_info[0].get('vcodec') == 'none':
+                                self.report_error('The first format must '
+                                    'contain the video, try using '
+                                    '"-f %s+%s"' % (format_2, format_1))
+                                return
                             selected_format = {
                                 'requested_formats': formats_info,
                                 'format': rf,
@@ -1297,11 +1306,13 @@ class YoutubeDL(object):
             self.report_warning(
                 'Your Python is broken! Update to a newer and supported version')
 
+        stdout_encoding = getattr(
+            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
         encoding_str = (
             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                 locale.getpreferredencoding(),
                 sys.getfilesystemencoding(),
-                sys.stdout.encoding,
+                stdout_encoding,
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
index db76e88708fef55622e46bd6538563cdb0af1945..f45ce05ab6587fb5d2906eddcc8546e7ec5729ec 100644 (file)
@@ -142,6 +142,7 @@ from .generic import GenericIE
 from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
+from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
@@ -324,6 +325,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
index 69f89320ce7e30a5fdf65acff01df1668620334f..dc0fb85d6048962505d1d207ae590940d69f52e6 100644 (file)
@@ -11,13 +11,13 @@ class ABCIE(InfoExtractor):
     _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
-        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
         'info_dict': {
-            'id': '5624716',
+            'id': '5868334',
             'ext': 'mp4',
-            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
-            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
         },
     }
 
index 7bd7978841d06747145feeda56624de84747fcc1..398e93bfb4f8472a23d2b8669e7f83f867933244 100644 (file)
@@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor):
             'id': '19546517',
             'ext': 'mp4',
             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
-            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+            'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
index c13446665d2fb0e202973a26f5c7499c325719d1..1b8da43cae83253531e6f542af4ad0b22e588502 100644 (file)
@@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor):
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
         'playlist': [
             {
-                'file': '1353101989.mp3',
                 'md5': '39bc1eded3476e927c724321ddf116cf',
                 'info_dict': {
+                    'id': '1353101989',
+                    'ext': 'mp3',
                     'title': 'Intro',
                 }
             },
             {
-                'file': '38097443.mp3',
                 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
                 'info_dict': {
+                    'id': '38097443',
+                    'ext': 'mp3',
                     'title': 'Kero One - Keep It Alive (Blazo remix)',
                 }
             },
         ],
+        'info_dict': {
+            'title': 'Jazz Format Mixtape vol.1',
+        },
         'params': {
             'playlistend': 2
         },
index cf19b7b0cf952c3b14d9ef5b91f541332d3e5e69..6252be05b7f4b57787152b4edae5378675a96847 100644 (file)
@@ -10,12 +10,12 @@ from ..utils import ExtractorError
 class BYUtvIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
     _TEST = {
-        'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'info_dict': {
-            'id': 'granite-flats-talking',
+            'id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
-            'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
-            'title': 'Talking',
+            'description': 'md5:5438d33774b6bdc662f9485a340401cc',
+            'title': 'Season 5 Episode 5',
             'thumbnail': 're:^https?://.*promo.*'
         },
         'params': {
index dbcf5d6a72a5a8f44e988b506e2361e6279229ec..fd4bc75b2a91a1ad115a9152bf85debce7880431 100644 (file)
@@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # It may just embed a vevo video:
         m_vevo = re.search(
-            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
+            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
             webpage)
         if m_vevo is not None:
             vevo_id = m_vevo.group('id')
index 487d5d1b41b90f6afe3e3f739f80fc46262108be..a49fc1151cf324f5e4b61cbd4f1d586718410626 100644 (file)
@@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor):
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
-        'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
         'info_dict': {
             'id': 'e402820827',
             'ext': 'mp4',
diff --git a/youtube_dl/extractor/goldenmoustache.py b/youtube_dl/extractor/goldenmoustache.py
new file mode 100644 (file)
index 0000000..f2e231b
--- /dev/null
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class GoldenMoustacheIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
+        'md5': '0f904432fa07da5054d6c8beb5efb51a',
+        'info_dict': {
+            'id': '3700',
+            'ext': 'mp4',
+            'title': 'Suricate - Le Poker',
+            'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
+        title = self._html_search_regex(
+            r'<title>(.*?) - Golden Moustache</title>', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+        view_count = int_or_none(self._html_search_regex(
+            r'<strong>([0-9]+)</strong>\s*VUES</span>',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+        }
index 726adff773305844a5e8950b4467509646b79d1d..fff74a70a891fc163ff488408e4df348564b8a29 100644 (file)
@@ -8,12 +8,13 @@ import re
 
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
-
-from ..utils import (
+from ..compat import (
+    compat_html_parser,
     compat_urllib_parse,
+    compat_urllib_request,
     compat_urlparse,
 )
+from ..utils import ExtractorError
 
 
 class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
index 228b42d2b940d8eadd0fa3d5e61d0836fd19b7b7..f0e28e66ace8a5f59093eb2c9857968bcabf2dd7 100644 (file)
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
         if not m:
             return rtmp_video_url
-        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+        base = 'http://viacommtvstrmfs.fplive.net/'
         return base + m.group('finalid')
 
     def _get_feed_url(self, uri):
index 4fa0575f8a282aa6f8f561a7f18bc0129fceea8c..51e540814be209856a9a71f891e55eeb4ba559c2 100644 (file)
@@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor):
     _VALID_URL = r'http://www\.myspass\.de/.*'
     _TEST = {
         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
-        'file': '11741.mp4',
         'md5': '0b49f4844a068f8b33f4b7c88405862b',
         'info_dict': {
+            'id': '11741',
+            'ext': 'mp4',
             "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
             "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
         },
index add4b3e5d3b1fcdd4580f8a6fddcd115556ae1cb..c31ff8160851f824042804a06d905f85d3217416 100644 (file)
@@ -67,7 +67,7 @@ class NDRIE(InfoExtractor):
 
         thumbnail = None
 
-        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
         if video_url:
             thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
             if thumbnails:
index 45cbd4ee97b8cb66e3227d72e0ec7c3f238fa5f6..3b5784e8f5cb5ae8014c4f51d2d451d989b87860 100644 (file)
@@ -175,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor):
         entries = [{
             '_type': 'url',
             'ie_key': NiconicoIE.ie_key(),
-            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+            'url': ('http://www.nicovideo.jp/watch/%s' %
+                entry['item_data']['video_id']),
         } for entry in entries]
 
         return {
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py
new file mode 100644 (file)
index 0000000..6365a87
--- /dev/null
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SexuIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://sexu.com/961791/',
+        'md5': 'ff615aca9691053c94f8f10d96cd7884',
+        'info_dict': {
+            'id': '961791',
+            'ext': 'mp4',
+            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        quality_arr = self._search_regex(
+            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        formats = [{
+            'url': fmt[0].replace('\\', ''),
+            'format_id': fmt[1],
+            'height': int(fmt[1][:3]),
+        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = self._html_search_regex(
+            r'image:\s*"([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        categories_str = self._html_search_meta(
+            'keywords', webpage, 'categories')
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'formats': formats,
+            'age_limit': 18,
+        }
index 9ed7d3b39e227806971fe98f43e1c1018b84ad3c..9586a7da2226ff822992cb21bc1698952c14b29e 100644 (file)
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse
 
 
 class SpiegelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
     _TESTS = [{
         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
         'md5': '2c2754212136f35fb4b19767d242f66e',
@@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
             'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
             'duration': 983,
         },
+    }, {
+        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
+        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+        'info_dict': {
+            'id': '1519126',
+            'ext': 'mp4',
+            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
+            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
+        }
     }]
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(
-            r'<div class="module-title">(.*?)</div>', webpage, 'title')
+        title = re.sub(r'\s+', ' ', self._html_search_regex(
+            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
+            webpage, 'title'))
         description = self._html_search_meta('description', webpage, 'description')
 
         base_url = self._search_regex(
@@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
     _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
     IE_NAME = 'Spiegel:Article'
     IE_DESC = 'Articles on spiegel.de'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
         'info_dict': {
             'id': '1516455',
@@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
             'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
             'description': 're:^Patrick Kämnitz gehört.{100,}',
         },
-    }
+    }, {
+        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
+        'info_dict': {
 
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        },
+        'playlist_count': 6,
+    }]
 
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
+        # Single video on top of the page
         video_link = self._search_regex(
             r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
-            'video page URL')
-        video_url = compat_urlparse.urljoin(
-            self.http_scheme() + '//spiegel.de/', video_link)
-
-        return {
-            '_type': 'url',
-            'url': video_url,
-        }
+            'video page URL', default=None)
+        if video_link:
+            video_url = compat_urlparse.urljoin(
+                self.http_scheme() + '//spiegel.de/', video_link)
+            return self.url_result(video_url)
+
+        # Multiple embedded videos
+        embeds = re.findall(
+            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
+            webpage)
+        entries = [
+            self.url_result(compat_urlparse.urljoin(
+                self.http_scheme() + '//spiegel.de/', embed_path))
+            for embed_path in embeds
+        ]
+        return self.playlist_result(entries)
index 172def221e1277298dc355a2cfdbea3ae4f9fdce..c1178f26de0b961ad68eb6d1ddb89550746f4dd7 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class StreamcloudIE(InfoExtractor):
     IE_NAME = 'streamcloud.eu'
-    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
+    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
 
     _TEST = {
         'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
@@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        url = 'http://streamcloud.eu/%s' % video_id
 
         orig_webpage = self._download_webpage(url, video_id)
 
index 13c6ea67728d040a9e1f17111031952492d921b5..bf430d870125a78788d364b0ea615983ae170948 100644 (file)
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
 
             if media_type == 'Video':
                 fmt.update({
-                    'format_note': ['144p', '288p', '544p'][quality-1],
+                    'format_note': ['144p', '288p', '544p', '720p'][quality-1],
                     'vcodec': codec,
                 })
             elif media_type == 'Audio':
@@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': attr['channel_title'],
             'uploader_id': attr['channel_idkey'],
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 77e0562425f0577869c01cb10e690fa9bef7d230..283e11350b212db0c857f1ccdb8982519a78cfbb 100644 (file)
@@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor):
         request = compat_urllib_request.Request(playlist_url)
         request.add_header('X-Requested-With', 'XMLHttpRequest')
         request.add_header('Accept', 'application/json')
+        request.add_header('Referer', url)
 
         playlist = self._download_json(request, display_id)
 
index cd4af96fdb02b2f4ea392fe19482371b6a02acad..8550380779168a80b95e526f8921059e2eddf8f4 100644 (file)
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
                 'actively fooling us.'),
             'uploader': 'Dan Dennett',
             'width': 854,
+            'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'title': 'Be passionate. Be courageous. Be your best.',
             'uploader': 'Gabby Giffords and Mark Kelly',
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
+            'duration': 1128,
         },
     }, {
         'url': 'http://www.ted.com/playlists/who_are_the_hackers',
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'description': self._og_search_description(webpage),
             'subtitles': video_subtitles,
             'formats': formats,
+            'duration': talk_info.get('duration'),
         }
 
     def _get_available_subtitles(self, video_id, talk_info):
index 445e0ec419ccc7eb2e23e522f6f3eba6010dcd69..eb94737546fb725e992249d356588417d90ad81a 100644 (file)
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    compat_str,
     parse_iso8601,
     qualities,
 )
@@ -176,8 +176,7 @@ class TVPlayIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         video = self._download_json(
             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
@@ -208,6 +207,10 @@ class TVPlayIE(InfoExtractor):
                     'app': m.group('app'),
                     'play_path': m.group('playpath'),
                 })
+            elif video_url.endswith('.f4m'):
+                formats.extend(self._extract_f4m_formats(
+                    video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
+                continue
             else:
                 fmt.update({
                     'url': video_url,
index 41756784a323c00675f4d720eee98d8ff66f1d1b..c427649211079715a5510eef3eaf35981bdb1034 100644 (file)
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
         },
     }, {
-        'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
-        'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+        'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
+        'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
         'info_dict': {
-            'id': '9oXJqdcndqv',
+            'id': '063jOPX5ue2',
             'ext': 'ogg',
-            'title': 'David Guetta & Showtek ft. Vassy - Bad',
-            'duration': 270,
-            'uploader_id': 'w729',
-            'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+            'title': 'Liber & Natalia Szroeder - Teraz Ty',
+            'duration': 203,
+            'uploader_id': 'jolka85',
+            'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
         },
     }]
 
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
 
         quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
 
-        audio_table = {'flv': 'mp3', 'webm': 'ogg', 'mp3': 'mp3'}
+        audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
 
         embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
 
         formats = []
         for media in embedpage['url']:
+            fmt = media['type'].split('@')[0]
             if typ == 'audio':
-                ext = audio_table[media['type'].split('@')[0]]
+                ext = audio_table.get(fmt, fmt)
             else:
-                ext = media['type'].split('@')[0]
+                ext = fmt
 
             formats.append({
                 'format_id': '%s_%s' % (ext, media['quality'].lower()),
index b86331e3cfa39ec8d3f287e829900b414892beee..c642075dcfabbfb025d64b92e392d614578f42b1 100644 (file)
@@ -9,40 +9,30 @@ from ..utils import (
 
 
 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
     _TEST = {
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
-        'file': '2189178.flv',
         'md5': '07e15fa469ba384c7693fd246905547c',
         'info_dict': {
+            'id': '2189178',
+            'ext': 'flv',
             "title": "Zeichentrick 1",
             "age_limit": 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('videoid')
-
-        # Get webpage content
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-
         age_limit = self._rta_search(webpage)
-
-        # Get the video title
-        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
-            webpage, 'title').strip()
-
-        # Get the embed page
-        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
-        if result is None:
-            raise ExtractorError('ERROR: unable to extract embed page')
-
-        embed_page_url = result.group(0).strip()
-        video_id = result.group('videoid')
-
-        webpage = self._download_webpage(embed_page_url, video_id)
+        video_title = self._html_search_regex(
+            r'<title>\s*(.*)\s*</title>', webpage, 'title')
+
+        embed_page_url = self._search_regex(
+            r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
+            webpage, 'embed page')
+        webpage = self._download_webpage(
+            embed_page_url, video_id, note='downloading embed page')
 
         # Get the video URL
         m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
index c77d4056f7a49c1c39a33cca5bbb8086e01ecd4f..cea3d5c951d4209af24ed32111dd368802c0a183 100644 (file)
@@ -510,7 +510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
-            r'signature=([$a-zA-Z]+)', jscode,
+            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
              'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
@@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
         'info_dict': {
             'title': 'ytdl test PL',
+            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
         },
         'playlist_count': 3,
     }, {
index 997e92ad77e53b44a03e1ae8268a9460a60255f4..c182abfdc8f211b7c7520547926ba09b40bf1071 100644 (file)
@@ -261,7 +261,16 @@ def parseOpts(overrideArguments=None):
     video_format.add_option(
         '-f', '--format',
         action='store', dest='format', metavar='FORMAT', default=None,
-        help='video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as  -f  136/137/mp4/bestvideo,140/m4a/bestaudio')
+        help='video format code, specify the order of preference using'
+            ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also'
+            ' supported. You can also use the special names "best",'
+            ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
+            ' "worstaudio". By default, youtube-dl will pick the best quality.'
+            ' Use commas to download multiple audio formats, such as'
+            ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
+            ' You can merge the video and audio of two formats into a single'
+            ' file using -f <video-format>+<audio-format> (requires ffmpeg or'
+            ' avconv), for example -f bestvideo+bestaudio.')
     video_format.add_option(
         '--all-formats',
         action='store_const', dest='format', const='all',
@@ -481,10 +490,12 @@ def parseOpts(overrideArguments=None):
               '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
               '%(upload_date)s for the upload date (YYYYMMDD), '
               '%(extractor)s for the provider (youtube, metacafe, etc), '
-              '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
-              '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+              '%(id)s for the video id, '
+              '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, '
+              '%(playlist_index)s for the position in the playlist. '
               '%(height)s and %(width)s for the width and height of the video format. '
               '%(resolution)s for a textual description of the resolution of the video format. '
+              '%% for a literal percent. '
               'Use - to output to stdout. Can also be used to download to a different directory, '
               'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
     filesystem.add_option(
index 7c0fb1592914e961a6b1e790bf14fb5525eff08a..0b2ba39e2ec7ab919a2cff022ac50ee9377c05c0 100644 (file)
@@ -34,7 +34,6 @@ from .compat import (
     compat_chr,
     compat_getenv,
     compat_html_entities,
-    compat_html_parser,
     compat_parse_qs,
     compat_str,
     compat_urllib_error,
@@ -844,10 +843,7 @@ def bytes_to_intlist(bs):
 def intlist_to_bytes(xs):
     if not xs:
         return b''
-    if isinstance(chr(0), bytes):  # Python 2
-        return ''.join([chr(x) for x in xs])
-    else:
-        return bytes(xs)
+    return struct.pack('%dB' % len(xs), *xs)
 
 
 # Cross-platform file locking
index 07cdb22ad44bd9cfb2504faf6d4d18b435032523..ed0c828ad6de298d10570fe6a8d911922ab1fd51 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.11.04'
+__version__ = '2014.11.13.1'