Merge pull request #2153 from jaimeMF/ffmpeg-merger-check-install
author Philipp Hagemeister <phihag@phihag.de>
Sun, 19 Jan 2014 04:42:51 +0000 (20:42 -0800)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 19 Jan 2014 04:42:51 +0000 (20:42 -0800)
Don’t try to merge the formats if ffmpeg or avconv are not installed
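The merge makes youtube-dl check for a merger executable before attempting to combine separately downloaded video and audio formats, warning and skipping the merge instead of failing when neither ffmpeg nor avconv is present. The sketch below is only an illustration of that kind of guard, not the project's actual code: the helper names are hypothetical, the warning text is paraphrased, and it uses shutil.which (Python 3.3+) for brevity, whereas youtube-dl of this era also supported Python 2 and located the executable inside its FFmpeg postprocessor.

    import shutil

    def find_merge_executable():
        # Prefer ffmpeg, fall back to avconv; return None if neither is on PATH.
        # Hypothetical helper for illustration only.
        for candidate in ('ffmpeg', 'avconv'):
            if shutil.which(candidate) is not None:
                return candidate
        return None

    def maybe_merge(report_warning):
        # Skip the merge step (and warn) rather than erroring out when no
        # merger executable is installed.
        if find_merge_executable() is None:
            report_warning(
                'Multiple formats were requested but ffmpeg or avconv is not '
                'installed; the formats will not be merged.')
            return False
        return True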

26 files changed:
README.md
test/test_download.py
test/test_playlists.py
test/test_subtitles.py
test/test_youtube_signature.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/common.py
youtube_dl/extractor/condenast.py
youtube_dl/extractor/everyonesmixtape.py [new file with mode: 0644]
youtube_dl/extractor/flickr.py
youtube_dl/extractor/franceinter.py [new file with mode: 0644]
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/kankan.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mpora.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/youtube.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/version.py

index bc7dfac69e00c634ae5fb07c66fc3db3ecdd5db0..cf0bb7b654cba513c598697186abb3e4507dd3dd 100644 (file)
--- a/README.md
+++ b/README.md
@@ -93,13 +93,13 @@ which means you can modify it, redistribute it or use it however you like.
                                different, %(autonumber)s to get an automatically
                                incremented number, %(ext)s for the filename
                                extension, %(format)s for the format description
-                               (like "22 - 1280x720" or "HD"),%(format_id)s for
+                               (like "22 - 1280x720" or "HD"), %(format_id)s for
                                the unique id of the format (like Youtube's
-                               itags: "137"),%(upload_date)s for the upload date
-                               (YYYYMMDD), %(extractor)s for the provider
-                               (youtube, metacafe, etc), %(id)s for the video id
-                               , %(playlist)s for the playlist the video is in,
-                               %(playlist_index)s for the position in the
+                               itags: "137"), %(upload_date)s for the upload
+                               date (YYYYMMDD), %(extractor)s for the provider
+                               (youtube, metacafe, etc), %(id)s for the video
+                               id, %(playlist)s for the playlist the video is
+                               in, %(playlist_index)s for the position in the
                                playlist and %% for a literal percent. Use - to
                                output to stdout. Can also be used to download to
                                a different directory, for example with -o '/my/d
@@ -111,7 +111,7 @@ which means you can modify it, redistribute it or use it however you like.
                                avoid "&" and spaces in filenames
     -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
     --load-info FILE           json file containing the video information
-                               (created with the "--write-json" option
+                               (created with the "--write-json" option)
     -w, --no-overwrites        do not overwrite files
     -c, --continue             force resume of partially downloaded files. By
                                default, youtube-dl will resume downloads if
@@ -145,7 +145,7 @@ which means you can modify it, redistribute it or use it however you like.
     --no-progress              do not print progress bar
     --console-title            display progress in console titlebar
     -v, --verbose              print various debugging information
-    --dump-intermediate-pages  print downloaded pages to debug problems(very
+    --dump-intermediate-pages  print downloaded pages to debug problems (very
                                verbose)
     --write-pages              Write downloaded intermediary pages to files in
                                the current directory to debug problems
@@ -158,8 +158,7 @@ which means you can modify it, redistribute it or use it however you like.
     --prefer-free-formats      prefer free video formats unless a specific one
                                is requested
     --max-quality FORMAT       highest quality format to download
-    -F, --list-formats         list all available formats (currently youtube
-                               only)
+    -F, --list-formats         list all available formats
 
 ## Subtitle Options:
     --write-sub                write subtitle file
@@ -177,7 +176,7 @@ which means you can modify it, redistribute it or use it however you like.
     -u, --username USERNAME    account username
     -p, --password PASSWORD    account password
     -n, --netrc                use .netrc authentication data
-    --video-password PASSWORD  video password (vimeo only)
+    --video-password PASSWORD  video password (vimeo, smotri)
 
 ## Post-processing Options:
     -x, --extract-audio        convert video files to audio-only files (requires
index d0be8d27ca93631b73e6cf82d459cbdbf722b2fa..0d925ae69bcf63e13ff35ec1df3c5de0c3b09b3c 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,7 +148,7 @@ def generator(test_case):
                     for key, value in info_dict.items()
                     if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                 if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
+                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
 
                 # Check for the presence of mandatory fields
                 for key in ('id', 'url', 'title', 'ext'):
index b3bfbd9238d185c471640b00db150961cc67bad2..5eeba091eefb5fcd3e4d4b57f94d97b79c91361d 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -32,6 +32,7 @@ from youtube_dl.extractor import (
     IviCompilationIE,
     ImdbListIE,
     KhanAcademyIE,
+    EveryonesMixtapeIE,
 )
 
 
@@ -210,6 +211,15 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
         self.assertTrue(len(result['entries']) >= 3)
 
+    def test_EveryonesMixtape(self):
+        dl = FakeYDL()
+        ie = EveryonesMixtapeIE(dl)
+        result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'm7m0jJAbMQi')
+        self.assertEqual(result['title'], 'Driving')
+        self.assertEqual(len(result['entries']), 24)
+
 
 if __name__ == '__main__':
     unittest.main()
index 263b5ac698516f93b5c2d6eaffb7379cbf5459a5..1e4e62faae69da625867d3cf5d2047e397808727 100644 (file)
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -167,13 +167,13 @@ class TestTedSubtitles(BaseTestSubtitles):
     def test_subtitles(self):
         self.DL.params['writesubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
+        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
 
     def test_subtitles_lang(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['subtitleslangs'] = ['fr']
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
+        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
 
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
index 056700614b43fa0a3dbceeb82ef991e34fdb53f9..a3fc53047de031c8c8e01046b53a891ef6b693a8 100644 (file)
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -27,12 +27,6 @@ _TESTS = [
         85,
         u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
     ),
-    (
-        u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
-        u'swf',
-        82,
-        u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
-    ),
 ]
 
 
index 8f783a86c3218056cdb8d9f501fdc3d6278126ca..82b1ff4f45e5ad032493093101f0c9439c4590a4 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -39,6 +39,7 @@ __authors__  = (
     'Sergey M.',
     'Michael Orlitzky',
     'Chris Gahan',
+    'Saimadhav Heblikar',
 )
 
 __license__ = 'Public Domain'
index b887c7f101970561907c95eeba912a7656dab79f..d66f7b02641437bd97ccd6c9aa9b3ef7211d3c9b 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .escapist import EscapistIE
+from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
@@ -61,6 +62,7 @@ from .fktv import (
     FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
+from .franceinter import FranceInterIE
 from .francetv import (
     PluzzIE,
     FranceTvInfoIE,
index c9e7cc561e4ce55183c8ab50d9946b0aab9d9b12..80bf59ade7c3ff45dc04be324020abbaf69ee4e8 100644 (file)
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -25,12 +25,13 @@ class CNNIE(InfoExtractor):
         },
     },
     {
-        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
-        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
-        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
-        u"info_dict": {
-            u"title": "Student's epic speech stuns new freshmen",
-            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+        "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+        "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+        "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
+        "info_dict": {
+            "title": "Student's epic speech stuns new freshmen",
+            "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
+            "upload_date": "20130821",
         }
     }]
 
index ce3d1690304c8a0b488c2f674c48bb4ef34c86b5..692d828da9ef9739e1b05908e6a9f39259b0940b 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import json
 import os
 import re
@@ -234,6 +235,9 @@ class InfoExtractor(object):
                 url = url_or_request.get_full_url()
             except AttributeError:
                 url = url_or_request
+            if len(url) > 200:
+                h = hashlib.md5(url).hexdigest()
+                url = url[:200 - len(h)] + h
             raw_filename = ('%s_%s.dump' % (video_id, url))
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen(u'Saving request to ' + filename)
index f336a3c620a04e8bb643309b4812725e8f50e1d1..03b75b80d3b3019a75f89f2a773c4c4753afe2eb 100644 (file)
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals
 
 import re
 import json
@@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor):
 
     # The keys are the supported sites and the values are the name to be shown
     # to the user and in the extractor description.
-    _SITES = {'wired': u'WIRED',
-              'gq': u'GQ',
-              'vogue': u'Vogue',
-              'glamour': u'Glamour',
-              'wmagazine': u'W Magazine',
-              'vanityfair': u'Vanity Fair',
-              }
+    _SITES = {
+        'wired': 'WIRED',
+        'gq': 'GQ',
+        'vogue': 'Vogue',
+        'glamour': 'Glamour',
+        'wmagazine': 'W Magazine',
+        'vanityfair': 'Vanity Fair',
+    }
 
     _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
-    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+    IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 
     _TEST = {
-        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
-        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
-        u'md5': u'1921f713ed48aabd715691f774c451f7',
-        u'info_dict': {
-            u'title': u'3D Printed Speakers Lit With LED',
-            u'description': u'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+        'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
+        'md5': '1921f713ed48aabd715691f774c451f7',
+        'info_dict': {
+            'title': '3D Printed Speakers Lit With LED',
+            'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
         }
     }
 
     def _extract_series(self, url, webpage):
         title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
-                                        webpage, u'series title', flags=re.DOTALL)
+                                        webpage, 'series title', flags=re.DOTALL)
         url_object = compat_urllib_parse_urlparse(url)
         base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
         m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
@@ -57,39 +59,41 @@ class CondeNastIE(InfoExtractor):
         description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
                                                r'<div class="video-post-content">(.+?)</div>',
                                                ],
-                                              webpage, u'description',
+                                              webpage, 'description',
                                               fatal=False, flags=re.DOTALL)
         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
-                                    u'player params', flags=re.DOTALL)
-        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
-        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
-        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
+                                    'player params', flags=re.DOTALL)
+        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
+        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
+        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
         data = compat_urllib_parse.urlencode({'videoId': video_id,
                                               'playerId': player_id,
                                               'target': target,
                                               })
         base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
-                                           webpage, u'base info url',
+                                           webpage, 'base info url',
                                            default='http://player.cnevids.com/player/loader.js?')
         info_url = base_info_url + data
         info_page = self._download_webpage(info_url, video_id,
-                                           u'Downloading video info')
-        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
+                                           'Downloading video info')
+        video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
         video_info = json.loads(video_info)
 
-        def _formats_sort_key(f):
-            type_ord = 1 if f['type'] == 'video/mp4' else 0
-            quality_ord = 1 if f['quality'] == 'high' else 0
-            return (quality_ord, type_ord)
-        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
+        formats = [{
+            'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
+            'url': fdata['src'],
+            'ext': fdata['type'].split('/')[-1],
+            'quality': 1 if fdata['quality'] == 'high' else 0,
+        } for fdata in video_info['sources'][0]]
+        self._sort_formats(formats)
 
-        return {'id': video_id,
-                'url': best_format['src'],
-                'ext': best_format['type'].split('/')[-1],
-                'title': video_info['title'],
-                'thumbnail': video_info['poster_frame'],
-                'description': description,
-                }
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': video_info['title'],
+            'thumbnail': video_info['poster_frame'],
+            'description': description,
+        }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py
new file mode 100644 (file)
index 0000000..12829cb
--- /dev/null
+++ b/youtube_dl/extractor/everyonesmixtape.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    ExtractorError,
+)
+
+
+class EveryonesMixtapeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
+
+    _TEST = {
+        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
+        'file': '5bfseWNmlds.mp4',
+        "info_dict": {
+            "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
+            "uploader": "FKR.TV",
+            "uploader_id": "frenchkissrecords",
+            "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
+            "upload_date": "20081015"
+        },
+        'params': {
+            'skip_download': True,  # This is simply YouTube
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
+        pllist_req = compat_urllib_request.Request(pllist_url)
+        pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
+
+        playlist_list = self._download_json(
+            pllist_req, playlist_id, note='Downloading playlist metadata')
+        try:
+            playlist_no = next(playlist['id']
+                               for playlist in playlist_list
+                               if playlist['code'] == playlist_id)
+        except StopIteration:
+            raise ExtractorError('Playlist id not found')
+
+        pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
+        pl_req = compat_urllib_request.Request(pl_url)
+        pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
+        playlist = self._download_json(
+            pl_req, playlist_id, note='Downloading playlist info')
+
+        entries = [{
+            '_type': 'url',
+            'url': t['url'],
+            'title': t['title'],
+        } for t in playlist['tracks']]
+
+        if mobj.group('songnr'):
+            songnr = int(mobj.group('songnr')) - 1
+            return entries[songnr]
+
+        playlist_title = playlist['mixData']['name']
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'entries': entries,
+        }
index e1d2f0526a42169fbbba96789a413d3d20fec6a6..21ea5ec2bf1c499149809971b780daa9d10d0291 100644 (file)
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -11,13 +13,13 @@ class FlickrIE(InfoExtractor):
     """Information Extractor for Flickr videos"""
     _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
     _TEST = {
-        u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
-        u'file': u'5645318632.mp4',
-        u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
-        u'info_dict': {
-            u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
-            u"uploader_id": u"forestwander-nature-pictures", 
-            u"title": u"Dark Hollow Waterfalls"
+        'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
+        'file': '5645318632.mp4',
+        'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+        'info_dict': {
+            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
+            "uploader_id": "forestwander-nature-pictures", 
+            "title": "Dark Hollow Waterfalls"
         }
     }
 
@@ -29,13 +31,13 @@ class FlickrIE(InfoExtractor):
         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
-        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
+        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
 
         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 
         node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
-            first_xml, u'node_id')
+            first_xml, 'node_id')
 
         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
@@ -44,7 +46,7 @@ class FlickrIE(InfoExtractor):
 
         mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
         if mobj is None:
-            raise ExtractorError(u'Unable to extract video url')
+            raise ExtractorError('Unable to extract video url')
         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 
         return [{
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py
new file mode 100644 (file)
index 0000000..deb1b0b
--- /dev/null
+++ b/youtube_dl/extractor/franceinter.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FranceInterIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
+    _TEST = {
+        'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
+        'file': '793962.mp3',
+        'md5': '4764932e466e6f6c79c317d2e74f6884',
+        "info_dict": {
+            "title": "L’Histoire dans les jeux vidéo",
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
+        path = self._search_regex(
+            r'&urlAOD=(.*?)&startTime', webpage, 'video url')
+        video_url = 'http://www.franceinter.fr/' + path
+
+        return {
+            'id': video_id,
+            'formats': [{
+                'url': video_url,
+                'vcodec': 'none',
+            }],
+            'title': title,
+        }
index 26b7d2ae531f785bc3177af4029652c531d840da..380ebbe556355b8fc0f49e825fa15433d5180b0b 100644 (file)
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -13,12 +15,12 @@ from ..utils import (
 class GameSpotIE(InfoExtractor):
     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
     _TEST = {
-        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
-        u"file": u"gs-2300-6410818.mp4",
-        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
-        u"info_dict": {
-            u"title": u"Arma 3 - Community Guide: SITREP I",
-            u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
+        "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
+        "file": "gs-2300-6410818.mp4",
+        "md5": "b2a30deaa8654fcccd43713a6b6a4825",
+        "info_dict": {
+            "title": "Arma 3 - Community Guide: SITREP I",
+            'description': 'Check out this video where some of the basics of Arma 3 is explained.',
         }
     }
 
index a9023f38d3b1ecd04aff4032fa6ceade7e7575b5..839530982e7db4704ece2a589420ab4ecfc31c66 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -328,7 +328,7 @@ class GenericIE(InfoExtractor):
             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
         if mobj is None:
             # Broaden the search a little bit: JWPlayer JS loader
-            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
+            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
         if mobj is None:
             # Try to find twitter cards info
             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
index 50916f4a66c6227e1eb4dc531745c9d5a6ad85d5..23103b163fea1ed6a27cb44dadcf231b478edcdb 100644 (file)
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -1,21 +1,24 @@
+from __future__ import unicode_literals
+
 import re
 import hashlib
 
 from .common import InfoExtractor
-from ..utils import determine_ext
 
 _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
+
 class KankanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
     
     _TEST = {
-        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
-        u'file': u'48863.flv',
-        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
-        u'info_dict': {
-            u'title': u'Ready To Go',
+        'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
+        'file': '48863.flv',
+        'md5': '29aca1e47ae68fc28804aca89f29507e',
+        'info_dict': {
+            'title': 'Ready To Go',
         },
+        'skip': 'Only available from China',
     }
 
     def _real_extract(self, url):
@@ -23,22 +26,23 @@ class KankanIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
         surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
         gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
         gcid = gcids[-1]
 
-        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
-                                                 video_id, u'Downloading video url info')
-        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
-        path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
-        param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
-        param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+        info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
+        video_info_page = self._download_webpage(
+            info_url, video_id, 'Downloading video url info')
+        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
+        path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
+        param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
+        param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
         key = _md5('xl_mp43651' + param1 + param2)
         video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
 
-        return {'id': video_id,
-                'title': title,
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                }
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
index 7c54ea0f4c7f351161adae175edca2743ab55266..f3356db50ebf8941ac58e9a229778ba864c57be0 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,4 +1,5 @@
-import json
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -10,17 +11,17 @@ from ..utils import (
 
 class MixcloudIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
-    IE_NAME = u'mixcloud'
+    IE_NAME = 'mixcloud'
 
     _TEST = {
-        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
-        u'file': u'dholbach-cryptkeeper.mp3',
-        u'info_dict': {
-            u'title': u'Cryptkeeper',
-            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
-            u'uploader': u'Daniel Holbach',
-            u'uploader_id': u'dholbach',
-            u'upload_date': u'20111115',
+        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+        'file': 'dholbach-cryptkeeper.mp3',
+        'info_dict': {
+            'title': 'Cryptkeeper',
+            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+            'uploader': 'Daniel Holbach',
+            'uploader_id': 'dholbach',
+            'upload_date': '20111115',
         },
     }
 
@@ -42,17 +43,18 @@ class MixcloudIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-
         uploader = mobj.group(1)
         cloudcast_name = mobj.group(2)
         track_id = '-'.join((uploader, cloudcast_name))
-        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+
         webpage = self._download_webpage(url, track_id)
-        json_data = self._download_webpage(api_url, track_id,
-            u'Downloading cloudcast info')
-        info = json.loads(json_data)
 
-        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+        info = self._download_json(
+            api_url, track_id, 'Downloading cloudcast info')
+
+        preview_url = self._search_regex(
+            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
         final_song_url = self._get_url(template_url)
index 0836243ea6a7e5b6a74e74311e33e5fb56c6a788..6a8e2cc442c25327d9d91acebb47d4e109750731 100644 (file)
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -34,7 +34,7 @@ class MporaIE(InfoExtractor):
 
         data = json.loads(data_json)
 
-        uploader = data['info_overlay']['name']
+        uploader = data['info_overlay'].get('username')
         duration = data['video']['duration'] // 1000
         thumbnail = data['video']['encodings']['sd']['poster']
         title = data['info_overlay']['title']
index c2254ae8abdca2ab9dde2388fb2182b056ffd0e2..5c4cd20687a5745982bdce0d43417297bca0bc28 100644 (file)
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class RedTubeIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+    _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         u'url': u'http://www.redtube.com/66418',
         u'file': u'66418.mp4',
index 951e977bd0ba014340fe3eeb626723bde258e0dd..393b5f17c53d5ed216b53ac0d1ff2941cb1d24f7 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import json
 import re
 import itertools
@@ -32,58 +34,58 @@ class SoundcloudIE(InfoExtractor):
                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''
-    IE_NAME = u'soundcloud'
+    IE_NAME = 'soundcloud'
     _TESTS = [
         {
-            u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-            u'file': u'62986583.mp3',
-            u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
-            u'info_dict': {
-                u"upload_date": u"20121011", 
-                u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
-                u"uploader": u"E.T. ExTerrestrial Music", 
-                u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+            'file': '62986583.mp3',
+            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+            'info_dict': {
+                "upload_date": "20121011",
+                "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+                "uploader": "E.T. ExTerrestrial Music",
+                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
             }
         },
         # not streamable song
         {
-            u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
-            u'info_dict': {
-                u'id': u'47127627',
-                u'ext': u'mp3',
-                u'title': u'Goldrushed',
-                u'uploader': u'The Royal Concept',
-                u'upload_date': u'20120521',
+            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+            'info_dict': {
+                'id': '47127627',
+                'ext': 'mp3',
+                'title': 'Goldrushed',
+                'uploader': 'The Royal Concept',
+                'upload_date': '20120521',
             },
-            u'params': {
+            'params': {
                 # rtmp
-                u'skip_download': True,
+                'skip_download': True,
             },
         },
         # private link
         {
-            u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
-            u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604',
-            u'info_dict': {
-                u'id': u'123998367',
-                u'ext': u'mp3',
-                u'title': u'Youtube - Dl Test Video \'\' Ä↭',
-                u'uploader': u'jaimeMF',
-                u'description': u'test chars:  \"\'/\\ä↭',
-                u'upload_date': u'20131209',
+            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
+            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+            'info_dict': {
+                'id': '123998367',
+                'ext': 'mp3',
+                'title': 'Youtube - Dl Test Video \'\' Ä↭',
+                'uploader': 'jaimeMF',
+                'description': 'test chars:  \"\'/\\ä↭',
+                'upload_date': '20131209',
             },
         },
         # downloadable song
         {
-            u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
-            u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
-            u'info_dict': {
-                u'id': u'105614606',
-                u'ext': u'wav',
-                u'title': u'Just Your Problem Baby (Acapella)',
-                u'description': u'Vocals',
-                u'uploader': u'Sim Gretina',
-                u'upload_date': u'20130815',
+            'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
+            'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+            'info_dict': {
+                'id': '105614606',
+                'ext': 'wav',
+                'title': 'Just Your Problem Baby (Acapella)',
+                'description': 'Vocals',
+                'uploader': 'Sim Gretina',
+                'upload_date': '20130815',
             },
         },
     ]
@@ -112,7 +114,7 @@ class SoundcloudIE(InfoExtractor):
         thumbnail = info['artwork_url']
         if thumbnail is not None:
             thumbnail = thumbnail.replace('-large', '-t500x500')
-        ext = u'mp3'
+        ext = 'mp3'
         result = {
             'id': track_id,
             'uploader': info['user']['username'],
@@ -124,11 +126,11 @@ class SoundcloudIE(InfoExtractor):
         if info.get('downloadable', False):
             # We can build a direct link to the song
             format_url = (
-                u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
+                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                     track_id, self._CLIENT_ID))
             result['formats'] = [{
                 'format_id': 'download',
-                'ext': info.get('original_format', u'mp3'),
+                'ext': info.get('original_format', 'mp3'),
                 'url': format_url,
                 'vcodec': 'none',
             }]
@@ -138,7 +140,7 @@ class SoundcloudIE(InfoExtractor):
                 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
             stream_json = self._download_webpage(
                 streams_url,
-                track_id, u'Downloading track url')
+                track_id, 'Downloading track url')
 
             formats = []
             format_dict = json.loads(stream_json)
@@ -165,20 +167,19 @@ class SoundcloudIE(InfoExtractor):
                 # We fallback to the stream_url in the original info, this
                 # cannot be always used, sometimes it can give an HTTP 404 error
                 formats.append({
-                    'format_id': u'fallback',
+                    'format_id': 'fallback',
                     'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
                     'ext': ext,
                     'vcodec': 'none',
                 })
 
-            def format_pref(f):
+            for f in formats:
                 if f['format_id'].startswith('http'):
-                    return 2
+                    f['protocol'] = 'http'
                 if f['format_id'].startswith('rtmp'):
-                    return 1
-                return 0
+                    f['protocol'] = 'rtmp'
 
-            formats.sort(key=format_pref)
+            self._sort_formats(formats)
             result['formats'] = formats
 
         return result
@@ -210,14 +211,14 @@ class SoundcloudIE(InfoExtractor):
     
             url = 'http://soundcloud.com/%s' % resolve_title
             info_json_url = self._resolv_url(url)
-        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
+        info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')
 
         info = json.loads(info_json)
         return self._extract_info_dict(info, full_title, secret_token=token)
 
 class SoundcloudSetIE(SoundcloudIE):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
-    IE_NAME = u'soundcloud:set'
+    IE_NAME = 'soundcloud:set'
     # it's in tests/test_playlists.py
     _TESTS = []
 
@@ -254,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):
 
 class SoundcloudUserIE(SoundcloudIE):
     _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
-    IE_NAME = u'soundcloud:user'
+    IE_NAME = 'soundcloud:user'
 
     # it's in tests/test_playlists.py
     _TESTS = []
@@ -266,7 +267,7 @@ class SoundcloudUserIE(SoundcloudIE):
         url = 'http://soundcloud.com/%s/' % uploader
         resolv_url = self._resolv_url(url)
         user_json = self._download_webpage(resolv_url, uploader,
-            u'Downloading user info')
+            'Downloading user info')
         user = json.loads(user_json)
 
         tracks = []
@@ -276,7 +277,7 @@ class SoundcloudUserIE(SoundcloudIE):
                                                   })
             tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
             response = self._download_webpage(tracks_url, uploader, 
-                u'Downloading tracks page %s' % (i+1))
+                'Downloading tracks page %s' % (i+1))
             new_tracks = json.loads(response)
             tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
             if len(new_tracks) < 50:
index 9e2ad0d9962c375ca27851b3f842de302be28e56..3362b3db85c65c97d8839d8707e36fecf8bc6646 100644 (file)
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re
 
@@ -11,17 +13,18 @@ from ..aes import (
     aes_decrypt_text
 )
 
+
 class SpankwireIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
-        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        u'file': u'103545.mp4',
-        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
-        u'info_dict': {
-            u"uploader": u"oreusz", 
-            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
-            u"description": u"Crazy Bitch X rated music video.",
-            u"age_limit": 18,
+        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+        'file': '103545.mp4',
+        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'info_dict': {
+            "uploader": "oreusz",
+            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
+            "description": "Crazy Bitch X rated music video.",
+            "age_limit": 18,
         }
     }
 
@@ -34,17 +37,17 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
         description = self._html_search_regex(
-            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
+            r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
 
         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
-            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
 
         formats = []
@@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor):
             path = compat_urllib_parse_urlparse(video_url).path
             extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
+            resolution, bitrate_str = format
             format = "-".join(format)
+            height = int(resolution.rstrip('P'))
+            tbr = int(bitrate_str.rstrip('K'))
+
             formats.append({
                 'url': video_url,
                 'ext': extension,
+                'resolution': resolution,
                 'format': format,
+                'tbr': tbr,
+                'height': height,
                 'format_id': format,
             })
-        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+        self._sort_formats(formats)
 
         age_limit = self._rta_search(webpage)
 
index 2bf26d05682e8e2535d412c2718d78bec6077622..9dcffead04d5466c14c6f2ff60995ecfb5435e6d 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -9,61 +11,66 @@ from ..utils import (
 class TeamcocoIE(InfoExtractor):
     _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
     _TEST = {
-        u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-        u'file': u'19705.mp4',
-        u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
-        u'info_dict': {
-            u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
-            u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
+        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+        'file': '19705.mp4',
+        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+        'info_dict': {
+            "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+            "title": "Louis C.K. Interview Pt. 1 11/3/11"
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
         url_title = mobj.group('url_title')
         webpage = self._download_webpage(url, url_title)
 
-        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
-            webpage, u'video id')
+        video_id = self._html_search_regex(
+            r'<article class="video" data-id="(\d+?)"',
+            webpage, 'video id')
 
         self.report_extraction(video_id)
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
         data = self._download_xml(data_url, video_id, 'Downloading data webpage')
 
-
         qualities = ['500k', '480p', '1000k', '720p', '1080p']
         formats = []
-        for file in data.findall('files/file'):
-            if file.attrib.get('playmode') == 'all':
+        for filed in data.findall('files/file'):
+            if filed.attrib.get('playmode') == 'all':
                 # it just duplicates one of the entries
                 break
-            file_url = file.text
+            file_url = filed.text
             m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
             if m_format is not None:
                 format_id = m_format.group(1)
             else:
-                format_id = file.attrib['bitrate']
+                format_id = filed.attrib['bitrate']
+            tbr = (
+                int(filed.attrib['bitrate'])
+                if filed.attrib['bitrate'].isdigit()
+                else None)
+
+            try:
+                quality = qualities.index(format_id)
+            except ValueError:
+                quality = -1
             formats.append({
                 'url': file_url,
                 'ext': 'mp4',
+                'tbr': tbr,
                 'format_id': format_id,
+                'quality': quality,
             })
-        def sort_key(f):
-            try:
-                return qualities.index(f['format_id'])
-            except ValueError:
-                return -1
-        formats.sort(key=sort_key)
-        if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+
+        self._sort_formats(formats)
 
         return {
-            'id':          video_id,
+            'id': video_id,
             'formats': formats,
-            'title':       self._og_search_title(webpage),
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
         }
index 4bca62ba003e325ebedd0fcc74c953bd64120cd5..8b31caa92c1e44473aa42953427b3cc2d71762f7 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re
 
@@ -7,6 +9,7 @@ from ..utils import (
     RegexNotFoundError,
 )
 
+
 class TEDIE(SubtitlesInfoExtractor):
     _VALID_URL=r'''http://www\.ted\.com/
                    (
@@ -18,12 +21,12 @@ class TEDIE(SubtitlesInfoExtractor):
                    /(?P<name>\w+) # Here goes the name and then ".html"
                    '''
     _TEST = {
-        u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        u'file': u'102.mp4',
-        u'md5': u'2d76ee1576672e0bd8f187513267adf6',
-        u'info_dict': {
-            u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922", 
-            u"title": u"Dan Dennett: The illusion of consciousness"
+        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
+        'file': '102.mp4',
+        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+        'info_dict': {
+            "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
+            "title": "Dan Dennett: The illusion of consciousness"
         }
     }
 
@@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor):
         '''Returns the videos of the playlist'''
 
         webpage = self._download_webpage(
-            url, playlist_id, u'Downloading playlist webpage')
+            url, playlist_id, 'Downloading playlist webpage')
         matches = re.finditer(
             r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
             webpage)
index 28c88ffc7b3b0198353a737479e9257924fb57e3..bf3fde61020490d82e095dc75bf21f04b1219bce 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -131,6 +131,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                      (
                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
+                            (?:www\.)?deturl\.com/www\.youtube\.com/|
+                            (?:www\.)?pwnyoutube\.com|
                             tube\.majestyc\.net/|
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
index 8c19ed7fa1fb833103c519754499a44e9bf3a76b..c22f2cdc648b440f52a08cb1ba7947e02a985019 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -479,6 +479,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
         args = ['-c', 'copy']
+        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
 
index d1233be65b881813706b0b4a5026eb6603dc58b5..b9c25c4a964e493521c19cfbe05df283c55f5875 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2014.01.08'
+__version__ = '2014.01.17.2'