Merge pull request #2962 from simonwjackson/patch-1
authorJaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Tue, 3 Jun 2014 14:47:59 +0000 (16:47 +0200)
committerJaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Tue, 3 Jun 2014 14:47:59 +0000 (16:47 +0200)
Update test_age_restriction.py

25 files changed:
Makefile
devscripts/release.sh
test/test_YoutubeDL.py
test/test_playlists.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/empflix.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/nuvid.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/streamcz.py
youtube_dl/extractor/swrmediathek.py [new file with mode: 0644]
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index a8278586129034fc5dc1bd2a9bbcfd0ff15ea8d9..c079761efa9b2e60887575f4cd7626d0abe469a2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -77,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                --exclude 'docs/_build' \
                -- \
                bin devscripts test youtube_dl docs \
-               CHANGELOG LICENSE README.md README.txt \
+               LICENSE README.md README.txt \
                Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
                youtube-dl
index 2974a7c3eee0e14c0f8c2f03b41ce899eeda9324..453087e5f70fa92906926ef12ab3b192087c51c3 100755 (executable)
@@ -45,9 +45,9 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
-/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
+/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
 make README.md
-git add CHANGELOG README.md youtube_dl/version.py
+git add README.md youtube_dl/version.py
 git commit -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
index 8735013f7278a27814dc8210482ea04dd1830d21..e794cc97f0e643c5f05539fd3d0313d30dc98f8d 100644 (file)
@@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['ext'], 'mp4')
 
-        # No prefer_free_formats => prefer mp4 and flv for greater compatibilty
+        # No prefer_free_formats => prefer mp4 and flv for greater compatibility
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
@@ -279,7 +279,7 @@ class TestFormatSelection(unittest.TestCase):
         self.assertEqual(ydl._format_note({}), '')
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
-        }), '^x\s*10k$')
+        }), '^\s*10k$')
 
 if __name__ == '__main__':
     unittest.main()
index cc871698a7123b8a06986c5d78d3a95a32af1885..057ce43f054097b31b0255586068a13b2ec5fdee 100644 (file)
@@ -209,20 +209,20 @@ class TestPlaylists(unittest.TestCase):
     def test_ivi_compilation(self):
         dl = FakeYDL()
         ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel')
-        self.assertEqual(result['title'], 'Ð\94ежÑ\83Ñ\80нÑ\8bй Ð°Ð½Ð³ÐµÐ» (2010 - 2012)')
-        self.assertTrue(len(result['entries']) >= 23)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
+        self.assertEqual(result['title'], 'Ð\94вое Ð¸Ð· Ð»Ð°Ñ\80Ñ\86а (2006 - 2008)')
+        self.assertTrue(len(result['entries']) >= 24)
 
     def test_ivi_compilation_season(self):
         dl = FakeYDL()
         ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
-        self.assertEqual(result['title'], 'Ð\94ежÑ\83Ñ\80нÑ\8bй Ð°Ð½Ð³ÐµÐ» (2010 - 2012) 2 сезон')
-        self.assertTrue(len(result['entries']) >= 7)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
+        self.assertEqual(result['title'], 'Ð\94вое Ð¸Ð· Ð»Ð°Ñ\80Ñ\86а (2006 - 2008) 1 сезон')
+        self.assertTrue(len(result['entries']) >= 12)
         
     def test_imdb_list(self):
         dl = FakeYDL()
index 3503c76b7c61515d6b42dae8a7afebdc5cfa6712..def58f1d6ace0877131ddebbbc59a5aa7cdf7e58 100644 (file)
@@ -194,7 +194,10 @@ from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
-from .nrk import NRKIE
+from .nrk import (
+    NRKIE,
+    NRKTVIE,
+)
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
@@ -260,6 +263,7 @@ from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
index b88f71bc40b9803fb4ed0dea134738a7e1e07201..a87b32b22fae35e7ac619950cd1b97576f4bbb86 100644 (file)
@@ -38,7 +38,9 @@ class ARDIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
-            r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+             r'<h4 class="headline">(.*?)</h4>'],
+            webpage, 'title')
         description = self._html_search_meta(
             'dcterms.abstract', webpage, 'description')
         thumbnail = self._og_search_thumbnail(webpage)
index 2301f61b602bc24344ca6cd4bbb9d19a539cd232..496271be4e5f7170ad3d814ec5e2c0b99d15538d 100644 (file)
@@ -1,10 +1,12 @@
 # encoding: utf-8
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    int_or_none,
 )
 
 
@@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'file': '19911.mp4',
-            'md5': '782f8504ca95a0eba8fc9177c373eec7',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
             'info_dict': {
+                'id': '19911',
+                'ext': 'mp4',
                 'upload_date': '20121110',
                 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
@@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor):
         },
         {
             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'file': '521be8ef82b16.mp4',
-            'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
             'info_dict': {
+                'id': '521be8ef82b16',
+                'ext': 'mp4',
                 'upload_date': '20131002',
                 'title': 'The Mummy’s Hand (1940)',
             },
@@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor):
             r'<div class="entry-content">(?P<description>.+?)</div>',
             webpage, 'description', flags=re.DOTALL, fatal=False)
 
-        playerdata = self._download_webpage(playerdata_url, video_id)
+        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+        video_thumbnail = self._search_regex(
+            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
+        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
 
-        sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        hd_url = self._html_search_regex(
-            r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
-            default=None)
-        video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
+        videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
 
-        formats = [{
-            'url': sd_url,
-            'ext': 'mp4',
-            'format': 'sd',
-            'format_id': 'sd',
-            'quality': 1,
-        }]
-        if hd_url:
-            formats.append({
-                'url': hd_url,
-                'ext': 'mp4',
-                'format': 'hd',
-                'format_id': 'hd',
-                'quality': 2,
-            })
+        formats = []
+        baseurl = sd_url[:sd_url.rfind('/')+1]
+        for video in videolist.findall('.//video'):
+            src = video.get('src')
+            if not src:
+                continue
+            file_ = src.partition(':')[-1]
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            bitrate = int_or_none(video.get('system-bitrate'))
+            format = {
+                'url': baseurl + file_,
+                'format_id': src.rpartition('.')[0].rpartition('_')[-1],
+            }
+            if width or height:
+                format.update({
+                    'tbr': bitrate // 1000 if bitrate else None,
+                    'width': width,
+                    'height': height,
+                })
+            else:
+                format.update({
+                    'abr': bitrate // 1000 if bitrate else None,
+                    'vcodec': 'none',
+                })
+            formats.append(format)
         self._sort_formats(formats)
 
         return {
index 6e3a316c67bc8fcb5cb1a9b8ac50035ee7b54cc2..ba4d73ab8bf3ff893fdb2c07fc57f0cbc009ec44 100644 (file)
@@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                 })
                 formats.append({
                     'format_id': 'rtmp-%s' % format,
-                    'url': rtmp_video_url,
+                    'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
                     'ext': self._video_extensions.get(format, 'mp4'),
                     'height': h,
                     'width': w,
index eaeee5a51543eb1d2cc75f2922de6f5f4638bd2b..e6952588fbdfa08167935fc2b1c0381804328943 100644 (file)
@@ -3,20 +3,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 class EmpflixIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-        'md5': '5e5cc160f38ca9857f318eb97146e13e',
+        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
         'info_dict': {
             'id': '33051',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Amateur Finger Fuck',
+            'description': 'Amateur solo finger fucking.',
             'age_limit': 18,
         }
     }
@@ -30,6 +28,8 @@ class EmpflixIE(InfoExtractor):
 
         video_title = self._html_search_regex(
             r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
 
         cfg_url = self._html_search_regex(
             r'flashvars\.config = escape\("([^"]+)"',
@@ -37,12 +37,18 @@ class EmpflixIE(InfoExtractor):
 
         cfg_xml = self._download_xml(
             cfg_url, video_id, note='Downloading metadata')
-        video_url = cfg_xml.find('videoLink').text
+
+        formats = [
+            {
+                'url': item.find('videoLink').text,
+                'format_id': item.find('res').text,
+            } for item in cfg_xml.findall('./quality/item')
+        ]
 
         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
             'title': video_title,
+            'description': video_description,
+            'formats': formats,
             'age_limit': age_limit,
         }
index ff7c0cd3e6595740f1c98b834f1d2b818d04d25c..14a196ffc63336ae7d016b035cfb28cc7f7d28a0 100644 (file)
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         video_title = self._html_search_regex(
-            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+            r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
         uploader = self._html_search_regex(
             r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
             fatal=False)
index ca8993241ae85f29508d92c78e6edc7dbcd4cb13..18f91efac450d1ec697652570b6366b1709553d0 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _TEST = {
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -36,7 +36,7 @@ class FC2IE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
         refer = url.replace('/content/', '/a/content/')
 
-        mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest()
+        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
 
         info_url = (
             "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
index 1ba4966c724ee15637dc0f2d08d3029dec16f4e3..528be1524ae645f7bb8b36ee2ac2378fd91561be 100644 (file)
@@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
         },
         # Serial's serie
         {
-            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+            'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+            'md5': '221f56b35e3ed815fde2df71032f4b3e',
             'info_dict': {
-                'id': '74791',
+                'id': '9549',
                 'ext': 'mp4',
-                'title': 'Ð\94ежÑ\83Ñ\80нÑ\8bй Ð°Ð½Ð³ÐµÐ» - 1 Ñ\81еÑ\80иÑ\8f',
-                'duration': 2490,
-                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+                'title': 'Ð\94вое Ð¸Ð· Ð»Ð°Ñ\80Ñ\86а - Ð¡ÐµÑ\80иÑ\8f 1',
+                'duration': 2655,
+                'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
             },
             'skip': 'Only works from Russia',
          }
index 5016989cc9504ef1071b4816514e0924242f1602..7460d81cd501b8c52dcce3caae8313f6854b571a 100644 (file)
@@ -9,29 +9,48 @@ from .common import InfoExtractor
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
+    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
 
-    _TEST = {
-        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
-        'md5': 'dea205f03120046894db4ebb6159879a',
-        'info_dict': {
-            'id': '46301138',
-            'ext': 'mp4',
-            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
-            'timestamp': 1393232740,
-            'upload_date': '20140224',
-            'uploader': 'sonypicturesrus',
-            'uploader_id': 'sonypicturesrus@mail.ru',
-            'duration': 184,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+            'md5': 'dea205f03120046894db4ebb6159879a',
+            'info_dict': {
+                'id': '46301138',
+                'ext': 'mp4',
+                'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+                'timestamp': 1393232740,
+                'upload_date': '20140224',
+                'uploader': 'sonypicturesrus',
+                'uploader_id': 'sonypicturesrus@mail.ru',
+                'duration': 184,
+            },
+        },
+        {
+            'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+            'md5': '00a91a58c3402204dcced523777b475f',
+            'info_dict': {
+                'id': '46843144',
+                'ext': 'mp4',
+                'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+                'timestamp': 1397217632,
+                'upload_date': '20140411',
+                'uploader': 'hitech',
+                'uploader_id': 'hitech@corp.mail.ru',
+                'duration': 245,
+            },
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('idv1')
+
+        if not video_id:
+            video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
 
         video_data = self._download_json(
-            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
+            'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
 
         author = video_data['author']
         uploader = author['name']
@@ -40,6 +59,8 @@ class MailRuIE(InfoExtractor):
         movie = video_data['movie']
         content_id = str(movie['contentId'])
         title = movie['title']
+        if title.endswith('.mp4'):
+            title = title[:-4]
         thumbnail = movie['poster']
         duration = movie['duration']
 
index 1a63ab56a9b02e9f5aa3d28dde6a57fb8155fc35..aa34665d1669f32ab31a02618c58ef9c4b130fe2 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str
@@ -31,30 +32,68 @@ class NBCIE(InfoExtractor):
 
 
 class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
+        ((video/.+?/(?P<id>\d+))|
+        (feature/[^/]+/(?P<title>.+)))
+        '''
 
-    _TEST = {
-        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
-        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
-        'info_dict': {
-            'id': '52753292',
-            'ext': 'flv',
-            'title': 'Crew emerges after four-month Mars food study',
-            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+    _TESTS = [
+        {
+            'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
+            'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
+            'info_dict': {
+                'id': '52753292',
+                'ext': 'flv',
+                'title': 'Crew emerges after four-month Mars food study',
+                'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+            },
         },
-    }
+        {
+            'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
+            'md5': 'b2421750c9f260783721d898f4c42063',
+            'info_dict': {
+                'id': 'I1wpAI_zmhsQ',
+                'ext': 'flv',
+                'title': 'How Twitter Reacted To The Snowden Interview',
+                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-        info = all_info.find('video')
+        if video_id is not None:
+            all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+            info = all_info.find('video')
 
-        return {
-            'id': video_id,
-            'title': info.find('headline').text,
-            'ext': 'flv',
-            'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-            'description': compat_str(info.find('caption').text),
-            'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-        }
+            return {
+                'id': video_id,
+                'title': info.find('headline').text,
+                'ext': 'flv',
+                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+                'description': compat_str(info.find('caption').text),
+                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+            }
+        else:
+            # "feature" pages use theplatform.com
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            bootstrap_json = self._search_regex(
+                r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
+                flags=re.MULTILINE)
+            bootstrap = json.loads(bootstrap_json)
+            info = bootstrap['results'][0]['video']
+            playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
+            mpxid = info['mpxId']
+            all_videos = self._download_json(playlist_url, title)['videos']
+            # The response contains additional videos
+            info = next(v for v in all_videos if v['mpxId'] == mpxid)
+
+            return {
+                '_type': 'url',
+                # We get the best quality video
+                'url': info['videoAssets'][-1]['publicUrl'],
+                'ie_key': 'ThePlatform',
+            }
index 53b34f5e646b233dd72a0657be5d285e1a534ddc..3d6096e46fbe6df0f6885fbdae483f05ac07cf6f 100644 (file)
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    qualities,
 )
 
 
@@ -57,7 +58,7 @@ class NDRIE(InfoExtractor):
 
         formats = []
 
-        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
+        mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
         if mp3_url:
             formats.append({
                 'url': mp3_url.group('audio'),
@@ -66,15 +67,15 @@ class NDRIE(InfoExtractor):
 
         thumbnail = None
 
-        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
         if video_url:
-            thumbnails = re.findall(r'''\d+: {src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
+            thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
             if thumbnails:
-                QUALITIES = ['xs', 's', 'm', 'l', 'xl']
-                thumbnails.sort(key=lambda thumb: QUALITIES.index(thumb[1]) if thumb[1] in QUALITIES else -1)
-                thumbnail = 'http://www.ndr.de' + thumbnails[-1][0]
+                quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
+                largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
+                thumbnail = 'http://www.ndr.de' + largest[0]
 
-            for format_id in ['lo', 'hi', 'hq']:
+            for format_id in 'lo', 'hi', 'hq':
                 formats.append({
                     'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
                     'format_id': format_id,
index e6d68b8361be664ce98534d21d0921cb549eff2b..3a6a7883e31f2ff309157f0e4d27765ef98fdf1b 100644 (file)
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
 
 
 class NRKIE(InfoExtractor):
@@ -64,4 +68,78 @@ class NRKIE(InfoExtractor):
             'title': data['title'],
             'description': data['description'],
             'thumbnail': thumbnail,
+        }
+
+
+class NRKTVIE(InfoExtractor):
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+
+    _TESTS = [
+        {
+            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
+            'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
+            'info_dict': {
+                'id': 'muhh48000314',
+                'ext': 'flv',
+                'title': '20 spørsmål',
+                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+                'upload_date': '20140523',
+                'duration': 1741.52,
+            }
+        },
+        {
+            'url': 'http://tv.nrk.no/program/mdfp15000514',
+            'md5': '383650ece2b25ecec996ad7b5bb2a384',
+            'info_dict': {
+                'id': 'mdfp15000514',
+                'ext': 'flv',
+                'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
+                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
+                'upload_date': '20140524',
+                'duration': 4605.0,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta('title', page, 'title')
+        description = self._html_search_meta('description', page, 'description')
+        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
+        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
+        duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
+        if duration:
+            duration = float(duration)
+
+        formats = []
+
+        f4m_url = re.search(r'data-media="([^"]+)"', page)
+        if f4m_url:
+            formats.append({
+                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
+                'format_id': 'f4m',
+                'ext': 'flv',
+            })
+
+        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+        if m3u8_url:
+            formats.append({
+                'url': m3u8_url.group(1),
+                'format_id': 'm3u8',
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
         }
\ No newline at end of file
index f0befa1168f690cb35c47218627d5fc888359224..e3db9fe8c6c643d49aa8c521921bc3f508010f98 100644 (file)
@@ -30,7 +30,7 @@ class NuvidIE(InfoExtractor):
             webpage, 'title').strip()
 
         url_end = self._html_search_regex(
-            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"',
+            r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
             webpage, 'video_url')
         video_url = 'http://m.nuvid.com' + url_end
 
index 7dd3dca0de94f41bb82c9baeacc45e5ab14ae0a2..4118ee9560e03d2fa1eea171766ef4893e274aa5 100644 (file)
@@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor):
 
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'(?s)<div class="video-info-row">\s*From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
index 7362904db50588acb6f1f988b7fc6665f9b8ce2e..73efe95420ff7b83412864de02d8d5601690b537 100644 (file)
@@ -5,13 +5,16 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    compat_str,
+)
 
 
 class StreamCZIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
         'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
         'info_dict': {
@@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor):
             'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
             'duration': 256,
         },
-    }
+    }, {
+        'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
+        'md5': '246272e753e26bbace7fcd9deca0650c',
+        'info_dict': {
+            'id': '10002447',
+            'ext': 'mp4',
+            'title': 'Kancelář Blaník: Tři roky pro Mazánka',
+            'description': 'md5:9177695a8b756a0a8ab160de4043b392',
+            'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
+            'duration': 368,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': str(jsonData['id']),
+            'id': compat_str(jsonData['episode_id']),
             'title': self._og_search_title(webpage),
             'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
             'formats': formats,
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
new file mode 100644 (file)
index 0000000..6c688c5
--- /dev/null
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class SWRMediathekIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+    _TESTS = [{
+        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
+        'md5': '8c5f6f0172753368547ca8413a7768ac',
+        'info_dict': {
+            'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'SWR odysso',
+            'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
+            'thumbnail': 're:^http:.*\.jpg$',
+            'duration': 2602,
+            'upload_date': '20140515',
+            'uploader': 'SWR Fernsehen',
+            'uploader_id': '990030',
+        },
+    }, {
+        'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+        'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
+        'info_dict': {
+            'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
+            'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 5305,
+            'upload_date': '20140516',
+            'uploader': 'SWR Fernsehen',
+            'uploader_id': '990030',
+        },
+    }, {
+        'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+        'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
+        'info_dict': {
+            'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+            'ext': 'mp3',
+            'title': 'Saša Stanišic: Vor dem Fest',
+            'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 3366,
+            'upload_date': '20140520',
+            'uploader': 'SWR 2',
+            'uploader_id': '284670',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        video = self._download_json(
+            'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
+
+        attr = video['attr']
+        media_type = attr['entry_etype']
+
+        formats = []
+        for entry in video['sub']:
+            if entry['name'] != 'entry_media':
+                continue
+
+            entry_attr = entry['attr']
+            codec = entry_attr['val0']
+            quality = int(entry_attr['val1'])
+
+            fmt = {
+                'url': entry_attr['val2'],
+                'quality': quality,
+            }
+
+            if media_type == 'Video':
+                fmt.update({
+                    'format_note': ['144p', '288p', '544p'][quality-1],
+                    'vcodec': codec,
+                })
+            elif media_type == 'Audio':
+                fmt.update({
+                    'acodec': codec,
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': attr['entry_title'],
+            'description': attr['entry_descl'],
+            'thumbnail': attr['entry_image_16_9'],
+            'duration': parse_duration(attr['entry_durat']),
+            'upload_date': attr['entry_pdatet'][:-4],
+            'uploader': attr['channel_title'],
+            'uploader_id': attr['channel_idkey'],
+            'formats': formats,
+        }
\ No newline at end of file
index f15780ef540d1ef39cc3526b15629004dabb73cd..b6b2dba9ca9e6ee02c7dc6b2cf01d3601874a6b2 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor):
 
     _TEST = {
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
-        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
-        u'info_dict': {
-            u'id': u'e9I_cZgTgIPd',
-            u'ext': u'flv',
-            u'title': u'Blackberry\'s big, bold Z30',
-            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
-            u'duration': 247,
+        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+        'info_dict': {
+            'id': 'e9I_cZgTgIPd',
+            'ext': 'flv',
+            'title': 'Blackberry\'s big, bold Z30',
+            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+            'duration': 247,
         },
-        u'params': {
+        'params': {
             # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
         },
     }
 
@@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor):
             error_msg = next(
                 n.attrib['abstract']
                 for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == u'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction')
         except StopIteration:
             pass
         else:
@@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor):
             config_url = url+ '&form=json'
             config_url = config_url.replace('swf/', 'config/')
             config_url = config_url.replace('onsite/', 'onsite/config/')
-            config_json = self._download_webpage(config_url, video_id, u'Downloading config')
-            config = json.loads(config_json)
+            config = self._download_json(config_url, video_id, 'Downloading config')
             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
         else:
             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
index e4bb3b949081d7e3c9a74632975ca633870a4a38..488b10df96e298c683cd02287e2da0c49f21a1cc 100644 (file)
@@ -11,29 +11,36 @@ from ..utils import (
 
 
 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
     IE_NAME = 'ustream'
     _TEST = {
         'url': 'http://www.ustream.tv/recorded/20274954',
-        'file': '20274954.flv',
         'md5': '088f151799e8f572f84eb62f17d73e5c',
         'info_dict': {
-            "uploader": "Young Americans for Liberty",
-            "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+            'id': '20274954',
+            'ext': 'flv',
+            'uploader': 'Young Americans for Liberty',
+            'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
         },
     }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
+        video_id = m.group('videoID')
+
+        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        if m.group('type') == 'embed/recorded':
+            video_id = m.group('videoID')
+            desktop_url = 'http://www.ustream.tv/recorded/' + video_id
+            return self.url_result(desktop_url, 'Ustream')
         if m.group('type') == 'embed':
             video_id = m.group('videoID')
             webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
+            desktop_video_id = self._html_search_regex(
+                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
             desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
             return self.url_result(desktop_url, 'Ustream')
 
-        video_id = m.group('videoID')
-
         video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
index ea34a8f16c86008d663f3ceb0a7482b242160391..eada13ce920b9f4e892f952242ef87bfac504600 100644 (file)
@@ -16,7 +16,7 @@ class VevoIE(InfoExtractor):
     (currently used by MTVIE)
     """
     _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
index 981ca62c0d9762288e736eab0e68fca5273c8752..8327fb146a48a5338a9b21820aac8cb247112e0d 100644 (file)
@@ -1140,7 +1140,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
         if mobj is None:
             mobj = re.search(
-                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
+                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                 video_webpage)
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
index 638ff8af5ae995a5ebe7a5d4171a7c02ab0feec4..3c3c4e7775dc2188c9f45d96ee511b4073f1ae16 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.05.19'
+__version__ = '2014.06.02'