Merge remote-tracking branch 'georgjaehnig/spiegeltv'
author Philipp Hagemeister <phihag@phihag.de>
Sat, 7 Jun 2014 13:21:33 +0000 (15:21 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 7 Jun 2014 13:21:33 +0000 (15:21 +0200)
24 files changed:
Makefile
devscripts/release.sh
test/test_age_restriction.py
test/test_playlists.py
test/test_youtube_lists.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/tagesschau.py [new file with mode: 0644]
youtube_dl/extractor/teachertube.py [new file with mode: 0644]
youtube_dl/extractor/teachingchannel.py [new file with mode: 0644]
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index a8278586129034fc5dc1bd2a9bbcfd0ff15ea8d9..c079761efa9b2e60887575f4cd7626d0abe469a2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -77,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                --exclude 'docs/_build' \
                -- \
                bin devscripts test youtube_dl docs \
-               CHANGELOG LICENSE README.md README.txt \
+               LICENSE README.md README.txt \
                Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
                youtube-dl
index 2974a7c3eee0e14c0f8c2f03b41ce899eeda9324..453087e5f70fa92906926ef12ab3b192087c51c3 100755 (executable)
@@ -45,9 +45,9 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
-/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
+/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
 make README.md
-git add CHANGELOG README.md youtube_dl/version.py
+git add README.md youtube_dl/version.py
 git commit -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
index c9cdb96cb30578d58724ddadb4328ad790316a39..71e80b037a5cc99fd0cb1a6711d20cfb59e01b34 100644 (file)
@@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL
 
 
 def _download_restricted(url, filename, age):
-    """ Returns true iff the file has been downloaded """
+    """ Returns true if the file has been downloaded """
 
     params = {
         'age_limit': age,
index 63d31db8cbfd7f24c007787d4f96d244d0d0116f..465b07b9e28e48ce9fe3b8a0a477a712b9f06940 100644 (file)
@@ -28,6 +28,7 @@ from youtube_dl.extractor import (
     SoundcloudSetIE,
     SoundcloudUserIE,
     SoundcloudPlaylistIE,
+    TeacherTubeClassroomIE,
     LivestreamIE,
     NHLVideocenterIE,
     BambuserChannelIE,
@@ -209,20 +210,20 @@ class TestPlaylists(unittest.TestCase):
     def test_ivi_compilation(self):
         dl = FakeYDL()
         ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel')
-        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
-        self.assertTrue(len(result['entries']) >= 16)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
+        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
+        self.assertTrue(len(result['entries']) >= 24)
 
     def test_ivi_compilation_season(self):
         dl = FakeYDL()
         ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season1')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel/season1')
-        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 1 сезон')
-        self.assertTrue(len(result['entries']) >= 16)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
+        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
+        self.assertTrue(len(result['entries']) >= 12)
         
     def test_imdb_list(self):
         dl = FakeYDL()
@@ -360,5 +361,13 @@ class TestPlaylists(unittest.TestCase):
             result['title'], 'Brace Yourself - Today\'s Weirdest News')
         self.assertTrue(len(result['entries']) >= 10)
 
+    def test_TeacherTubeClassroom(self):
+        dl = FakeYDL()
+        ie = TeacherTubeClassroomIE(dl)
+        result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'rbhagwati2')
+        self.assertTrue(len(result['entries']) >= 20)
+
 if __name__ == '__main__':
     unittest.main()
index 7d3b9c7054726bb773e3ac390222706c0bcec2af..3aadedd64cf5af38ab1d18b640b10301c2073de2 100644 (file)
@@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_mix(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
+        result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
         entries = result['entries']
         self.assertTrue(len(entries) >= 20)
         original_video = entries[0]
-        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
+        self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
 
     def test_youtube_toptracks(self):
         print('Skipping: The playlist page gives error 500')
index 78b1e7cd2227f328b1dcbc6096c8a95b2d421a51..cc6a84106b4ccc1221b74da313eb619544c4a8ef 100644 (file)
@@ -96,6 +96,7 @@ class RtmpFD(FileDownloader):
         flash_version = info_dict.get('flash_version', None)
         live = info_dict.get('rtmp_live', False)
         conn = info_dict.get('rtmp_conn', None)
+        protocol = info_dict.get('rtmp_protocol', None)
 
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
@@ -133,6 +134,8 @@ class RtmpFD(FileDownloader):
                 basic_args += ['--conn', entry]
         elif isinstance(conn, compat_str):
             basic_args += ['--conn', conn]
+        if protocol is not None:
+            basic_args += ['--protocol', protocol]
         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
 
         if sys.platform == 'win32' and sys.version_info < (3, 0):
index b689dc3c998d8dfb8a611a370d77d4365c573fa1..72523c54d31a4192700eab77bc9b83a241605853 100644 (file)
@@ -267,6 +267,12 @@ from .streamcz import StreamCZIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .teachertube import (
+    TeacherTubeIE,
+    TeacherTubeClassroomIE,
+)
+from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
index b88f71bc40b9803fb4ed0dea134738a7e1e07201..c6d22c029ef1c8dcdef44df172fe3e9391fea6eb 100644 (file)
@@ -38,15 +38,19 @@ class ARDIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
-            r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<h4 class="headline">(.*?)</h4>'],
+            webpage, 'title')
         description = self._html_search_meta(
             'dcterms.abstract', webpage, 'description')
         thumbnail = self._og_search_thumbnail(webpage)
 
-        streams = [
-            mo.groupdict()
-            for mo in re.finditer(
-                r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
+
+        media_info = self._download_json(
+            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+        # The second element of the _mediaArray contains the standard http urls
+        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
         if not streams:
             if '"fsk"' in webpage:
                 raise ExtractorError('This video is only available after 20:00')
@@ -54,21 +58,12 @@ class ARDIE(InfoExtractor):
         formats = []
         for s in streams:
             format = {
-                'quality': int(s['quality']),
+                'quality': s['_quality'],
+                'url': s['_stream'],
             }
-            if s.get('rtmp_url'):
-                format['protocol'] = 'rtmp'
-                format['url'] = s['rtmp_url']
-                format['playpath'] = s['video_url']
-            else:
-                format['url'] = s['video_url']
-
-            quality_name = self._search_regex(
-                r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
-                'quality name', default='NA')
-            format['format_id'] = '%s-%s-%s-%s' % (
-                determine_ext(format['url']), quality_name, s['media_type'],
-                s['quality'])
+
+            format['format_id'] = '%s-%s' % (
+                determine_ext(format['url']), format['quality'])
 
             formats.append(format)
 
index 6e3a316c67bc8fcb5cb1a9b8ac50035ee7b54cc2..ba4d73ab8bf3ff893fdb2c07fc57f0cbc009ec44 100644 (file)
@@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                 })
                 formats.append({
                     'format_id': 'rtmp-%s' % format,
-                    'url': rtmp_video_url,
+                    'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
                     'ext': self._video_extensions.get(format, 'mp4'),
                     'height': h,
                     'width': w,
index ff7c0cd3e6595740f1c98b834f1d2b818d04d25c..14a196ffc63336ae7d016b035cfb28cc7f7d28a0 100644 (file)
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         video_title = self._html_search_regex(
-            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+            r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
         uploader = self._html_search_regex(
             r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
             fatal=False)
index ca8993241ae85f29508d92c78e6edc7dbcd4cb13..18f91efac450d1ec697652570b6366b1709553d0 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _TEST = {
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -36,7 +36,7 @@ class FC2IE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
         refer = url.replace('/content/', '/a/content/')
 
-        mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest()
+        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
 
         info_url = (
             "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
index 1ba4966c724ee15637dc0f2d08d3029dec16f4e3..528be1524ae645f7bb8b36ee2ac2378fd91561be 100644 (file)
@@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
         },
         # Serial's serie
         {
-            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+            'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+            'md5': '221f56b35e3ed815fde2df71032f4b3e',
             'info_dict': {
-                'id': '74791',
+                'id': '9549',
                 'ext': 'mp4',
-                'title': 'Дежурный ангел - 1 серия',
-                'duration': 2490,
-                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+                'title': 'Двое из ларца - Серия 1',
+                'duration': 2655,
+                'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
             },
             'skip': 'Only works from Russia',
          }
index 5016989cc9504ef1071b4816514e0924242f1602..7460d81cd501b8c52dcce3caae8313f6854b571a 100644 (file)
@@ -9,29 +9,48 @@ from .common import InfoExtractor
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
+    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
 
-    _TEST = {
-        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
-        'md5': 'dea205f03120046894db4ebb6159879a',
-        'info_dict': {
-            'id': '46301138',
-            'ext': 'mp4',
-            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
-            'timestamp': 1393232740,
-            'upload_date': '20140224',
-            'uploader': 'sonypicturesrus',
-            'uploader_id': 'sonypicturesrus@mail.ru',
-            'duration': 184,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+            'md5': 'dea205f03120046894db4ebb6159879a',
+            'info_dict': {
+                'id': '46301138',
+                'ext': 'mp4',
+                'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+                'timestamp': 1393232740,
+                'upload_date': '20140224',
+                'uploader': 'sonypicturesrus',
+                'uploader_id': 'sonypicturesrus@mail.ru',
+                'duration': 184,
+            },
+        },
+        {
+            'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+            'md5': '00a91a58c3402204dcced523777b475f',
+            'info_dict': {
+                'id': '46843144',
+                'ext': 'mp4',
+                'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+                'timestamp': 1397217632,
+                'upload_date': '20140411',
+                'uploader': 'hitech',
+                'uploader_id': 'hitech@corp.mail.ru',
+                'duration': 245,
+            },
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('idv1')
+
+        if not video_id:
+            video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
 
         video_data = self._download_json(
-            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
+            'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
 
         author = video_data['author']
         uploader = author['name']
@@ -40,6 +59,8 @@ class MailRuIE(InfoExtractor):
         movie = video_data['movie']
         content_id = str(movie['contentId'])
         title = movie['title']
+        if title.endswith('.mp4'):
+            title = title[:-4]
         thumbnail = movie['poster']
         duration = movie['duration']
 
index 4cab30631956b903682fc2de7aa5dd551bcdd4a3..c0231c197b12b86c669e9cff4b34a5c2ac1639bf 100644 (file)
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
 
     _TEST = {
-        u'url': u'http://tvcast.naver.com/v/81652',
-        u'file': u'81652.mp4',
-        u'info_dict': {
-            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
-            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
-            u'upload_date': u'20130903',
+        'url': 'http://tvcast.naver.com/v/81652',
+        'info_dict': {
+            'id': '81652',
+            'ext': 'mp4',
+            'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+            'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+            'upload_date': '20130903',
         },
     }
 
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
             webpage)
         if m_id is None:
-            raise ExtractorError(u'couldn\'t extract vid and key')
+            raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
         query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
         })
         info = self._download_xml(
             'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
-            video_id, u'Downloading video info')
+            video_id, 'Downloading video info')
         urls = self._download_xml(
             'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')
 
         formats = []
         for format_el in urls.findall('EncodingOptions/EncodingOption'):
             domain = format_el.find('Domain').text
-            if domain.startswith('rtmp'):
-                continue
-            formats.append({
+            f = {
                 'url': domain + format_el.find('uri').text,
                 'ext': 'mp4',
                 'width': int(format_el.find('width').text),
                 'height': int(format_el.find('height').text),
-            })
+            }
+            if domain.startswith('rtmp'):
+                f.update({
+                    'ext': 'flv',
+                    'rtmp_protocol': '1', # rtmpt
+                })
+            formats.append(f)
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
index f5117d7b308fde3d487c6057120a435f44962efc..3a6a7883e31f2ff309157f0e4d27765ef98fdf1b 100644 (file)
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
 
 
 class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
 
     _TESTS = [
         {
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
new file mode 100644 (file)
index 0000000..3633152
--- /dev/null
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TagesschauIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
+        'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+        'info_dict': {
+            'id': '1399128',
+            'ext': 'mp4',
+            'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
+            'description': 'md5:69da3c61275b426426d711bde96463ab',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
+        'md5': '8aaa8bf3ae1ca2652309718c03019128',
+        'info_dict': {
+            'id': '196',
+            'ext': 'mp4',
+            'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
+            'description': 'md5:f22e4af75821d174fa6c977349682691',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    _FORMATS = {
+        's': {'width': 256, 'height': 144, 'quality': 1},
+        'm': {'width': 512, 'height': 288, 'quality': 2},
+        'l': {'width': 960, 'height': 544, 'quality': 3},
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        if video_id.startswith('-'):
+            display_id = video_id.strip('-')
+        else:
+            display_id = video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerpage = self._download_webpage(
+            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
+            display_id, 'Downloading player page')
+
+        medias = re.findall(
+            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+            playerpage)
+
+        formats = []
+        for url, ext, res in medias:
+            f = {
+                'format_id': res + '_' + ext,
+                'url': url,
+                'ext': ext,
+            }
+            f.update(self._FORMATS.get(res, {}))
+            formats.append(f)
+
+        self._sort_formats(formats)
+
+        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+
+        return {
+            'id': display_id,
+            'title': self._og_search_title(webpage).strip(),
+            'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+            'formats': formats,
+            'description': self._og_search_description(webpage).strip(),
+        }
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
new file mode 100644 (file)
index 0000000..4740f3d
--- /dev/null
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TeacherTubeIE(InfoExtractor):
+    IE_NAME = 'teachertube'
+    IE_DESC = 'teachertube.com videos'
+
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/viewVideo\.php\?video_id=(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
+        'md5': 'f9434ef992fd65936d72999951ee254c',
+        'info_dict': {
+            'id': '339997',
+            'ext': 'mp4',
+            'title': 'Measures of dispersion from a frequency table_x264',
+            'description': 'md5:a3e9853487185e9fcd7181a07164650b',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
+        'md5': '0d625ec6bc9bf50f70170942ad580676',
+        'info_dict': {
+            'id': '340064',
+            'ext': 'mp4',
+            'title': 'How to Make Paper Dolls _ Paper Art Projects',
+            'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        url = self._html_search_meta('twitter:player:stream', webpage, 'twitter player')
+
+        formats = [{
+            'format_id': 'flv',
+            'url': url.replace('mp4v', 'flv').replace('.mp4', '.flv'),
+            'quality': 0,
+            'ext': 'flv',
+        }, {
+            'format_id': 'mp4',
+            'url': url,
+            'quality': 1,
+            'ext': 'mp4',
+        }]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+        }
+
+
+class TeacherTubeClassroomIE(InfoExtractor):
+    IE_NAME = 'teachertube:classroom'
+    IE_DESC = 'teachertube.com online classrooms'
+
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+
+        rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
+                                      user_id, 'Downloading classroom RSS')
+
+        entries = []
+        for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
+            entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
+
+        return self.playlist_result(entries, user_id)
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
new file mode 100644 (file)
index 0000000..117afa9
--- /dev/null
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TeachingChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
+
+    _TEST = {
+        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+        'info_dict': {
+            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+            'ext': 'mp4',
+            'title': 'A History of Teaming',
+            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        ooyala_code = self._search_regex(
+            r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
+
+        return OoyalaIE._build_url_result(ooyala_code)
index f15780ef540d1ef39cc3526b15629004dabb73cd..b6b2dba9ca9e6ee02c7dc6b2cf01d3601874a6b2 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor):
 
     _TEST = {
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
-        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
-        u'info_dict': {
-            u'id': u'e9I_cZgTgIPd',
-            u'ext': u'flv',
-            u'title': u'Blackberry\'s big, bold Z30',
-            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
-            u'duration': 247,
+        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+        'info_dict': {
+            'id': 'e9I_cZgTgIPd',
+            'ext': 'flv',
+            'title': 'Blackberry\'s big, bold Z30',
+            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+            'duration': 247,
         },
-        u'params': {
+        'params': {
             # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
         },
     }
 
@@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor):
             error_msg = next(
                 n.attrib['abstract']
                 for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == u'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction')
         except StopIteration:
             pass
         else:
@@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor):
             config_url = url+ '&form=json'
             config_url = config_url.replace('swf/', 'config/')
             config_url = config_url.replace('onsite/', 'onsite/config/')
-            config_json = self._download_webpage(config_url, video_id, u'Downloading config')
-            config = json.loads(config_json)
+            config = self._download_json(config_url, video_id, 'Downloading config')
             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
         else:
             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
index ea34a8f16c86008d663f3ceb0a7482b242160391..eada13ce920b9f4e892f952242ef87bfac504600 100644 (file)
@@ -16,7 +16,7 @@ class VevoIE(InfoExtractor):
     (currently used by MTVIE)
     """
     _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
index 85e99e1b02b8ab7e7647d6c91dbad08f4827d5f3..7e00448246beb9ab9b7c25f33b05e6f4f1bb8283 100644 (file)
@@ -5,18 +5,21 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
+    ExtractorError,
+    clean_html,
 )
 
 
 class XVideosIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
     _TEST = {
-        'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
-        'file': '939581.flv',
-        'md5': '1d0c835822f0a71a7bf011855db929d0',
+        'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
+        'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
         'info_dict': {
-            "title": "Funny Porns By >>>>S<<<<<< -1",
-            "age_limit": 18,
+            'id': '4588838',
+            'ext': 'flv',
+            'title': 'Biker Takes his Girl',
+            'age_limit': 18,
         }
     }
 
@@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
+        mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+        if mobj:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
         # Extract video URL
         video_url = compat_urllib_parse.unquote(
             self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
index 393f6ffbe316f4a9dde25ad219f5c0cc00f82a91..d84be25620eecb944845b74299510067772c583f 100644 (file)
@@ -21,7 +21,7 @@ class YahooIE(InfoExtractor):
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
             'md5': '4962b075c08be8690a922ee026d05e69',
             'info_dict': {
-                'id': '214727115',
+                'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
                 'ext': 'mp4',
                 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                 'description': 'Julian and Travis watch Julian Smith',
@@ -31,7 +31,7 @@ class YahooIE(InfoExtractor):
             'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
             'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
             'info_dict': {
-                'id': '103000935',
+                'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                 'ext': 'mp4',
                 'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                 'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
@@ -58,9 +58,11 @@ class YahooIE(InfoExtractor):
             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
             default=None)
         if items_json is None:
-            long_id = self._search_regex(
+            CONTENT_ID_REGEXES = [
                 r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
-                webpage, 'content ID')
+                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
+            ]
+            long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
             video_id = long_id
         else:
             items = json.loads(items_json)
@@ -68,9 +70,9 @@ class YahooIE(InfoExtractor):
             # The 'meta' field is not always in the video webpage, we request it
             # from another page
             long_id = info['id']
-        return self._get_info(long_id, video_id)
+        return self._get_info(long_id, video_id, webpage)
 
-    def _get_info(self, long_id, video_id):
+    def _get_info(self, long_id, video_id, webpage):
         query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
                  ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
                  ' AND protocol="http"' % long_id)
@@ -113,7 +115,7 @@ class YahooIE(InfoExtractor):
             'title': meta['title'],
             'formats': formats,
             'description': clean_html(meta['description']),
-            'thumbnail': meta['thumbnail'],
+            'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
         }
 
 
@@ -137,7 +139,7 @@ class YahooNewsIE(YahooIE):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id')
-        return self._get_info(long_id, video_id)
+        return self._get_info(long_id, video_id, webpage)
 
 
 class YahooSearchIE(SearchInfoExtractor):
index 981ca62c0d9762288e736eab0e68fca5273c8752..7c50881c4453eaff4ac69776fcc2dc94feef8d31 100644 (file)
@@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -1140,7 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
         if mobj is None:
             mobj = re.search(
-                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
+                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                 video_webpage)
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
@@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         title_span = (search_title('playlist-title') or
             search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'''(?x)data-video-username="(.*?)".*?
+        video_re = r'''(?x)data-video-username=".*?".*?
                        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
-        # Some of the videos may have been deleted, their username field is empty
-        ids = [video_id for (username, video_id) in matches if username]
+        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
index 638ff8af5ae995a5ebe7a5d4171a7c02ab0feec4..47dde62b9cb016aad90bd9779e8a59e5c8b22dec 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.05.19'
+__version__ = '2014.06.04'