Merge remote-tracking branch 'ralfharing/vh1'

author Philipp Hagemeister <phihag@phihag.de>

Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)
diff --git a/CHANGELOG b/CHANGELOG

deleted file mode 100644 (file)

index 3fa1167..0000000
--- a/CHANGELOG
+++ /dev/null
@@ -1,14 +0,0 @@
-2013.01.02  Codename: GIULIA
-
-    * Add support for ComedyCentral clips <nto>
-    * Corrected Vimeo description fetching <Nick Daniels>
-    * Added the --no-post-overwrites argument <Barbu Paul - Gheorghe>
-    * --verbose offers more environment info
-    * New info_dict field: uploader_id
-    * New updates system, with signature checking
-    * New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream
-    * Fixed IEs: BlipTv
-    * Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ
-    * Simplified IEs and test code
-    * Various (Python 3 and other) fixes
-    * Revamped and expanded tests
diff --git a/Makefile b/Makefile

index a8278586129034fc5dc1bd2a9bbcfd0ff15ea8d9..c079761efa9b2e60887575f4cd7626d0abe469a2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -77,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                 --exclude 'docs/_build' \
                 -- \
                 bin devscripts test youtube_dl docs \
-               CHANGELOG LICENSE README.md README.txt \
+               LICENSE README.md README.txt \
                 Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
                 youtube-dl
diff --git a/devscripts/release.sh b/devscripts/release.sh

index 2974a7c3eee0e14c0f8c2f03b41ce899eeda9324..453087e5f70fa92906926ef12ab3b192087c51c3 100755 (executable)
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -45,9 +45,9 @@ fi
  /bin/echo -e "\n### Changing version in version.py..."
  sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
  
-/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
+/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
  make README.md
-git add CHANGELOG README.md youtube_dl/version.py
+git add README.md youtube_dl/version.py
  git commit -m "release $version"
  
  /bin/echo -e "\n### Now tagging, signing and pushing..."
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index 8735013f7278a27814dc8210482ea04dd1830d21..e794cc97f0e643c5f05539fd3d0313d30dc98f8d 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['ext'], 'mp4')
  
-        # No prefer_free_formats => prefer mp4 and flv for greater compatibilty
+        # No prefer_free_formats => prefer mp4 and flv for greater compatibility
          ydl = YDL()
          ydl.params['prefer_free_formats'] = False
          formats = [
@@ -279,7 +279,7 @@ class TestFormatSelection(unittest.TestCase):
          self.assertEqual(ydl._format_note({}), '')
          assertRegexpMatches(self, ydl._format_note({
              'vbr': 10,
-        }), '^x\s*10k$')
+        }), '^\s*10k$')
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py

index c9cdb96cb30578d58724ddadb4328ad790316a39..71e80b037a5cc99fd0cb1a6711d20cfb59e01b34 100644 (file)
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL
  
  
  def _download_restricted(url, filename, age):
-    """ Returns true iff the file has been downloaded """
+    """ Returns true if the file has been downloaded """
  
      params = {
          'age_limit': age,
diff --git a/test/test_playlists.py b/test/test_playlists.py

index cc871698a7123b8a06986c5d78d3a95a32af1885..465b07b9e28e48ce9fe3b8a0a477a712b9f06940 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -28,6 +28,7 @@ from youtube_dl.extractor import (
      SoundcloudSetIE,
      SoundcloudUserIE,
      SoundcloudPlaylistIE,
+    TeacherTubeClassroomIE,
      LivestreamIE,
      NHLVideocenterIE,
      BambuserChannelIE,
@@ -209,20 +210,20 @@ class TestPlaylists(unittest.TestCase):
      def test_ivi_compilation(self):
          dl = FakeYDL()
          ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
          self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel')
-        self.assertEqual(result['title'], 'Ð\94ÐµÐ¶Ñ\83Ñ\80Ð½Ñ\8bÐ¹ Ð°Ð½Ð³ÐµÐ» (2010 - 2012)')
-        self.assertTrue(len(result['entries']) >= 23)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
+        self.assertEqual(result['title'], 'Ð\94Ð²Ð¾Ðµ Ð¸Ð· Ð»Ð°Ñ\80Ñ\86Ð° (2006 - 2008)')
+        self.assertTrue(len(result['entries']) >= 24)
  
      def test_ivi_compilation_season(self):
          dl = FakeYDL()
          ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
+        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
          self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
-        self.assertEqual(result['title'], 'Ð\94ÐµÐ¶Ñ\83Ñ\80Ð½Ñ\8bÐ¹ Ð°Ð½Ð³ÐµÐ» (2010 - 2012) 2 сезон')
-        self.assertTrue(len(result['entries']) >= 7)
+        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
+        self.assertEqual(result['title'], 'Ð\94Ð²Ð¾Ðµ Ð¸Ð· Ð»Ð°Ñ\80Ñ\86Ð° (2006 - 2008) 1 сезон')
+        self.assertTrue(len(result['entries']) >= 12)
          
      def test_imdb_list(self):
          dl = FakeYDL()
@@ -360,5 +361,13 @@ class TestPlaylists(unittest.TestCase):
              result['title'], 'Brace Yourself - Today\'s Weirdest News')
          self.assertTrue(len(result['entries']) >= 10)
  
+    def test_TeacherTubeClassroom(self):
+        dl = FakeYDL()
+        ie = TeacherTubeClassroomIE(dl)
+        result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'rbhagwati2')
+        self.assertTrue(len(result['entries']) >= 20)
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py

index 7d3b9c7054726bb773e3ac390222706c0bcec2af..3aadedd64cf5af38ab1d18b640b10301c2073de2 100644 (file)
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase):
      def test_youtube_mix(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
-        result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
+        result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
          entries = result['entries']
          self.assertTrue(len(entries) >= 20)
          original_video = entries[0]
-        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
+        self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
  
      def test_youtube_toptracks(self):
          print('Skipping: The playlist page gives error 500')
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index f3666573a5403f48e10de130374e1be9d835f46e..dc0ba986a98744151cafd932acbcd6bbe33fb4a0 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -717,6 +717,17 @@ class YoutubeDL(object):
              info_dict['playlist'] = None
              info_dict['playlist_index'] = None
  
+        thumbnails = info_dict.get('thumbnails')
+        if thumbnails:
+            thumbnails.sort(key=lambda t: (
+                t.get('width'), t.get('height'), t.get('url')))
+            for t in thumbnails:
+                if 'width' in t and 'height' in t:
+                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
+
+        if thumbnails and 'thumbnail' not in info_dict:
+            info_dict['thumbnail'] = thumbnails[-1]['url']
+
          if 'display_id' not in info_dict and 'id' in info_dict:
              info_dict['display_id'] = info_dict['id']
  
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 4e657e297d1a5ddb2aac4a448a8a94a0ac4cf180..e2a4c04dacf282623705c7ec20ef0b8c9a5d4c5a 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -56,6 +56,8 @@ __authors__  = (
      'Nicolas Évrard',
      'Jason Normore',
      'Hoje Lee',
+    'Adam Thalhammer',
+    'Georg Jähnig',
  )
  
  __license__ = 'Public Domain'
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py

index 78b1e7cd2227f328b1dcbc6096c8a95b2d421a51..cc6a84106b4ccc1221b74da313eb619544c4a8ef 100644 (file)
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -96,6 +96,7 @@ class RtmpFD(FileDownloader):
          flash_version = info_dict.get('flash_version', None)
          live = info_dict.get('rtmp_live', False)
          conn = info_dict.get('rtmp_conn', None)
+        protocol = info_dict.get('rtmp_protocol', None)
  
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
@@ -133,6 +134,8 @@ class RtmpFD(FileDownloader):
                  basic_args += ['--conn', entry]
          elif isinstance(conn, compat_str):
              basic_args += ['--conn', conn]
+        if protocol is not None:
+            basic_args += ['--protocol', protocol]
          args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
  
          if sys.platform == 'win32' and sys.version_info < (3, 0):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 3e3d99b3e9c790bd8ae183edf80510634690a4d4..01c21189b548847ae3232ba4e3f85344d986b430 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -142,6 +142,7 @@ from .khanacademy import KhanAcademyIE
  from .kickstarter import KickStarterIE
  from .keek import KeekIE
  from .kontrtube import KontrTubeIE
+from .ku6 import Ku6IE
  from .la7 import LA7IE
  from .lifenews import LifeNewsIE
  from .liveleak import LiveLeakIE
@@ -194,7 +195,10 @@ from .normalboots import NormalbootsIE
  from .novamov import NovaMovIE
  from .nowness import NownessIE
  from .nowvideo import NowVideoIE
-from .nrk import NRKIE
+from .nrk import (
+    NRKIE,
+    NRKTVIE,
+)
  from .ntv import NTVIE
  from .nytimes import NYTimesIE
  from .nuvid import NuvidIE
@@ -255,13 +259,21 @@ from .southparkstudios import (
  from .space import SpaceIE
  from .spankwire import SpankwireIE
  from .spiegel import SpiegelIE
+from .spiegeltv import SpiegeltvIE
  from .spike import SpikeIE
  from .stanfordoc import StanfordOpenClassroomIE
  from .steam import SteamIE
  from .streamcloud import StreamcloudIE
  from .streamcz import StreamCZIE
+from .swrmediathek import SWRMediathekIE
  from .syfy import SyfyIE
  from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .teachertube import (
+    TeacherTubeIE,
+    TeacherTubeClassroomIE,
+)
+from .teachingchannel import TeachingChannelIE
  from .teamcoco import TeamcocoIE
  from .techtalks import TechTalksIE
  from .ted import TEDIE
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py

index 6a8cd14c90635f4e42ccc526de138f996853f5c9..cfc7370ae43da592eaca49245200bd922d75a019 100644 (file)
--- a/youtube_dl/extractor/aftonbladet.py
+++ b/youtube_dl/extractor/aftonbladet.py
@@ -1,7 +1,6 @@
  # encoding: utf-8
  from __future__ import unicode_literals
  
-import datetime
  import re
  
  from .common import InfoExtractor
@@ -16,6 +15,7 @@ class AftonbladetIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
              'description': 'Jupiters måne mest aktiv av alla himlakroppar',
+            'timestamp': 1394142732,
              'upload_date': '20140306',
          },
      }
@@ -27,17 +27,17 @@ class AftonbladetIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          # find internal video meta data
-        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
+        meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
          internal_meta_id = self._html_search_regex(
              r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
-        internal_meta_url = META_URL % internal_meta_id
+        internal_meta_url = meta_url % internal_meta_id
          internal_meta_json = self._download_json(
              internal_meta_url, video_id, 'Downloading video meta data')
  
          # find internal video formats
-        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
+        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
          internal_video_id = internal_meta_json['videoId']
-        internal_formats_url = FORMATS_URL % internal_video_id
+        internal_formats_url = format_url % internal_video_id
          internal_formats_json = self._download_json(
              internal_formats_url, video_id, 'Downloading video formats')
  
@@ -54,16 +54,13 @@ class AftonbladetIE(InfoExtractor):
              })
          self._sort_formats(formats)
  
-        timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
-        upload_date = timestamp.strftime('%Y%m%d')
-
          return {
              'id': video_id,
              'title': internal_meta_json['title'],
              'formats': formats,
              'thumbnail': internal_meta_json['imageUrl'],
              'description': internal_meta_json['shortPreamble'],
-            'upload_date': upload_date,
+            'timestamp': internal_meta_json['timePublished'],
              'duration': internal_meta_json['duration'],
              'view_count': internal_meta_json['views'],
          }
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py

index b88f71bc40b9803fb4ed0dea134738a7e1e07201..c6d22c029ef1c8dcdef44df172fe3e9391fea6eb 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -38,15 +38,19 @@ class ARDIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          title = self._html_search_regex(
-            r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<h4 class="headline">(.*?)</h4>'],
+            webpage, 'title')
          description = self._html_search_meta(
              'dcterms.abstract', webpage, 'description')
          thumbnail = self._og_search_thumbnail(webpage)
  
-        streams = [
-            mo.groupdict()
-            for mo in re.finditer(
-                r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
+
+        media_info = self._download_json(
+            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+        # The second element of the _mediaArray contains the standard http urls
+        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
          if not streams:
              if '"fsk"' in webpage:
                  raise ExtractorError('This video is only available after 20:00')
@@ -54,21 +58,12 @@ class ARDIE(InfoExtractor):
          formats = []
          for s in streams:
              format = {
-                'quality': int(s['quality']),
+                'quality': s['_quality'],
+                'url': s['_stream'],
              }
-            if s.get('rtmp_url'):
-                format['protocol'] = 'rtmp'
-                format['url'] = s['rtmp_url']
-                format['playpath'] = s['video_url']
-            else:
-                format['url'] = s['video_url']
-
-            quality_name = self._search_regex(
-                r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
-                'quality name', default='NA')
-            format['format_id'] = '%s-%s-%s-%s' % (
-                determine_ext(format['url']), quality_name, s['media_type'],
-                s['quality'])
+
+            format['format_id'] = '%s-%s' % (
+                determine_ext(format['url']), format['quality'])
  
              formats.append(format)
  
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py

index 929aafdff3e848af3295eacf1520ec0ec0334966..dcbbdef4346c36c789e49531df1dc602bc35255b 100644 (file)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -19,7 +19,7 @@ class BandcampIE(InfoExtractor):
          'md5': 'c557841d5e50261777a6585648adf439',
          'info_dict': {
              "title": "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
-            "duration": 10,
+            "duration": 9.8485,
          },
          '_skip': 'There is a limit of 200 free downloads / month for the test song'
      }]
@@ -28,36 +28,32 @@ class BandcampIE(InfoExtractor):
          mobj = re.match(self._VALID_URL, url)
          title = mobj.group('title')
          webpage = self._download_webpage(url, title)
-        # We get the link to the free download page
          m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
-        if m_download is None:
+        if not m_download:
              m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
              if m_trackinfo:
                  json_code = m_trackinfo.group(1)
-                data = json.loads(json_code)
-                d = data[0]
+                data = json.loads(json_code)[0]
  
-                duration = int(round(d['duration']))
                  formats = []
-                for format_id, format_url in d['file'].items():
-                    ext, _, abr_str = format_id.partition('-')
-
+                for format_id, format_url in data['file'].items():
+                    ext, abr_str = format_id.split('-', 1)
                      formats.append({
                          'format_id': format_id,
                          'url': format_url,
-                        'ext': format_id.partition('-')[0],
+                        'ext': ext,
                          'vcodec': 'none',
-                        'acodec': format_id.partition('-')[0],
-                        'abr': int(format_id.partition('-')[2]),
+                        'acodec': ext,
+                        'abr': int(abr_str),
                      })
  
                  self._sort_formats(formats)
  
                  return {
-                    'id': compat_str(d['id']),
-                    'title': d['title'],
+                    'id': compat_str(data['id']),
+                    'title': data['title'],
                      'formats': formats,
-                    'duration': duration,
+                    'duration': float(data['duration']),
                  }
              else:
                  raise ExtractorError('No free songs found')
@@ -67,11 +63,9 @@ class BandcampIE(InfoExtractor):
              r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
              webpage, re.MULTILINE | re.DOTALL).group('id')
  
-        download_webpage = self._download_webpage(download_link, video_id,
-                                                  'Downloading free downloads page')
-        # We get the dictionary of the track from some javascrip code
-        info = re.search(r'items: (.*?),$',
-                         download_webpage, re.MULTILINE).group(1)
+        download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
+        # We get the dictionary of the track from some javascript code
+        info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
          info = json.loads(info)[0]
          # We pick mp3-320 for now, until format selection can be easily implemented.
          mp3_info = info['downloads']['mp3-320']
@@ -100,7 +94,7 @@ class BandcampIE(InfoExtractor):
  
  class BandcampAlbumIE(InfoExtractor):
      IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
  
      _TEST = {
          'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -123,7 +117,7 @@ class BandcampAlbumIE(InfoExtractor):
          'params': {
              'playlistend': 2
          },
-        'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+        'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py

index 96408e4e093ba6b27b4da54248d436d162d3c40c..38ccd957f3eb61a761950bb9a70cdbbeec6bea6d 100644 (file)
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -1,6 +1,5 @@
  from __future__ import unicode_literals
  
-import datetime
  import json
  import re
  
@@ -19,15 +18,16 @@ class BlinkxIE(InfoExtractor):
          'file': '8aQUy7GV.mp4',
          'md5': '2e9a07364af40163a908edbf10bb2492',
          'info_dict': {
-            "title": "Police Car Rolls Away",
-            "uploader": "stupidvideos.com",
-            "upload_date": "20131215",
-            "description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
-            "duration": 14.886,
-            "thumbnails": [{
-                "width": 100,
-                "height": 76,
-                "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
+            'title': 'Police Car Rolls Away',
+            'uploader': 'stupidvideos.com',
+            'upload_date': '20131215',
+            'timestamp': 1387068000,
+            'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
+            'duration': 14.886,
+            'thumbnails': [{
+                'width': 100,
+                'height': 76,
+                'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
              }],
          },
      }
@@ -41,9 +41,6 @@ class BlinkxIE(InfoExtractor):
                     'video=%s' % video_id)
          data_json = self._download_webpage(api_url, display_id)
          data = json.loads(data_json)['api']['results'][0]
-        dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
-        pload_date = dt.strftime('%Y%m%d')
-
          duration = None
          thumbnails = []
          formats = []
@@ -64,10 +61,7 @@ class BlinkxIE(InfoExtractor):
                  vcodec = remove_start(m['vcodec'], 'ff')
                  acodec = remove_start(m['acodec'], 'ff')
                  tbr = (int(m['vbr']) + int(m['abr'])) // 1000
-                format_id = (u'%s-%sk-%s' %
-                             (vcodec,
-                              tbr,
-                              m['w']))
+                format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])
                  formats.append({
                      'format_id': format_id,
                      'url': m['link'],
@@ -88,7 +82,7 @@ class BlinkxIE(InfoExtractor):
              'title': data['title'],
              'formats': formats,
              'uploader': data['channel_name'],
-            'upload_date': pload_date,
+            'timestamp': data['pubdate_epoch'],
              'description': data.get('description'),
              'thumbnails': thumbnails,
              'duration': duration,
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py

index 2301f61b602bc24344ca6cd4bbb9d19a539cd232..496271be4e5f7170ad3d814ec5e2c0b99d15538d 100644 (file)
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -1,10 +1,12 @@
  # encoding: utf-8
  from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
+    int_or_none,
  )
  
  
@@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor):
      _TESTS = [
          {
              'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'file': '19911.mp4',
-            'md5': '782f8504ca95a0eba8fc9177c373eec7',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
              'info_dict': {
+                'id': '19911',
+                'ext': 'mp4',
                  'upload_date': '20121110',
                  'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                  'description': 'md5:fb87405fcb42a331742a0dce2708560b',
@@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor):
          },
          {
              'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'file': '521be8ef82b16.mp4',
-            'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
              'info_dict': {
+                'id': '521be8ef82b16',
+                'ext': 'mp4',
                  'upload_date': '20131002',
                  'title': 'The Mummy’s Hand (1940)',
              },
@@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor):
              r'<div class="entry-content">(?P<description>.+?)</div>',
              webpage, 'description', flags=re.DOTALL, fatal=False)
  
-        playerdata = self._download_webpage(playerdata_url, video_id)
+        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+        video_thumbnail = self._search_regex(
+            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
+        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
  
-        sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        hd_url = self._html_search_regex(
-            r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
-            default=None)
-        video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
+        videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
  
-        formats = [{
-            'url': sd_url,
-            'ext': 'mp4',
-            'format': 'sd',
-            'format_id': 'sd',
-            'quality': 1,
-        }]
-        if hd_url:
-            formats.append({
-                'url': hd_url,
-                'ext': 'mp4',
-                'format': 'hd',
-                'format_id': 'hd',
-                'quality': 2,
-            })
+        formats = []
+        baseurl = sd_url[:sd_url.rfind('/')+1]
+        for video in videolist.findall('.//video'):
+            src = video.get('src')
+            if not src:
+                continue
+            file_ = src.partition(':')[-1]
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            bitrate = int_or_none(video.get('system-bitrate'))
+            format = {
+                'url': baseurl + file_,
+                'format_id': src.rpartition('.')[0].rpartition('_')[-1],
+            }
+            if width or height:
+                format.update({
+                    'tbr': bitrate // 1000 if bitrate else None,
+                    'width': width,
+                    'height': height,
+                })
+            else:
+                format.update({
+                    'abr': bitrate // 1000 if bitrate else None,
+                    'vcodec': 'none',
+                })
+            formats.append(format)
          self._sort_formats(formats)
  
          return {
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py

index 88e0e9aba9150cea2ccac5a6ea4be06b9d0700ba..e96c59f718a5dc412a2ce7eaa962d6bdca98e187 100644 (file)
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dl/extractor/cmt.py
@@ -1,19 +1,19 @@
+from __future__ import unicode_literals
  from .mtv import MTVIE
  
+
  class CMTIE(MTVIE):
-    IE_NAME = u'cmt.com'
+    IE_NAME = 'cmt.com'
      _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
      _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
  
-    _TESTS = [
-        {
-            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
-            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
-            u'info_dict': {
-                u'id': u'989124',
-                u'ext': u'mp4',
-                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
-                u'description': u'Blame It All On My Roots',
-            },
+    _TESTS = [{
+        'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+        'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2',
+        'info_dict': {
+            'id': '989124',
+            'ext': 'mp4',
+            'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+            'description': 'Blame It All On My Roots',
          },
-    ]
+    }]
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py

index b32cb898010a0ad0e02e12f3b3a55c3769cc3979..dae40c136bae20fd54cae401e711b9233c750e14 100644 (file)
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -79,8 +79,11 @@ class CNNIE(InfoExtractor):
  
          self._sort_formats(formats)
  
-        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
-        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
+        thumbnails = [{
+            'height': int(t.attrib['height']),
+            'width': int(t.attrib['width']),
+            'url': t.text,
+        } for t in info.findall('images/image')]
  
          metas_el = info.find('metas')
          upload_date = (
@@ -93,8 +96,7 @@ class CNNIE(InfoExtractor):
              'id': info.attrib['id'],
              'title': info.find('headline').text,
              'formats': formats,
-            'thumbnail': thumbnails[-1][1],
-            'thumbnails': thumbs_dict,
+            'thumbnails': thumbnails,
              'description': info.find('description').text,
              'duration': duration,
              'upload_date': upload_date,
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py

index 6e3a316c67bc8fcb5cb1a9b8ac50035ee7b54cc2..ba4d73ab8bf3ff893fdb2c07fc57f0cbc009ec44 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                  })
                  formats.append({
                      'format_id': 'rtmp-%s' % format,
-                    'url': rtmp_video_url,
+                    'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
                      'ext': self._video_extensions.get(format, 'mp4'),
                      'height': h,
                      'width': w,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index db472aace8faabb465e9c93b7ff6013ccece4e8e..49e75405e8b079eef83191f9429ebd34a6c0bc26 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -92,8 +92,12 @@ class InfoExtractor(object):
                      unique, but available before title. Typically, id is
                      something like "4234987", title "Dancing naked mole rats",
                      and display_id "dancing-naked-mole-rats"
-    thumbnails:     A list of dictionaries (with the entries "resolution" and
-                    "url") for the varying thumbnails
+    thumbnails:     A list of dictionaries, with the following entries:
+                        * "url"
+                        * "width" (optional, int)
+                        * "height" (optional, int)
+                        * "resolution" (optional, string "{width}x{height"},
+                                        deprecated)
      thumbnail:      Full URL to a video thumbnail image.
      description:    One-line video description.
      uploader:       Full name of the video uploader.
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py

index eaeee5a51543eb1d2cc75f2922de6f5f4638bd2b..e6952588fbdfa08167935fc2b1c0381804328943 100644 (file)
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -3,20 +3,18 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
  
  
  class EmpflixIE(InfoExtractor):
      _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
      _TEST = {
          'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-        'md5': '5e5cc160f38ca9857f318eb97146e13e',
+        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
          'info_dict': {
              'id': '33051',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'Amateur Finger Fuck',
+            'description': 'Amateur solo finger fucking.',
              'age_limit': 18,
          }
      }
@@ -30,6 +28,8 @@ class EmpflixIE(InfoExtractor):
  
          video_title = self._html_search_regex(
              r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
  
          cfg_url = self._html_search_regex(
              r'flashvars\.config = escape\("([^"]+)"',
@@ -37,12 +37,18 @@ class EmpflixIE(InfoExtractor):
  
          cfg_xml = self._download_xml(
              cfg_url, video_id, note='Downloading metadata')
-        video_url = cfg_xml.find('videoLink').text
+
+        formats = [
+            {
+                'url': item.find('videoLink').text,
+                'format_id': item.find('res').text,
+            } for item in cfg_xml.findall('./quality/item')
+        ]
  
          return {
              'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
              'title': video_title,
+            'description': video_description,
+            'formats': formats,
              'age_limit': age_limit,
          }
diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py

index ff7c0cd3e6595740f1c98b834f1d2b818d04d25c..14a196ffc63336ae7d016b035cfb28cc7f7d28a0 100644 (file)
--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dl/extractor/extremetube.py
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
          webpage = self._download_webpage(req, video_id)
  
          video_title = self._html_search_regex(
-            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+            r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
          uploader = self._html_search_regex(
              r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
              fatal=False)
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py

index ca8993241ae85f29508d92c78e6edc7dbcd4cb13..18f91efac450d1ec697652570b6366b1709553d0 100644 (file)
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -13,7 +13,7 @@ from ..utils import (
  
  
  class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
      IE_NAME = 'fc2'
      _TEST = {
          'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -36,7 +36,7 @@ class FC2IE(InfoExtractor):
          thumbnail = self._og_search_thumbnail(webpage)
          refer = url.replace('/content/', '/a/content/')
  
-        mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest()
+        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
  
          info_url = (
              "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py

index 2333989665520b8e35728705255b246ea93decac..11fee3d31e88833b8074a1b59cff885eeffa46d3 100644 (file)
--- a/youtube_dl/extractor/gamekings.py
+++ b/youtube_dl/extractor/gamekings.py
@@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor):
              'id': '20130811',
              'ext': 'mp4',
              'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
-            'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
+            'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
          }
      }
  
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 2861332826b5f20cdf0493edfdba8a7a12f357a8..38a357d3b0406906144e25cbbc45fbe74d2f6c2c 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -363,8 +363,13 @@ class GenericIE(InfoExtractor):
                      return self.url_result('http://' + url)
                  else:
                      if default_search == 'auto_warning':
-                        self._downloader.report_warning(
-                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
+                        if re.match(r'^(?:url|URL)$', url):
+                            raise ExtractorError(
+                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
+                                expected=True)
+                        else:
+                            self._downloader.report_warning(
+                                'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                      return self.url_result('ytsearch:' + url)
              else:
                  assert ':' in default_search
@@ -560,7 +565,7 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded NovaMov-based player
          mobj = re.search(
-            r'''(?x)<iframe[^>]+?src=(["\'])
+            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                      (?P<url>http://(?:(?:embed|www)\.)?
                          (?:novamov\.com|
                             nowvideo\.(?:ch|sx|eu|at|ag|co)|
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py

index 1ba4966c724ee15637dc0f2d08d3029dec16f4e3..528be1524ae645f7bb8b36ee2ac2378fd91561be 100644 (file)
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
          },
          # Serial's serie
          {
-            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+            'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+            'md5': '221f56b35e3ed815fde2df71032f4b3e',
              'info_dict': {
-                'id': '74791',
+                'id': '9549',
                  'ext': 'mp4',
-                'title': 'Ð\94ÐµÐ¶Ñ\83Ñ\80Ð½Ñ\8bÐ¹ Ð°Ð½Ð³ÐµÐ» - 1 Ñ\81ÐµÑ\80Ð¸Ñ\8f',
-                'duration': 2490,
-                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+                'title': 'Ð\94Ð²Ð¾Ðµ Ð¸Ð· Ð»Ð°Ñ\80Ñ\86Ð° - Ð¡ÐµÑ\80Ð¸Ñ\8f 1',
+                'duration': 2655,
+                'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
              },
              'skip': 'Only works from Russia',
           }
diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py

new file mode 100644 (file)

index 0000000..484239b
--- /dev/null
+++ b/youtube_dl/extractor/ku6.py
@@ -0,0 +1,35 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class Ku6IE(InfoExtractor):
+    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
+    _TEST = {
+        'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
+        'md5': '01203549b9efbb45f4b87d55bdea1ed1',
+        'info_dict': {
+            'id': 'JG-8yS14xzBr4bCn1pu0xw',
+            'ext': 'f4v',
+            'title': 'techniques test',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title')
+        dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
+        jsonData = self._download_json(dataUrl, video_id)
+        downloadUrl = jsonData['data']['f']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': downloadUrl
+        }
+
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py

index f819c09b348550c878616151ace346ac1a7ab3a4..7460d81cd501b8c52dcce3caae8313f6854b571a 100644 (file)
--- a/youtube_dl/extractor/mailru.py
+++ b/youtube_dl/extractor/mailru.py
@@ -2,7 +2,6 @@
  from __future__ import unicode_literals
  
  import re
-import datetime
  
  from .common import InfoExtractor
  
@@ -10,28 +9,48 @@ from .common import InfoExtractor
  class MailRuIE(InfoExtractor):
      IE_NAME = 'mailru'
      IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
+    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
  
-    _TEST = {
-        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
-        'md5': 'dea205f03120046894db4ebb6159879a',
-        'info_dict': {
-            'id': '46301138',
-            'ext': 'mp4',
-            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
-            'upload_date': '20140224',
-            'uploader': 'sonypicturesrus',
-            'uploader_id': 'sonypicturesrus@mail.ru',
-            'duration': 184,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+            'md5': 'dea205f03120046894db4ebb6159879a',
+            'info_dict': {
+                'id': '46301138',
+                'ext': 'mp4',
+                'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+                'timestamp': 1393232740,
+                'upload_date': '20140224',
+                'uploader': 'sonypicturesrus',
+                'uploader_id': 'sonypicturesrus@mail.ru',
+                'duration': 184,
+            },
+        },
+        {
+            'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+            'md5': '00a91a58c3402204dcced523777b475f',
+            'info_dict': {
+                'id': '46843144',
+                'ext': 'mp4',
+                'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+                'timestamp': 1397217632,
+                'upload_date': '20140411',
+                'uploader': 'hitech',
+                'uploader_id': 'hitech@corp.mail.ru',
+                'duration': 245,
+            },
+        },
+    ]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('idv1')
+
+        if not video_id:
+            video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
  
          video_data = self._download_json(
-            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
+            'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
  
          author = video_data['author']
          uploader = author['name']
@@ -40,10 +59,11 @@ class MailRuIE(InfoExtractor):
          movie = video_data['movie']
          content_id = str(movie['contentId'])
          title = movie['title']
+        if title.endswith('.mp4'):
+            title = title[:-4]
          thumbnail = movie['poster']
          duration = movie['duration']
  
-        upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
          view_count = video_data['views_count']
  
          formats = [
@@ -57,7 +77,7 @@ class MailRuIE(InfoExtractor):
              'id': content_id,
              'title': title,
              'thumbnail': thumbnail,
-            'upload_date': upload_date,
+            'timestamp': video_data['timestamp'],
              'uploader': uploader,
              'uploader_id': uploader_id,
              'duration': duration,
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py

index 4cab30631956b903682fc2de7aa5dd551bcdd4a3..c0231c197b12b86c669e9cff4b34a5c2ac1639bf 100644 (file)
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,4 +1,6 @@
  # encoding: utf-8
+from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
      _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
  
      _TEST = {
-        u'url': u'http://tvcast.naver.com/v/81652',
-        u'file': u'81652.mp4',
-        u'info_dict': {
-            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
-            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
-            u'upload_date': u'20130903',
+        'url': 'http://tvcast.naver.com/v/81652',
+        'info_dict': {
+            'id': '81652',
+            'ext': 'mp4',
+            'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+            'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+            'upload_date': '20130903',
          },
      }
  
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
          m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
              webpage)
          if m_id is None:
-            raise ExtractorError(u'couldn\'t extract vid and key')
+            raise ExtractorError('couldn\'t extract vid and key')
          vid = m_id.group(1)
          key = m_id.group(2)
          query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
          })
          info = self._download_xml(
              'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
-            video_id, u'Downloading video info')
+            video_id, 'Downloading video info')
          urls = self._download_xml(
              'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')
  
          formats = []
          for format_el in urls.findall('EncodingOptions/EncodingOption'):
              domain = format_el.find('Domain').text
-            if domain.startswith('rtmp'):
-                continue
-            formats.append({
+            f = {
                  'url': domain + format_el.find('uri').text,
                  'ext': 'mp4',
                  'width': int(format_el.find('width').text),
                  'height': int(format_el.find('height').text),
-            })
+            }
+            if domain.startswith('rtmp'):
+                f.update({
+                    'ext': 'flv',
+                    'rtmp_protocol': '1', # rtmpt
+                })
+            formats.append(f)
+        self._sort_formats(formats)
  
          return {
              'id': video_id,
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index 1a63ab56a9b02e9f5aa3d28dde6a57fb8155fc35..aa34665d1669f32ab31a02618c58ef9c4b130fe2 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -1,6 +1,7 @@
  from __future__ import unicode_literals
  
  import re
+import json
  
  from .common import InfoExtractor
  from ..utils import find_xpath_attr, compat_str
@@ -31,30 +32,68 @@ class NBCIE(InfoExtractor):
  
  
  class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
+        ((video/.+?/(?P<id>\d+))|
+        (feature/[^/]+/(?P<title>.+)))
+        '''
  
-    _TEST = {
-        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
-        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
-        'info_dict': {
-            'id': '52753292',
-            'ext': 'flv',
-            'title': 'Crew emerges after four-month Mars food study',
-            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+    _TESTS = [
+        {
+            'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
+            'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
+            'info_dict': {
+                'id': '52753292',
+                'ext': 'flv',
+                'title': 'Crew emerges after four-month Mars food study',
+                'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+            },
          },
-    }
+        {
+            'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
+            'md5': 'b2421750c9f260783721d898f4c42063',
+            'info_dict': {
+                'id': 'I1wpAI_zmhsQ',
+                'ext': 'flv',
+                'title': 'How Twitter Reacted To The Snowden Interview',
+                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+    ]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
-        all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-        info = all_info.find('video')
+        if video_id is not None:
+            all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+            info = all_info.find('video')
  
-        return {
-            'id': video_id,
-            'title': info.find('headline').text,
-            'ext': 'flv',
-            'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-            'description': compat_str(info.find('caption').text),
-            'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-        }
+            return {
+                'id': video_id,
+                'title': info.find('headline').text,
+                'ext': 'flv',
+                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+                'description': compat_str(info.find('caption').text),
+                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+            }
+        else:
+            # "feature" pages use theplatform.com
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            bootstrap_json = self._search_regex(
+                r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
+                flags=re.MULTILINE)
+            bootstrap = json.loads(bootstrap_json)
+            info = bootstrap['results'][0]['video']
+            playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
+            mpxid = info['mpxId']
+            all_videos = self._download_json(playlist_url, title)['videos']
+            # The response contains additional videos
+            info = next(v for v in all_videos if v['mpxId'] == mpxid)
+
+            return {
+                '_type': 'url',
+                # We get the best quality video
+                'url': info['videoAssets'][-1]['publicUrl'],
+                'ie_key': 'ThePlatform',
+            }
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py

index 0650f956481c9011032a278fc1a9375b98e26539..3d6096e46fbe6df0f6885fbdae483f05ac07cf6f 100644 (file)
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+)
  
  
  class NDRIE(InfoExtractor):
@@ -45,17 +49,16 @@ class NDRIE(InfoExtractor):
  
          page = self._download_webpage(url, video_id, 'Downloading page')
  
-        title = self._og_search_title(page)
+        title = self._og_search_title(page).strip()
          description = self._og_search_description(page)
+        if description:
+            description = description.strip()
  
-        mobj = re.search(
-            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
-            page)
-        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
  
          formats = []
  
-        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
+        mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
          if mp3_url:
              formats.append({
                  'url': mp3_url.group('audio'),
@@ -64,13 +67,15 @@ class NDRIE(InfoExtractor):
  
          thumbnail = None
  
-        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
          if video_url:
-            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
-                page, 'thumbnail', fatal=False)
-            if thumbnail:
-                thumbnail = 'http://www.ndr.de' + thumbnail
-            for format_id in ['lo', 'hi', 'hq']:
+            thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
+            if thumbnails:
+                quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
+                largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
+                thumbnail = 'http://www.ndr.de' + largest[0]
+
+            for format_id in 'lo', 'hi', 'hq':
                  formats.append({
                      'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
                      'format_id': format_id,
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py

index b1bcb7e54cf3f01989eb17c51160acce680eed2c..1c5e9401f36c72a73a701bdffc89529979a1eaaf 100644 (file)
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -4,9 +4,7 @@ import re
  
  from .brightcove import BrightcoveIE
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError
  
  
  class NownessIE(InfoExtractor):
@@ -14,9 +12,10 @@ class NownessIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
-        'file': '2520295746001.mp4',
-        'md5': '0ece2f70a7bd252c7b00f3070182d418',
+        'md5': '068bc0202558c2e391924cb8cc470676',
          'info_dict': {
+            'id': '2520295746001',
+            'ext': 'mp4',
              'description': 'Candor: The Art of Gesticulation',
              'uploader': 'Nowness',
              'title': 'Candor: The Art of Gesticulation',
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py

index e6d68b8361be664ce98534d21d0921cb549eff2b..3a6a7883e31f2ff309157f0e4d27765ef98fdf1b 100644 (file)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
  
  
  class NRKIE(InfoExtractor):
@@ -64,4 +68,78 @@ class NRKIE(InfoExtractor):
              'title': data['title'],
              'description': data['description'],
              'thumbnail': thumbnail,
+        }
+
+
+class NRKTVIE(InfoExtractor):
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+
+    _TESTS = [
+        {
+            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
+            'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
+            'info_dict': {
+                'id': 'muhh48000314',
+                'ext': 'flv',
+                'title': '20 spørsmål',
+                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+                'upload_date': '20140523',
+                'duration': 1741.52,
+            }
+        },
+        {
+            'url': 'http://tv.nrk.no/program/mdfp15000514',
+            'md5': '383650ece2b25ecec996ad7b5bb2a384',
+            'info_dict': {
+                'id': 'mdfp15000514',
+                'ext': 'flv',
+                'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
+                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
+                'upload_date': '20140524',
+                'duration': 4605.0,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta('title', page, 'title')
+        description = self._html_search_meta('description', page, 'description')
+        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
+        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
+        duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
+        if duration:
+            duration = float(duration)
+
+        formats = []
+
+        f4m_url = re.search(r'data-media="([^"]+)"', page)
+        if f4m_url:
+            formats.append({
+                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
+                'format_id': 'f4m',
+                'ext': 'flv',
+            })
+
+        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+        if m3u8_url:
+            formats.append({
+                'url': m3u8_url.group(1),
+                'format_id': 'm3u8',
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
          }
 \ No newline at end of file
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py

index f0befa1168f690cb35c47218627d5fc888359224..e3db9fe8c6c643d49aa8c521921bc3f508010f98 100644 (file)
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -30,7 +30,7 @@ class NuvidIE(InfoExtractor):
              webpage, 'title').strip()
  
          url_end = self._html_search_regex(
-            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"',
+            r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
              webpage, 'video_url')
          video_url = 'http://m.nuvid.com' + url_end
  
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 7dd3dca0de94f41bb82c9baeacc45e5ab14ae0a2..4118ee9560e03d2fa1eea171766ef4893e274aa5 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor):
  
          video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
          video_uploader = self._html_search_regex(
-            r'(?s)<div class="video-info-row">\s*From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
              webpage, 'uploader', fatal=False)
          thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
          if thumbnail:
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index d6f453fb969f53d89cf1207999fa4fcbea4ffe5f..25515f0686b0725075005da7f93f17544bd1b1ea 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -12,6 +12,7 @@ from ..utils import (
      compat_urllib_parse,
  
      ExtractorError,
+    int_or_none,
      unified_strdate,
  )
  
@@ -44,7 +45,8 @@ class SoundcloudIE(InfoExtractor):
                  "upload_date": "20121011",
                  "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
                  "uploader": "E.T. ExTerrestrial Music",
-                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
+                "duration": 143,
              }
          },
          # not streamable song
@@ -57,6 +59,7 @@ class SoundcloudIE(InfoExtractor):
                  'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                  'uploader': 'The Royal Concept',
                  'upload_date': '20120521',
+                'duration': 227,
              },
              'params': {
                  # rtmp
@@ -74,6 +77,7 @@ class SoundcloudIE(InfoExtractor):
                  'uploader': 'jaimeMF',
                  'description': 'test chars:  \"\'/\\ä↭',
                  'upload_date': '20131209',
+                'duration': 9,
              },
          },
          # downloadable song
@@ -87,6 +91,7 @@ class SoundcloudIE(InfoExtractor):
                  'description': 'Vocals',
                  'uploader': 'Sim Gretina',
                  'upload_date': '20130815',
+                #'duration': 42,
              },
          },
      ]
@@ -119,6 +124,7 @@ class SoundcloudIE(InfoExtractor):
              'title': info['title'],
              'description': info['description'],
              'thumbnail': thumbnail,
+            'duration': int_or_none(info.get('duration'), 1000),
          }
          formats = []
          if info.get('downloadable', False):
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py

new file mode 100644 (file)

index 0000000..7f388ac
--- /dev/null
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+
+
+class SpiegeltvIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)'
+    _TEST = {
+        'url': 'http://www.spiegel.tv/filme/flug-mh370/',
+        'info_dict': {
+            'id': 'flug-mh370',
+            'ext': 'm4v',
+            'title': 'Flug MH370',
+            'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
+            'thumbnail': 're:http://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
+
+        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
+        version_json = self._download_json(
+            '%s/version.json' % apihost, video_id,
+            note='Downloading version information')
+        version_name = version_json['version_name']
+
+        slug_json = self._download_json(
+            '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
+            video_id,
+            note='Downloading object information')
+        oid = slug_json['object_id']
+
+        media_json = self._download_json(
+            '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
+            video_id, note='Downloading media information')
+        uuid = media_json['uuid']
+        is_wide = media_json['is_wide']
+
+        server_json = self._download_json(
+            'http://www.spiegel.tv/streaming_servers/', video_id,
+            note='Downloading server information')
+        server = server_json[0]['endpoint']
+
+        thumbnails = []
+        for image in media_json['images']:
+            thumbnails.append({
+                'url': image['url'],
+                'width': image['width'],
+                'height': image['height'],
+            })
+
+        description = media_json['subtitle']
+        duration = media_json['duration_in_ms'] / 1000.
+
+        if is_wide:
+            format = '16x9'
+        else:
+            format = '4x3'
+
+        url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'ext': 'm4v',
+            'description': description,
+            'duration': duration,
+            'thumbnails': thumbnails
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py

index 7362904db50588acb6f1f988b7fc6665f9b8ce2e..73efe95420ff7b83412864de02d8d5601690b537 100644 (file)
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -5,13 +5,16 @@ import re
  import json
  
  from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    compat_str,
+)
  
  
  class StreamCZIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
          'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
          'info_dict': {
@@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor):
              'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
              'duration': 256,
          },
-    }
+    }, {
+        'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
+        'md5': '246272e753e26bbace7fcd9deca0650c',
+        'info_dict': {
+            'id': '10002447',
+            'ext': 'mp4',
+            'title': 'Kancelář Blaník: Tři roky pro Mazánka',
+            'description': 'md5:9177695a8b756a0a8ab160de4043b392',
+            'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
+            'duration': 368,
+        },
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor):
          self._sort_formats(formats)
  
          return {
-            'id': str(jsonData['id']),
+            'id': compat_str(jsonData['episode_id']),
              'title': self._og_search_title(webpage),
              'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
              'formats': formats,
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py

new file mode 100644 (file)

index 0000000..6c688c5
--- /dev/null
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class SWRMediathekIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+    _TESTS = [{
+        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
+        'md5': '8c5f6f0172753368547ca8413a7768ac',
+        'info_dict': {
+            'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'SWR odysso',
+            'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
+            'thumbnail': 're:^http:.*\.jpg$',
+            'duration': 2602,
+            'upload_date': '20140515',
+            'uploader': 'SWR Fernsehen',
+            'uploader_id': '990030',
+        },
+    }, {
+        'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+        'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
+        'info_dict': {
+            'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
+            'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 5305,
+            'upload_date': '20140516',
+            'uploader': 'SWR Fernsehen',
+            'uploader_id': '990030',
+        },
+    }, {
+        'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+        'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
+        'info_dict': {
+            'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
+            'ext': 'mp3',
+            'title': 'Saša Stanišic: Vor dem Fest',
+            'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 3366,
+            'upload_date': '20140520',
+            'uploader': 'SWR 2',
+            'uploader_id': '284670',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        video = self._download_json(
+            'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
+
+        attr = video['attr']
+        media_type = attr['entry_etype']
+
+        formats = []
+        for entry in video['sub']:
+            if entry['name'] != 'entry_media':
+                continue
+
+            entry_attr = entry['attr']
+            codec = entry_attr['val0']
+            quality = int(entry_attr['val1'])
+
+            fmt = {
+                'url': entry_attr['val2'],
+                'quality': quality,
+            }
+
+            if media_type == 'Video':
+                fmt.update({
+                    'format_note': ['144p', '288p', '544p'][quality-1],
+                    'vcodec': codec,
+                })
+            elif media_type == 'Audio':
+                fmt.update({
+                    'acodec': codec,
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': attr['entry_title'],
+            'description': attr['entry_descl'],
+            'thumbnail': attr['entry_image_16_9'],
+            'duration': parse_duration(attr['entry_durat']),
+            'upload_date': attr['entry_pdatet'][:-4],
+            'uploader': attr['channel_title'],
+            'uploader_id': attr['channel_idkey'],
+            'formats': formats,
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py

new file mode 100644 (file)

index 0000000..3633152
--- /dev/null
+++ b/youtube_dl/extractor/tagesschau.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TagesschauIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
+        'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+        'info_dict': {
+            'id': '1399128',
+            'ext': 'mp4',
+            'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
+            'description': 'md5:69da3c61275b426426d711bde96463ab',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
+        'md5': '8aaa8bf3ae1ca2652309718c03019128',
+        'info_dict': {
+            'id': '196',
+            'ext': 'mp4',
+            'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
+            'description': 'md5:f22e4af75821d174fa6c977349682691',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    _FORMATS = {
+        's': {'width': 256, 'height': 144, 'quality': 1},
+        'm': {'width': 512, 'height': 288, 'quality': 2},
+        'l': {'width': 960, 'height': 544, 'quality': 3},
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        if video_id.startswith('-'):
+            display_id = video_id.strip('-')
+        else:
+            display_id = video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerpage = self._download_webpage(
+            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
+            display_id, 'Downloading player page')
+
+        medias = re.findall(
+            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+            playerpage)
+
+        formats = []
+        for url, ext, res in medias:
+            f = {
+                'format_id': res + '_' + ext,
+                'url': url,
+                'ext': ext,
+            }
+            f.update(self._FORMATS.get(res, {}))
+            formats.append(f)
+
+        self._sort_formats(formats)
+
+        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+
+        return {
+            'id': display_id,
+            'title': self._og_search_title(webpage).strip(),
+            'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+            'formats': formats,
+            'description': self._og_search_description(webpage).strip(),
+        }
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py

new file mode 100644 (file)

index 0000000..6d52763
--- /dev/null
+++ b/youtube_dl/extractor/teachertube.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    qualities,
+    determine_ext,
+)
+
+
+class TeacherTubeIE(InfoExtractor):
+    IE_NAME = 'teachertube'
+    IE_DESC = 'teachertube.com videos'
+
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
+        'md5': 'f9434ef992fd65936d72999951ee254c',
+        'info_dict': {
+            'id': '339997',
+            'ext': 'mp4',
+            'title': 'Measures of dispersion from a frequency table_x264',
+            'description': 'md5:a3e9853487185e9fcd7181a07164650b',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
+        'md5': '0d625ec6bc9bf50f70170942ad580676',
+        'info_dict': {
+            'id': '340064',
+            'ext': 'mp4',
+            'title': 'How to Make Paper Dolls _ Paper Art Projects',
+            'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.teachertube.com/music.php?music_id=8805',
+        'md5': '01e8352006c65757caf7b961f6050e21',
+        'info_dict': {
+            'id': '8805',
+            'ext': 'mp3',
+            'title': 'PER ASPERA AD ASTRA',
+            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        quality = qualities(['mp3', 'flv', 'mp4'])
+
+        formats = [
+            {
+                'url': media_url,
+                'quality': quality(determine_ext(media_url))
+            } for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1])
+        ]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+        }
+
+
+class TeacherTubeClassroomIE(InfoExtractor):
+    IE_NAME = 'teachertube:classroom'
+    IE_DESC = 'teachertube.com online classrooms'
+
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+
+        rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
+                                      user_id, 'Downloading classroom RSS')
+
+        entries = []
+        for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
+            entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
+
+        return self.playlist_result(entries, user_id)
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py

new file mode 100644 (file)

index 0000000..117afa9
--- /dev/null
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TeachingChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
+
+    _TEST = {
+        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+        'info_dict': {
+            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+            'ext': 'mp4',
+            'title': 'A History of Teaming',
+            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        ooyala_code = self._search_regex(
+            r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
+
+        return OoyalaIE._build_url_result(ooyala_code)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index f15780ef540d1ef39cc3526b15629004dabb73cd..b6b2dba9ca9e6ee02c7dc6b2cf01d3601874a6b2 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import re
  import json
  
@@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor):
  
      _TEST = {
          # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
-        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
-        u'info_dict': {
-            u'id': u'e9I_cZgTgIPd',
-            u'ext': u'flv',
-            u'title': u'Blackberry\'s big, bold Z30',
-            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
-            u'duration': 247,
+        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+        'info_dict': {
+            'id': 'e9I_cZgTgIPd',
+            'ext': 'flv',
+            'title': 'Blackberry\'s big, bold Z30',
+            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+            'duration': 247,
          },
-        u'params': {
+        'params': {
              # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
          },
      }
  
@@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor):
              error_msg = next(
                  n.attrib['abstract']
                  for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == u'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction')
          except StopIteration:
              pass
          else:
@@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor):
              config_url = url+ '&form=json'
              config_url = config_url.replace('swf/', 'config/')
              config_url = config_url.replace('onsite/', 'onsite/config/')
-            config_json = self._download_webpage(config_url, video_id, u'Downloading config')
-            config = json.loads(config_json)
+            config = self._download_json(config_url, video_id, 'Downloading config')
              smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
          else:
              smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py

index e4bb3b949081d7e3c9a74632975ca633870a4a38..488b10df96e298c683cd02287e2da0c49f21a1cc 100644 (file)
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -11,29 +11,36 @@ from ..utils import (
  
  
  class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
      IE_NAME = 'ustream'
      _TEST = {
          'url': 'http://www.ustream.tv/recorded/20274954',
-        'file': '20274954.flv',
          'md5': '088f151799e8f572f84eb62f17d73e5c',
          'info_dict': {
-            "uploader": "Young Americans for Liberty",
-            "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+            'id': '20274954',
+            'ext': 'flv',
+            'uploader': 'Young Americans for Liberty',
+            'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
          },
      }
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
+        video_id = m.group('videoID')
+
+        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        if m.group('type') == 'embed/recorded':
+            video_id = m.group('videoID')
+            desktop_url = 'http://www.ustream.tv/recorded/' + video_id
+            return self.url_result(desktop_url, 'Ustream')
          if m.group('type') == 'embed':
              video_id = m.group('videoID')
              webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
+            desktop_video_id = self._html_search_regex(
+                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
              desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
              return self.url_result(desktop_url, 'Ustream')
  
-        video_id = m.group('videoID')
-
          video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
          webpage = self._download_webpage(url, video_id)
  
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index ea34a8f16c86008d663f3ceb0a7482b242160391..eada13ce920b9f4e892f952242ef87bfac504600 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -16,7 +16,7 @@ class VevoIE(InfoExtractor):
      (currently used by MTVIE)
      """
      _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
             https?://cache\.vevo\.com/m/html/embed\.html\?video=|
             https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
             vevo:)
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py

index 85e99e1b02b8ab7e7647d6c91dbad08f4827d5f3..7e00448246beb9ab9b7c25f33b05e6f4f1bb8283 100644 (file)
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -5,18 +5,21 @@ import re
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse,
+    ExtractorError,
+    clean_html,
  )
  
  
  class XVideosIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
      _TEST = {
-        'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
-        'file': '939581.flv',
-        'md5': '1d0c835822f0a71a7bf011855db929d0',
+        'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
+        'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
          'info_dict': {
-            "title": "Funny Porns By >>>>S<<<<<< -1",
-            "age_limit": 18,
+            'id': '4588838',
+            'ext': 'flv',
+            'title': 'Biker Takes his Girl',
+            'age_limit': 18,
          }
      }
  
@@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):
  
          self.report_extraction(video_id)
  
+        mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+        if mobj:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
          # Extract video URL
          video_url = compat_urllib_parse.unquote(
              self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py

index 393f6ffbe316f4a9dde25ad219f5c0cc00f82a91..d84be25620eecb944845b74299510067772c583f 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -21,7 +21,7 @@ class YahooIE(InfoExtractor):
              'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
              'md5': '4962b075c08be8690a922ee026d05e69',
              'info_dict': {
-                'id': '214727115',
+                'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
                  'ext': 'mp4',
                  'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                  'description': 'Julian and Travis watch Julian Smith',
@@ -31,7 +31,7 @@ class YahooIE(InfoExtractor):
              'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
              'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
              'info_dict': {
-                'id': '103000935',
+                'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                  'ext': 'mp4',
                  'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                  'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
@@ -58,9 +58,11 @@ class YahooIE(InfoExtractor):
              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
              default=None)
          if items_json is None:
-            long_id = self._search_regex(
+            CONTENT_ID_REGEXES = [
                  r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
-                webpage, 'content ID')
+                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
+            ]
+            long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
              video_id = long_id
          else:
              items = json.loads(items_json)
@@ -68,9 +70,9 @@ class YahooIE(InfoExtractor):
              # The 'meta' field is not always in the video webpage, we request it
              # from another page
              long_id = info['id']
-        return self._get_info(long_id, video_id)
+        return self._get_info(long_id, video_id, webpage)
  
-    def _get_info(self, long_id, video_id):
+    def _get_info(self, long_id, video_id, webpage):
          query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
                   ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
                   ' AND protocol="http"' % long_id)
@@ -113,7 +115,7 @@ class YahooIE(InfoExtractor):
              'title': meta['title'],
              'formats': formats,
              'description': clean_html(meta['description']),
-            'thumbnail': meta['thumbnail'],
+            'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
          }
  
  
@@ -137,7 +139,7 @@ class YahooNewsIE(YahooIE):
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
          long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id')
-        return self._get_info(long_id, video_id)
+        return self._get_info(long_id, video_id, webpage)
  
  
  class YahooSearchIE(SearchInfoExtractor):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 981ca62c0d9762288e736eab0e68fca5273c8752..7c50881c4453eaff4ac69776fcc2dc94feef8d31 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
          '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
          '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
  
          # Dash webm audio
          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -1140,7 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
          if mobj is None:
              mobj = re.search(
-                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
+                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                  video_webpage)
          if mobj is not None:
              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
@@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
          title_span = (search_title('playlist-title') or
              search_title('title long-title') or search_title('title'))
          title = clean_html(title_span)
-        video_re = r'''(?x)data-video-username="(.*?)".*?
+        video_re = r'''(?x)data-video-username=".*?".*?
                         href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
-        # Some of the videos may have been deleted, their username field is empty
-        ids = [video_id for (username, video_id) in matches if username]
+        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
          url_results = self._ids_to_results(ids)
  
          return self.playlist_result(url_results, playlist_id, title)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 04a04f579fb72e4fc066909770aadabf50e7b772..47dde62b9cb016aad90bd9779e8a59e5c8b22dec 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.05.16.1'
+__version__ = '2014.06.04'
author	Philipp Hagemeister <phihag@phihag.de>
	Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sat, 7 Jun 2014 13:53:30 +0000 (15:53 +0200)
CHANGELOG	[deleted file]	patch \| blob \| history
Makefile		patch \| blob \| history
devscripts/release.sh		patch \| blob \| history
test/test_YoutubeDL.py		patch \| blob \| history
test/test_age_restriction.py		patch \| blob \| history
test/test_playlists.py		patch \| blob \| history
test/test_youtube_lists.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/downloader/rtmp.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/aftonbladet.py		patch \| blob \| history
youtube_dl/extractor/ard.py		patch \| blob \| history
youtube_dl/extractor/bandcamp.py		patch \| blob \| history
youtube_dl/extractor/blinkx.py		patch \| blob \| history
youtube_dl/extractor/cinemassacre.py		patch \| blob \| history
youtube_dl/extractor/cmt.py		patch \| blob \| history
youtube_dl/extractor/cnn.py		patch \| blob \| history
youtube_dl/extractor/comedycentral.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/empflix.py		patch \| blob \| history
youtube_dl/extractor/extremetube.py		patch \| blob \| history
youtube_dl/extractor/fc2.py		patch \| blob \| history
youtube_dl/extractor/gamekings.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/ivi.py		patch \| blob \| history
youtube_dl/extractor/ku6.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/mailru.py		patch \| blob \| history
youtube_dl/extractor/naver.py		patch \| blob \| history
youtube_dl/extractor/nbc.py		patch \| blob \| history
youtube_dl/extractor/ndr.py		patch \| blob \| history
youtube_dl/extractor/nowness.py		patch \| blob \| history
youtube_dl/extractor/nrk.py		patch \| blob \| history
youtube_dl/extractor/nuvid.py		patch \| blob \| history
youtube_dl/extractor/pornhub.py		patch \| blob \| history
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/spiegeltv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/streamcz.py		patch \| blob \| history
youtube_dl/extractor/swrmediathek.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tagesschau.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/teachertube.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/teachingchannel.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/theplatform.py		patch \| blob \| history
youtube_dl/extractor/ustream.py		patch \| blob \| history
youtube_dl/extractor/vevo.py		patch \| blob \| history
youtube_dl/extractor/xvideos.py		patch \| blob \| history
youtube_dl/extractor/yahoo.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history