To run the exe you need to first install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
-# BUILD INSTRUCTIONS
+# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
* zip
* nosetests
+### Adding support for a new site
+
+If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
--- /dev/null
+# This allows the youtube-dl command to be installed in ZSH using antigen.
+# Antigen is a bundle manager. It allows you to enhance the functionality of
+# your zsh session by installing bundles and themes easily.
+
+# Antigen documentation:
+# http://antigen.sharats.me/
+# https://github.com/zsh-users/antigen
+
+# Install youtube-dl:
+# antigen bundle rg3/youtube-dl
+# Bundles installed by antigen are available for use immediately.
+
+# Update youtube-dl (and all other antigen bundles):
+# antigen update
+
+# The antigen command will download the git repository to a folder and then
+# execute an enabling script (this file). The complete process for loading the
+# code is documented here:
+# https://github.com/zsh-users/antigen#notes-on-writing-plugins
+
+# This specific script just aliases youtube-dl to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .la7 import LA7IE
+from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
- return OoyalaIE._build_url_result(ooyala_code)
+ ooyala_url = self._twitter_search_player(webpage)
+ return self.url_result(ooyala_url, OoyalaIE.ie_key())
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
'age_limit': 10,
},
- }]
+ },
+ # embedded youtube video
+ {
+ 'url': 'http://www.collegehumor.com/embed/6950457',
+ 'info_dict': {
+ 'id': 'W5gMp3ZjYg4',
+ 'ext': 'mp4',
+ 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
+ 'uploader': 'Funnyplox TV',
+ 'uploader_id': 'funnyploxtv',
+ 'description': 'md5:7e8899d3f749db50fa089eb243cba17f',
+ 'upload_date': '20140128',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
data = json.loads(self._download_webpage(
jsonUrl, video_id, 'Downloading info JSON'))
vdata = data['video']
+ if vdata.get('youtubeId') is not None:
+ return {
+ '_type': 'url',
+ 'url': vdata['youtubeId'],
+ 'ie_key': 'Youtube',
+ }
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
rating = vdata.get('rating')
PREFS = {'high_quality': 2, 'low_quality': 0}
formats = []
for format_key in ('mp4', 'webm'):
- for qname, qurl in vdata[format_key].items():
+ for qname, qurl in vdata.get(format_key, {}).items():
formats.append({
'format_id': format_key + '_' + qname,
'url': qurl,
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+ _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
- m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+ m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
- content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+ content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
}
return RATING_TABLE.get(rating.lower(), None)
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
# encoding: utf-8
+from __future__ import unicode_literals
+
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
)
class CrunchyrollIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
- u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
- u'file': u'645513.flv',
- #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
- u'info_dict': {
- u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
- u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
- u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
- u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
- u'upload_date': u'20131013',
+ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ 'file': '645513.flv',
+ #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
+ 'info_dict': {
+ 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
+ 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
+ 'upload_date': '20131013',
},
- u'params': {
+ 'params': {
# rtmp
- u'skip_download': True,
+ 'skip_download': True,
},
}]
_FORMAT_IDS = {
- u'360': (u'60', u'106'),
- u'480': (u'61', u'106'),
- u'720': (u'62', u'106'),
- u'1080': (u'80', u'108'),
+ '360': ('60', '106'),
+ '480': ('61', '106'),
+ '720': ('62', '106'),
+ '1080': ('80', '108'),
}
def _decrypt_subtitles(self, data, iv, id):
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
- shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
def _convert_subtitles_to_srt(self, subtitles):
i=1
- output = u''
+ output = ''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
- start = start.replace(u'.', u',')
- end = end.replace(u'.', u',')
+ start = start.replace('.', ',')
+ end = end.replace('.', ',')
text = clean_html(text)
- text = text.replace(u'\\N', u'\n')
+ text = text.replace('\\N', '\n')
if not text:
continue
- output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ if mobj.group('prefix') == 'm':
+ mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
+ webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
+ else:
+ webpage_url = 'http://www.' + mobj.group('url')
- webpage_url = u'http://www.' + mobj.group('url')
- video_id = mobj.group(u'video_id')
- webpage = self._download_webpage(webpage_url, video_id)
- note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
+ webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
+ note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
if note_m:
raise ExtractorError(note_m)
- video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
- video_title = re.sub(r' {2,}', u' ', video_title)
- video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
+ video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
+ video_title = re.sub(r' {2,}', ' ', video_title)
+ video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
- video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
+ video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
- video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
+ video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
- playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
+ playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
- playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
- playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
+ playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
+ playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
- stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
- video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
+ stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
+ video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt+u'p'
- streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
+ video_format = fmt+'p'
+ streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
- streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
- streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
- streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
- video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
- video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
+ streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
+ streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
+ video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
+ video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
formats.append({
- u'url': video_url,
- u'play_path': video_play_path,
- u'ext': 'flv',
- u'format': video_format,
- u'format_id': video_format,
+ 'url': video_url,
+ 'play_path': video_play_path,
+ 'ext': 'flv',
+ 'format': video_format,
+ 'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
- video_id, note=u'Downloading subtitles for '+sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
+ sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
+ video_id, note='Downloading subtitles for '+sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
- subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
- lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
- u'id': video_id,
- u'title': video_title,
- u'description': video_description,
- u'thumbnail': video_thumbnail,
- u'uploader': video_uploader,
- u'upload_date': video_upload_date,
- u'subtitles': subtitles,
- u'formats': formats,
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ 'upload_date': video_upload_date,
+ 'subtitles': subtitles,
+ 'formats': formats,
}
# encoding: utf-8
+
+from __future__ import unicode_literals
+
import re
import json
class PluzzIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'pluzz.francetv.fr'
+ IE_NAME = 'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
# Can't use tests, videos expire in 7 days
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetvinfo.fr'
+ IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
- u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
- u'file': u'84981923.mp4',
- u'info_dict': {
- u'title': u'Soir 3',
+ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ 'file': '84981923.mp4',
+ 'info_dict': {
+ 'title': 'Soir 3',
},
- u'params': {
- u'skip_download': True,
+ 'params': {
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
- video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
+ video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
return self._extract_video(video_id)
class FranceTVIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetv'
- IE_DESC = u'France 2, 3, 4, 5 and Ô'
+ IE_NAME = 'francetv'
+ IE_DESC = 'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
_TESTS = [
# france2
{
- u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
- u'file': u'75540104.mp4',
- u'info_dict': {
- u'title': u'13h15, le samedi...',
- u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+ 'file': '75540104.mp4',
+ 'info_dict': {
+ 'title': '13h15, le samedi...',
+ 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france3
{
- u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
- u'info_dict': {
- u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
- u'ext': u'flv',
- u'title': u'Le scandale du prix des médicaments',
- u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+ 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+ 'info_dict': {
+ 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+ 'ext': 'flv',
+ 'title': 'Le scandale du prix des médicaments',
+ 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france4
{
- u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'info_dict': {
- u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'ext': u'flv',
- u'title': u'Hero Corp Making of - Extrait 1',
- u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+ 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'info_dict': {
+ 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'ext': 'flv',
+ 'title': 'Hero Corp Making of - Extrait 1',
+ 'description': 'md5:c87d54871b1790679aec1197e73d650a',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france5
{
- u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
- u'info_dict': {
- u'id': u'92837968',
- u'ext': u'mp4',
- u'title': u'C à dire ?!',
- u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+ 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+ 'info_dict': {
+ 'id': '92837968',
+ 'ext': 'mp4',
+ 'title': 'C à dire ?!',
+ 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# franceo
{
- u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
- u'info_dict': {
- u'id': u'92327925',
- u'ext': u'mp4',
- u'title': u'Infô-Afrique',
- u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+ 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+ 'info_dict': {
+ 'id': '92327925',
+ 'ext': 'mp4',
+ 'title': 'Infô-Afrique',
+ 'description': 'md5:ebf346da789428841bee0fd2a935ea55',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
- u'skip': u'The id changes frequently',
+ 'skip': 'The id changes frequently',
},
]
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
- video_id = self._html_search_regex(id_res, webpage, u'video ID')
+ video_id = self._html_search_regex(id_res, webpage, 'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
class GenerationQuoiIE(InfoExtractor):
- IE_NAME = u'france2.fr:generation-quoi'
+ IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
_TEST = {
- u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
- u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
- u'info_dict': {
- u'title': u'Génération Quoi - Garde à Vous',
- u'uploader': u'Génération Quoi',
+ 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+ 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
+ 'info_dict': {
+ 'title': 'Génération Quoi - Garde à Vous',
+ 'uploader': 'Génération Quoi',
},
- u'params': {
+ 'params': {
# It uses Dailymotion
- u'skip_download': True,
+ 'skip_download': True,
},
}
class CultureboxIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'culturebox.francetvinfo.fr'
+ IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
- u'info_dict': {
- u'id': u'EV_6785',
- u'ext': u'mp4',
- u'title': u'Einstein on the beach au Théâtre du Châtelet',
- u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
+ 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
+ 'info_dict': {
+ 'id': 'EV_6785',
+ 'ext': 'mp4',
+ 'title': 'Einstein on the beach au Théâtre du Châtelet',
+ 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
+ video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
return self._extract_video(video_id)
'title': 'R\u00e9gis plante sa Jeep',
}
},
- # embedded vimeo video
- {
- 'add_ie': ['Vimeo'],
- 'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
- 'file': '22444065.mp4',
- 'md5': '2903896e23df39722c33f015af0666e2',
- 'info_dict': {
- 'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
- 'uploader_id': 'skillsmatter',
- 'uploader': 'Skills Matter',
- }
- },
# bandcamp page with custom domain
{
'add_ie': ['Bandcamp'],
# Look for embedded (iframe) Vimeo player
mobj = re.search(
- r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
+ r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
if mobj:
player_url = unescapeHTML(mobj.group(1))
surl = smuggle_url(player_url, {'Referer': url})
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
return self.url_result(mobj.group(1), 'Aparat')
# Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded Huffington Post player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'HuffPost')
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
# Look for gorilla-vid style embedding
- mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage)
+ mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+from __future__ import unicode_literals
+
import base64
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
-
- ExtractorError,
)
class InfoQIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
_TEST = {
- u"name": u"InfoQ",
- u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
- u"file": u"12-jan-pythonthings.mp4",
- u"info_dict": {
- u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
- u"title": u"A Few of My Favorite [Python] Things"
+ "name": "InfoQ",
+ "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
+ "file": "12-jan-pythonthings.mp4",
+ "info_dict": {
+ "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
+ "title": "A Few of My Favorite [Python] Things",
+ },
+ "params": {
+ "skip_download": True,
},
- u"params": {
- u"skip_download": True
- }
}
def _real_extract(self, url):
self.report_extraction(url)
# Extract video URL
- mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+ encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+ real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
video_title = self._search_regex(r'contentTitle = "(.*?)";',
- webpage, u'title')
+ webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
- webpage, u'description', fatal=False)
+ webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
- info = {
+ return {
'id': video_id,
'url': video_url,
- 'uploader': None,
- 'upload_date': None,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
- 'thumbnail': None,
'description': video_description,
}
-
- return [info]
\ No newline at end of file
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LifeNewsIE(InfoExtractor):
+ IE_NAME = 'lifenews'
+ IE_DESC = 'LIFE | NEWS'
+ _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://lifenews.ru/news/126342',
+ 'file': '126342.mp4',
+ 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
+ 'info_dict': {
+ 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
+ 'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
+ 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
+ 'upload_date': '20140130',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
+
+ video_url = self._html_search_regex(
+ r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
+
+ thumbnail = self._html_search_regex(
+ r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
+
+ title = self._og_search_title(webpage)
+ TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)]
+
+ description = self._og_search_description(webpage)
+
+ view_count = self._html_search_regex(
+ r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
+ comment_count = self._html_search_regex(
+ r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
+
+ upload_date = self._html_search_regex(
+ r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'description': description,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'upload_date': unified_strdate(upload_date),
+ }
\ No newline at end of file
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'file': '757_1364311680.mp4',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
- }
+ },
+ {
+ 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
+ 'file': 'f93_1390833151.mp4',
+ 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+ 'info_dict': {
+ 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
+ 'uploader': 'ARD_Stinkt',
+ 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+ }
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'file: "(.*?)",', webpage, 'video URL')
+ sources_raw = self._search_regex(
+ r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
+ if sources_raw is None:
+ sources_raw = '[{ %s}]' % (
+ self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
+
+ sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
+ sources = json.loads(sources_json)
+
+ formats = [{
+ 'format_note': s.get('label'),
+ 'url': s['file'],
+ } for s in sources]
+ self._sort_formats(formats)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
'title': video_title,
'description': video_description,
- 'uploader': video_uploader
+ 'uploader': video_uploader,
+ 'formats': formats,
}
'info_dict': {
"title": "Bien dur",
"age_limit": 18,
- }
+ },
+ 'skip': 'This video has been deleted.'
}
def _real_extract(self, url):
if mgid.endswith('.swf'):
mgid = mgid[:-4]
except RegexNotFoundError:
- mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
+ mgid = self._search_regex(
+ [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+ webpage, u'mgid')
return self._get_videos_info(mgid)
+from __future__ import unicode_literals
import os.path
from .common import InfoExtractor
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
- u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- u'file': u'11741.mp4',
- u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
- u'info_dict': {
- u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
- u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
- }
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'file': '11741.mp4',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ 'description': "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+ 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
+ },
}
def _real_extract(self, url):
# extract values from metadata
url_flv_el = metadata.find('url_flv')
if url_flv_el is None:
- raise ExtractorError(u'Unable to extract download url')
+ raise ExtractorError('Unable to extract download url')
video_url = url_flv_el.text
- extension = os.path.splitext(video_url)[1][1:]
title_el = metadata.find('title')
if title_el is None:
- raise ExtractorError(u'Unable to extract title')
+ raise ExtractorError('Unable to extract title')
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
- info = {
+
+ return {
'id': video_id,
'url': video_url,
'title': title,
- 'ext': extension,
'format': format,
'thumbnail': thumbnail,
- 'description': description
+ 'description': description,
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
- u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- u'file': u'0021200253-okc-bkn-recap.nba.mp4',
- u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
- u'info_dict': {
- u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
- u"title": u"Thunder vs. Nets"
- }
+ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+ 'file': '0021200253-okc-bkn-recap.nba.mp4',
+ 'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'info_dict': {
+ 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+ 'title': 'Thunder vs. Nets',
+ },
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
-
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
- # It isn't there in the HTML it returns to us
- # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
-
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
- info = {
+ return {
'id': shortened_video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
- # 'uploader_date': uploader_date,
'description': description,
}
- return [info]
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
- _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+ _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+# encoding: utf-8
+from __future__ import unicode_literals
+
import json
import re
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
_TEST = {
- u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
- u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
- u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
- u'info_dict': {
- u"uploader_id": u"ford-lopatin",
- u"location": u"Spain",
- u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
- u"uploader": u"Ford & Lopatin",
- u"title": u"Live at Primavera Sound 2011"
- }
+ 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
+ 'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
+ 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
+ 'info_dict': {
+ 'uploader_id': 'ford-lopatin',
+ 'location': 'Spain',
+ 'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.',
+ 'uploader': 'Ford & Lopatin',
+ 'title': 'Live at Primavera Sound 2011',
+ },
}
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
- webpage, u'json data', flags=re.MULTILINE)
+ webpage, 'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
except ValueError as e:
- raise ExtractorError(u'Invalid JSON: ' + str(e))
+ raise ExtractorError('Invalid JSON: ' + str(e))
video_url = data['akamai_url'] + '&cbr=256'
url_parts = compat_urllib_parse_urlparse(video_url)
- video_ext = url_parts.path.rpartition('.')[2]
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': data['title'],
- 'description': data.get('teaser_text'),
- 'location': data.get('country_of_origin'),
- 'uploader': data.get('host', {}).get('name'),
- 'uploader_id': data.get('host', {}).get('slug'),
- 'thumbnail': data.get('image', {}).get('large_url_2x'),
- 'duration': data.get('duration'),
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('teaser_text'),
+ 'location': data.get('country_of_origin'),
+ 'uploader': data.get('host', {}).get('name'),
+ 'uploader_id': data.get('host', {}).get('slug'),
+ 'thumbnail': data.get('image', {}).get('large_url_2x'),
+ 'duration': data.get('duration'),
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
- u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
- u'file': u'LYV6doKo7f.mp4',
- u'md5': u'03af18b73a07b4088753930db7a34add',
- u'info_dict': {
- u"title": u"Luati-le Banii sez 4 ep 1",
- u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+ 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/',
+ 'file': 'LYV6doKo7f.mp4',
+ 'md5': '03af18b73a07b4088753930db7a34add',
+ 'info_dict': {
+ 'title': 'Luati-le Banii sez 4 ep 1',
+ 'description': 'Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.',
}
}
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
- webpage, u'flashVars')
+ webpage, 'flashVars')
flashVars = compat_parse_qs(flashVars_str)
- info = {
+ return {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
- return info
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
- u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
- u'file': u'1259285.mp4',
- u'md5': u'2c2754212136f35fb4b19767d242f66e',
- u'info_dict': {
- u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
- }
+ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+ 'file': '1259285.mp4',
+ 'md5': '2c2754212136f35fb4b19767d242f66e',
+ 'info_dict': {
+ 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+ },
},
{
- u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
- u'file': u'1309159.mp4',
- u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
- u'info_dict': {
- u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
- }
+ 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ 'file': '1309159.mp4',
+ 'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+ 'info_dict': {
+ 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+ },
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
- r'<div class="module-title">(.*?)</div>', webpage, u'title')
+ r'<div class="module-title">(.*?)</div>', webpage, 'title')
- xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+ xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
idoc = self._download_xml(
xml_url, video_id,
- note=u'Downloading XML', errnote=u'Failed to download XML')
+ note='Downloading XML', errnote='Failed to download XML')
formats = [
{
'format_id': n.tag.rpartition('type')[2],
- 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+ 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
self._sort_formats(formats)
- info = {
+ return {
'id': video_id,
'title': video_title,
'duration': duration,
'formats': formats,
}
- return info
+from __future__ import unicode_literals
import base64
import re
compat_parse_qs,
)
+
class TutvIE(InfoExtractor):
- _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
- u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
- u'file': u'2742556.flv',
- u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
- u'info_dict': {
- u"title": u"Noah en pabellon cuahutemoc"
- }
+ 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
+ 'file': '2742556.flv',
+ 'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+ 'info_dict': {
+ 'title': 'Noah en pabellon cuahutemoc',
+ },
}
def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
+ internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
- data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
- data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
+ data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
+ data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
data = compat_parse_qs(data_content)
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
- ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
- info = {
+ return {
'id': internal_id,
'url': video_url,
- 'ext': ext,
'title': self._og_search_title(webpage),
}
- return [info]
+from __future__ import unicode_literals
+
import json
import re
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
- IE_NAME = u'ustream'
+ IE_NAME = 'ustream'
_TEST = {
- u'url': u'http://www.ustream.tv/recorded/20274954',
- u'file': u'20274954.flv',
- u'md5': u'088f151799e8f572f84eb62f17d73e5c',
- u'info_dict': {
- u"uploader": u"Young Americans for Liberty",
- u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
- }
+ 'url': 'http://www.ustream.tv/recorded/20274954',
+ 'file': '20274954.flv',
+ 'md5': '088f151799e8f572f84eb62f17d73e5c',
+ 'info_dict': {
+ 'uploader': 'Young Americans for Liberty',
+ 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ },
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
- video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
+ video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
- webpage, u'title')
+ webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, u'uploader', fatal=False, flags=re.DOTALL)
+ webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
- webpage, u'thumbnail', fatal=False)
-
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
- 'title': video_title,
- 'uploader': uploader,
- 'thumbnail': thumbnail,
- }
- return info
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': video_title,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ }
+
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
- IE_NAME = u'ustream:channel'
+ IE_NAME = 'ustream:channel'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
+from __future__ import unicode_literals
+
import re
import json
import xml.etree.ElementTree
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
- u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
- u'file': u'GB1101300280.mp4',
- u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
- u'info_dict': {
- u"upload_date": u"20130624",
- u"uploader": u"Hurts",
- u"title": u"Somebody to Die For",
- u"duration": 230.12,
- u"width": 1920,
- u"height": 1080,
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'file': 'GB1101300280.mp4',
+ 'md5': '06bea460acb744eab74a9d7dcb4bfd61',
+ 'info_dict': {
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'title': 'Somebody to Die For',
+ 'duration': 230.12,
+ 'width': 1920,
+ 'height': 1080,
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
if version['version'] > last_version['version']:
last_version = version
if last_version['version'] == -1:
- raise ExtractorError(u'Unable to extract last version of the video')
+ raise ExtractorError('Unable to extract last version of the video')
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
formats = []
format_url = self._SMIL_BASE_URL + m.group('path')
formats.append({
'url': format_url,
- 'format_id': u'SMIL_' + m.group('cbr'),
+ 'format_id': 'SMIL_' + m.group('cbr'),
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
- info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
- video_info = json.loads(info_json)['video']
+ video_info = self._download_json(json_url, video_id)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
- u'Downloading SMIL info')
+ 'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
- u'Cannot download SMIL information, falling back to JSON ..')
+ 'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
- r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+ r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
- info = {
+ return {
'id': video_id,
'title': video_info['title'],
'formats': formats,
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
}
-
- return info
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = {
- u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- u'file': u'2189178.flv',
- u'md5': u'07e15fa469ba384c7693fd246905547c',
- u'info_dict': {
- u"title": u"Zeichentrick 1",
- u"age_limit": 18,
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'file': '2189178.flv',
+ 'md5': '07e15fa469ba384c7693fd246905547c',
+ 'info_dict': {
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
}
}
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
- webpage, u'title').strip()
+ webpage, 'title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
- raise ExtractorError(u'ERROR: unable to extract embed page')
+ raise ExtractorError('ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
if m_playlist is not None:
playlist_url = m_playlist.group('playlist')
playlist_page = self._download_webpage(playlist_url, video_id,
- u'Downloading playlist page')
+ 'Downloading playlist page')
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
if len(m_levels) == 0:
- raise ExtractorError(u'Unable to extract video url')
+ raise ExtractorError('Unable to extract video url')
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
(_, video_url) = sorted(videos)[0]
video_url = video_url.replace('%252F', '%2F')
else:
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
- webpage, u'video URL')
-
- info = {'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'ext': 'flv',
- 'format': 'flv',
- 'player_url': embed_page_url,
- 'age_limit': age_limit}
+ webpage, 'video URL')
- return [info]
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'ext': 'flv',
+ 'format': 'flv',
+ 'player_url': embed_page_url,
+ 'age_limit': age_limit,
+ }
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
- 'id': 'video_id',
+ 'id': video_id,
'title': title,
}
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
-__version__ = '2014.01.28.1'
+__version__ = '2014.01.30.2'