Merge remote-tracking branch 'dstftw/generic-webpage-unescape'

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Mar 2014 21:14:24 +0000 (22:14 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Mar 2014 21:14:24 +0000 (22:14 +0100)
diff --git a/README.md b/README.md

index ccd94b2dcd65e70dbb7e262650081b7f7f6e156f..a10b13055694243ba7245d426d5baffc20fb8aaf 100644 (file)
--- a/README.md
+++ b/README.md
@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
                                       an empty string (--proxy "") for direct
                                       connection
      --no-check-certificate           Suppress HTTPS certificate validation.
+    --prefer-insecure                Use an unencrypted connection to retrieve
+                                     information about the video. (Currently
+                                     supported only for YouTube)
      --cache-dir DIR                  Location in the filesystem where youtube-dl
                                       can store some downloaded information
                                       permanently. By default $XDG_CACHE_HOME
@@ -191,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like.
                                       preference using slashes: "-f 22/17/18".
                                       "-f mp4" and "-f flv" are also supported.
                                       You can also use the special names "best",
-                                     "bestaudio", "worst", and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality.
+                                     "bestvideo", "bestaudio", "worst",
+                                     "worstvideo" and "worstaudio". By default,
+                                     youtube-dl will pick the best quality.
      --all-formats                    download all available video formats
      --prefer-free-formats            prefer free video formats unless a specific
                                       one is requested
diff --git a/devscripts/release.sh b/devscripts/release.sh

index 72e708c7f79c24f06797c4de10e6334ecf6bb9ec..aa3119c424556f2a14a88a2aef61c18b2b44c7bf 100755 (executable)
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
  git checkout HEAD -- youtube-dl youtube-dl.exe
  
  /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
-for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
  scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
  ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
  ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
@@ -97,7 +97,7 @@ rm -rf build
  
  make pypi-files
  echo "Uploading to PyPi ..."
-python setup.py sdist upload
+python setup.py sdist bdist_wheel upload
  make clean
  
  /bin/echo -e "\n### DONE!"
diff --git a/setup.cfg b/setup.cfg

new file mode 100644 (file)

index 0000000..e57d130
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[wheel]
+universal = True
diff --git a/test/helper.py b/test/helper.py

index b1f421ac58331bad23328502f42a0e1316df853d..17de951c55fce050111e3889b360729ecf3f7021 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL):
              old_report_warning(message)
          self.report_warning = types.MethodType(report_warning, self)
  
-def get_testcases():
+def gettestcases():
      for ie in youtube_dl.extractor.gen_extractors():
          t = getattr(ie, '_TEST', None)
          if t:
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index 047e84f192d977a436c28f8caf599981e4b25b26..39ac8b8a1188746348dd1c15058ec71ccc75b769 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -9,7 +9,7 @@ import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  
-from test.helper import get_testcases
+from test.helper import gettestcases
  
  from youtube_dl.extractor import (
      FacebookIE,
@@ -105,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase):
  
      def test_no_duplicates(self):
          ies = gen_extractors()
-        for tc in get_testcases():
+        for tc in gettestcases():
              url = tc['url']
              for ie in ies:
                  if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -141,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
      def test_pbs(self):
          # https://github.com/rg3/youtube-dl/issues/2350
          self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_download.py b/test/test_download.py

index ca8c82f71d614021ed6cdf9894da856453dde380..8fccdaf9e079e23de57856457fbc865be844fecd 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from test.helper import (
      get_params,
-    get_testcases,
+    gettestcases,
      try_rm,
      md5,
      report_warning
@@ -51,7 +51,7 @@ def _file_md5(fn):
      with open(fn, 'rb') as f:
          return hashlib.md5(f.read()).hexdigest()
  
-defs = get_testcases()
+defs = gettestcases()
  
  
  class TestDownload(unittest.TestCase):
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

deleted file mode 100755 (executable)

index 672ef9e..0000000
--- a/youtube_dl/InfoExtractors.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
-
-from .extractor.common import InfoExtractor, SearchInfoExtractor
-from .extractor import gen_extractors, get_info_extractor
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 5095f87d29b36287543a0dac86dcf4a50efb68ab..c5d08b0bbabb572c3711d1ae8119e7eeb7e40e71 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -148,6 +148,8 @@ class YoutubeDL(object):
                         again.
      cookiefile:        File name where cookies should be read from and dumped to.
      nocheckcertificate:Do not verify SSL certificates
+    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
+                       At the moment, this is only supported by YouTube.
      proxy:             URL of the proxy server to use
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
@@ -533,7 +535,7 @@ class YoutubeDL(object):
                  else:
                      raise
          else:
-            self.report_error('no suitable InfoExtractor: %s' % url)
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
  
      def process_ie_result(self, ie_result, download=True, extra_info={}):
          """
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 0e9504c1485cdc4a6ba09968f1331615f0b8942f..a4cbdb0bdbea7ad238fad1e828a14a106b9cdcd4 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -56,7 +56,6 @@ __authors__  = (
  __license__ = 'Public Domain'
  
  import codecs
-import getpass
  import io
  import locale
  import optparse
@@ -68,6 +67,7 @@ import sys
  
  
  from .utils import (
+    compat_getpass,
      compat_print,
      DateRange,
      decodeOption,
@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
          '--proxy', dest='proxy', default=None, metavar='URL',
          help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
      general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+    general.add_option(
+        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
      general.add_option(
          '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
          help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
          action='store_true',
          help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
  
-
      selection.add_option(
          '--playlist-start',
          dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -611,7 +613,7 @@ def _real_main(argv=None):
      if opts.usetitle and opts.useid:
          parser.error(u'using title conflicts with using video ID')
      if opts.username is not None and opts.password is None:
-        opts.password = getpass.getpass(u'Type account password and press return:')
+        opts.password = compat_getpass(u'Type account password and press [Return]: ')
      if opts.ratelimit is not None:
          numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
          if numeric_limit is None:
@@ -756,6 +758,7 @@ def _real_main(argv=None):
          'download_archive': download_archive_fn,
          'cookiefile': opts.cookiefile,
          'nocheckcertificate': opts.no_check_certificate,
+        'prefer_insecure': opts.prefer_insecure,
          'proxy': opts.proxy,
          'socket_timeout': opts.socket_timeout,
          'bidi_workaround': opts.bidi_workaround,
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 313414e7d9c667fde3260afdb69b39486f2079d0..d828c6932e40d09bd1de5ce852a0c8748dad3734 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -2,6 +2,7 @@ from .academicearth import AcademicEarthCourseIE
  from .addanime import AddAnimeIE
  from .aftonbladet import AftonbladetIE
  from .anitube import AnitubeIE
+from .aol import AolIE
  from .aparat import AparatIE
  from .appletrailers import AppleTrailersIE
  from .archiveorg import ArchiveOrgIE
@@ -10,6 +11,7 @@ from .arte import (
      ArteTvIE,
      ArteTVPlus7IE,
      ArteTVCreativeIE,
+    ArteTVConcertIE,
      ArteTVFutureIE,
      ArteTVDDCIE,
  )
@@ -63,6 +65,7 @@ from .ehow import EHowIE
  from .eighttracks import EightTracksIE
  from .eitb import EitbIE
  from .elpais import ElPaisIE
+from .engadget import EngadgetIE
  from .escapist import EscapistIE
  from .everyonesmixtape import EveryonesMixtapeIE
  from .exfm import ExfmIE
@@ -71,6 +74,7 @@ from .facebook import FacebookIE
  from .faz import FazIE
  from .firstpost import FirstpostIE
  from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
  from .fktv import (
      FKTVIE,
      FKTVPosteckeIE,
@@ -173,6 +177,7 @@ from .nowness import NownessIE
  from .nowvideo import NowVideoIE
  from .ooyala import OoyalaIE
  from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
  from .pbs import PBSIE
  from .photobucket import PhotobucketIE
  from .playvid import PlayvidIE
@@ -196,6 +201,7 @@ from .rutube import (
      RutubeMovieIE,
      RutubePersonIE,
  )
+from .rutv import RUTVIE
  from .savefrom import SaveFromIE
  from .servingsys import ServingSysIE
  from .sina import SinaIE
@@ -251,12 +257,13 @@ from .ustream import UstreamIE, UstreamChannelIE
  from .vbox7 import Vbox7IE
  from .veehd import VeeHDIE
  from .veoh import VeohIE
+from .vesti import VestiIE
  from .vevo import VevoIE
-from .vgtrk import VGTRKIE
  from .vice import ViceIE
  from .viddler import ViddlerIE
  from .videobam import VideoBamIE
  from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
  from .videofyme import VideofyMeIE
  from .videopremium import VideoPremiumIE
  from .vimeo import (
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py

new file mode 100644 (file)

index 0000000..abc6689
--- /dev/null
+++ b/youtube_dl/extractor/aol.py
@@ -0,0 +1,28 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+
+
+class AolIE(InfoExtractor):
+    IE_NAME = 'on.aol.com'
+    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+
+    _TEST = {
+        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+        'md5': '18ef68f48740e86ae94b98da815eec42',
+        'info_dict': {
+            'id': '518167793',
+            'ext': 'mp4',
+            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        self.to_screen('Downloading 5min.com video %s' % video_id)
+        return FiveMinIE._build_result(video_id)
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index d194f25643f2e26839686f372b404293db831ae2..548442166460d894e53d22b7b6bb69f7849f28b0 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -131,7 +131,7 @@ class ArteTvIE(InfoExtractor):
  
  class ArteTVPlus7IE(InfoExtractor):
      IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
  
      @classmethod
      def _extract_url_info(cls, url):
@@ -202,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
                      re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
                      # The version with sourds/mal subtitles has also lower relevance
                      re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                    # Prefer http downloads over m3u8
+                    0 if f['url'].endswith('m3u8') else 1,
                  )
          formats = sorted(formats, key=sort_key)
          def _format(format_info):
@@ -242,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
  
      _TEST = {
          'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
-        'file': '050489-002.mp4',
          'info_dict': {
+            'id': '050489-002',
+            'ext': 'mp4',
              'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
          },
      }
@@ -255,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
  
      _TEST = {
          'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
-        'file': '050940-003.mp4',
          'info_dict': {
+            'id': '050940-003',
+            'ext': 'mp4',
              'title': 'Les champignons au secours de la planète',
          },
      }
@@ -270,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
  
  class ArteTVDDCIE(ArteTVPlus7IE):
      IE_NAME = 'arte.tv:ddc'
-    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
  
      def _real_extract(self, url):
          video_id, lang = self._extract_url_info(url)
@@ -284,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE):
          javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
          json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
          return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:concert'
+    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+    _TEST = {
+        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+        'info_dict': {
+            'id': '186',
+            'ext': 'mp4',
+            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+            'upload_date': '20140128',
+        },
+    }
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py

index ed3986f313a149f0db4a69dc92762730297ced1a..d50fcdbdbb0fc23becdf6a254769667da44cb9c4 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -14,7 +14,7 @@ from ..utils import (
  
  
  class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
          (video-clips|episodes|cc-studios|video-collections)
          /(?P<title>.*)'''
      _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py

index d65046f588d0bf4481ec6f8d8de7e031e6bdb2f9..2a8eda9eff3ce9364a3e8702c7422cb364dab582 100644 (file)
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -10,9 +10,9 @@ from ..utils import (
  
  
  class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
      IE_DESC = 'C-SPAN'
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
          'md5': '8e44ce11f0f725527daccc453f553eb0',
          'info_dict': {
@@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
              'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
          },
          'skip': 'Regularly fails on travis, for unknown reasons',
-    }
+    }, {
+        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+        # For whatever reason, the served video alternates between
+        # two different ones
+        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
+        'info_dict': {
+            'id': '340723',
+            'ext': 'mp4',
+            'title': 'International Health Care Models',
+            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+        }
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          page_id = mobj.group('id')
          webpage = self._download_webpage(url, page_id)
-        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
+        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
  
          description = self._html_search_regex(
              [
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py

index 4876ecb4812710e2509eec8fc19f00dac60d2fde..6033cd94a1b251d66e7a3f80034bc58b79fa4b55 100644 (file)
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -1,25 +1,28 @@
  # encoding: utf-8
+
+from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse,
-    determine_ext,
  )
  
  
  class DaumIE(InfoExtractor):
      _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
-    IE_NAME = u'daum.net'
+    IE_NAME = 'daum.net'
  
      _TEST = {
-        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
-        u'file': u'52554690.mp4',
-        u'info_dict': {
-            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'upload_date': u'20130831',
-            u'duration': 3868,
+        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+        'info_dict': {
+            'id': '52554690',
+            'ext': 'mp4',
+            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'upload_date': '20130831',
+            'duration': 3868,
          },
      }
  
@@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
          webpage = self._download_webpage(canonical_url, video_id)
          full_id = self._search_regex(
              r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
-            webpage, u'full id')
+            webpage, 'full id')
          query = compat_urllib_parse.urlencode({'vid': full_id})
          info = self._download_xml(
              'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            u'Downloading video info')
+            'Downloading video info')
          urls = self._download_xml(
              'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')
  
          self.to_screen(u'%s: Getting video urls' % video_id)
          formats = []
@@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
              format_url = url_doc.find('result/url').text
              formats.append({
                  'url': format_url,
-                'ext': determine_ext(format_url),
                  'format_id': profile,
              })
  
diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py

new file mode 100644 (file)

index 0000000..92ada81
--- /dev/null
+++ b/youtube_dl/extractor/engadget.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+from ..utils import (
+    url_basename,
+)
+
+
+class EngadgetIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://www.engadget.com/
+        (?:video/5min/(?P<id>\d+)|
+            [\d/]+/.*?)
+        '''
+
+    _TEST = {
+        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
+        'info_dict': {
+            'id': '518153925',
+            'ext': 'mp4',
+            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        if video_id is not None:
+            return FiveMinIE._build_result(video_id)
+        else:
+            title = url_basename(url)
+            webpage = self._download_webpage(url, title)
+            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
+            return {
+                '_type': 'playlist',
+                'title': title,
+                'entries': [FiveMinIE._build_result(id) for id in ids]
+            }
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py

new file mode 100644 (file)

index 0000000..215cc83
--- /dev/null
+++ b/youtube_dl/extractor/fivemin.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+)
+
+
+class FiveMinIE(InfoExtractor):
+    IE_NAME = '5min'
+    _VALID_URL = r'''(?x)
+        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+            5min:)
+        (?P<id>\d+)
+        '''
+
+    _TEST = {
+        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+        'md5': '4f7b0b79bf1a470e5004f7112385941d',
+        'info_dict': {
+            'id': '518013791',
+            'ext': 'mp4',
+            'title': 'iPad Mini with Retina Display Review',
+        },
+    }
+
+    @classmethod
+    def _build_result(cls, video_id):
+        return cls.url_result('5min:%s' % video_id, cls.ie_key())
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info = self._download_json(
+            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
+            'playlist=%s&url=https' % video_id,
+            video_id)['binding'][0]
+
+        second_id = compat_str(int(video_id[:-2]) + 1)
+        formats = []
+        for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
+            if any(r['ID'] == quality for r in info['Renditions']):
+                formats.append({
+                    'format_id': compat_str(quality),
+                    'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
+                    'height': height,
+                })
+
+        return {
+            'id': video_id,
+            'title': info['Title'],
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 0d02f836e122054c8eea8f658d6a4b13a5b97973..23891325677e40165252788bba7a47e4cee4f91c 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -24,6 +24,7 @@ from ..utils import (
  )
  from .brightcove import BrightcoveIE
  from .ooyala import OoyalaIE
+from .rutv import RUTVIE
  
  
  class GenericIE(InfoExtractor):
@@ -101,6 +102,20 @@ class GenericIE(InfoExtractor):
                  'title': '2cc213299525360.mov',  # that's what we get
              },
          },
+        # second style of embedded ooyala videos
+        {
+            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
+            'info_dict': {
+                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
+                'ext': 'mp4',
+                'title': 'Behind-the-scenes: Financial Review Sunday ',
+                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
          # google redirect
          {
              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -143,6 +158,32 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+            },
+        },
+        # RUTV embed
+        {
+            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+            'info_dict': {
+                'id': '776940',
+                'ext': 'mp4',
+                'title': 'Охотское море стало целиком российским',
+                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        # Embedded TED video
+        {
+            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+            'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+            'info_dict': {
+                'id': '981',
+                'ext': 'mp4',
+                'title': 'My web playroom',
+                'uploader': 'Ze Frank',
+                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
              }
          },
          # nowvideo embed hidden behind percent encoding
@@ -155,7 +196,7 @@ class GenericIE(InfoExtractor):
                  'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
                  'description': 'No description',
              },
-        }
+        },
      ]
  
      def report_download_webpage(self, video_id):
@@ -181,9 +222,14 @@ class GenericIE(InfoExtractor):
                      newurl = newurl.replace(' ', '%20')
                      newheaders = dict((k,v) for k,v in req.headers.items()
                                        if k.lower() not in ("content-length", "content-type"))
+                    try:
+                        # This function was deprecated in python 3.3 and removed in 3.4
+                        origin_req_host = req.get_origin_req_host()
+                    except AttributeError:
+                        origin_req_host = req.origin_req_host
                      return HEADRequest(newurl,
                                         headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(),
+                                       origin_req_host=origin_req_host,
                                         unverifiable=True)
                  else:
                      raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
@@ -340,9 +386,9 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded (iframe) Vimeo player
          mobj = re.search(
-            r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
          if mobj:
-            player_url = unescapeHTML(mobj.group(1))
+            player_url = unescapeHTML(mobj.group('url'))
              surl = smuggle_url(player_url, {'Referer': url})
              return self.url_result(surl, 'Vimeo')
  
@@ -408,9 +454,10 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'))
  
          # Look for Ooyala videos
-        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+             re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
          if mobj is not None:
-            return OoyalaIE._build_url_result(mobj.group(1))
+            return OoyalaIE._build_url_result(mobj.group('ec'))
  
          # Look for Aparat videos
          mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -467,6 +514,11 @@ class GenericIE(InfoExtractor):
              return self.playlist_result(
                  urlrs, playlist_id=video_id, playlist_title=video_title)
  
+        # Look for embedded RUTV player
+        rutv_url = RUTVIE._extract_url(webpage)
+        if rutv_url:
+            return self.url_result(rutv_url, 'RUTV')
+
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None:
@@ -478,6 +530,13 @@ class GenericIE(InfoExtractor):
          if mobj is None:
              # Broaden the search a little bit: JWPlayer JS loader
              mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+
+        # Look for embedded TED player. The match is kept in its own variable:
+        # reusing `mobj` here would discard the JW Player match found just
+        # above, which the following `if mobj is None` fallback still checks.
+        ted_mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+        if ted_mobj is not None:
+            return self.url_result(ted_mobj.group('url'), 'TED')
+
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py

index 2a29e6072bc78b0afe5fe2460cd9d1ea45565203..d1defd363c5fe9c86330236f53b8aa21bfe65a38 100644 (file)
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -48,7 +48,7 @@ class IPrimaIE(InfoExtractor):
  
          webpage = self._download_webpage(url, video_id)
  
-        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
+        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
              raise ExtractorError(
                  '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
  
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py

index 1b45b67b0579d9fb06462f587651bc8f83e4751d..5341ac773f79fe237626bdfe3243bd1561d8003d 100644 (file)
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import int_or_none
  
  
  class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
  
          video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
          thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
-        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
-            'video title')
+        title = self._html_search_regex(
+            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
          description = self._html_search_meta('description', webpage, 'video description')
  
-        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
-            webpage)
+        mobj = re.search(
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
          duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
  
-        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
-            'view count', fatal=False)
-        view_count = int(view_count) if view_count is not None else None
+        view_count = self._html_search_regex(
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
  
          comment_count = None
-        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
-            fatal=False)
+        comment_str = self._html_search_regex(
+            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
          if comment_str.startswith('комментариев нет'):
              comment_count = 0
          else:
              mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
              if mobj:
-                comment_count = int(mobj.group('total'))
+                comment_count = mobj.group('total')
  
          return {
              'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
              'title': title,
              'description': description,
              'duration': duration,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
          }
 \ No newline at end of file
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py

index 1d7aa40ed7f1be0ac0348b18f64fd046fc08204e..b8c892ccefc156a5a28b945384553af70e67a909 100644 (file)
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -1,6 +1,5 @@
  from __future__ import unicode_literals
  
-import json
  import re
  
  from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):
  
      _TEST = {
          "url": "http://9gag.tv/v/1912",
-        "file": "1912.mp4",
          "info_dict": {
+            "id": "1912",
+            "ext": "mp4",
              "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
              "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
              "view_count": int,
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py

index 44312ba4ecf61220ad21e8d233a40e99960389b2..e20327791c748617363430dee6dd8803f73bda30 100644 (file)
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -1,20 +1,23 @@
+from __future__ import unicode_literals
  import re
  import json
  
  from .common import InfoExtractor
  from ..utils import unescapeHTML
  
+
  class OoyalaIE(InfoExtractor):
      _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
  
      _TEST = {
          # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
-        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
-        u'info_dict': {
-            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
-            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+        'info_dict': {
+            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'ext': 'mp4',
+            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
          },
      }
  
@@ -28,13 +31,14 @@ class OoyalaIE(InfoExtractor):
              ie=cls.ie_key())
  
      def _extract_result(self, info, more_info):
-        return {'id': info['embedCode'],
-                'ext': 'mp4',
-                'title': unescapeHTML(info['title']),
-                'url': info.get('ipad_url') or info['url'],
-                'description': unescapeHTML(more_info['description']),
-                'thumbnail': more_info['promo'],
-                }
+        return {
+            'id': info['embedCode'],
+            'ext': 'mp4',
+            'title': unescapeHTML(info['title']),
+            'url': info.get('ipad_url') or info['url'],
+            'description': unescapeHTML(more_info['description']),
+            'thumbnail': more_info['promo'],
+        }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -42,22 +46,23 @@ class OoyalaIE(InfoExtractor):
          player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
          player = self._download_webpage(player_url, embedCode)
          mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
-                                        player, u'mobile player url')
+                                        player, 'mobile player url')
          mobile_player = self._download_webpage(mobile_url, embedCode)
          videos_info = self._search_regex(
              r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, u'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
+            mobile_player, 'info').replace('\\"','"')
+        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
          videos_info = json.loads(videos_info)
          videos_more_info =json.loads(videos_more_info)
  
          if videos_more_info.get('lineup'):
              videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
-            return {'_type': 'playlist',
-                    'id': embedCode,
-                    'title': unescapeHTML(videos_more_info['title']),
-                    'entries': videos,
-                    }
+            return {
+                '_type': 'playlist',
+                'id': embedCode,
+                'title': unescapeHTML(videos_more_info['title']),
+                'entries': videos,
+            }
          else:
              return self._extract_result(videos_info[0], videos_more_info)
          
diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py

new file mode 100644 (file)

index 0000000..02dca14
--- /dev/null
+++ b/youtube_dl/extractor/parliamentliveuk.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+    """Extractor for meeting recordings on www.parliamentlive.tv.
+
+    The player page carries an ``embed`` element named ``MediaPlayer`` whose
+    ``src`` points at an ASX document; the stream URL is taken from the
+    ``HREF`` attribute of that document's ``REF`` element.
+    """
+
+    IE_NAME = 'parliamentlive.tv'
+    IE_DESC = 'UK parliament videos'
+    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+        'info_dict': {
+            'id': '15121',
+            'ext': 'asf',
+            'title': 'hoc home affairs committee, 18 mar 2014.pm',
+            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+        },
+        'params': {
+            'skip_download': True,  # Requires mplayer (mms)
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the meeting addressed by *url*.
+
+        Raises ExtractorError if the ASX metadata contains no REF element
+        with a non-empty HREF attribute.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        asx_url = self._html_search_regex(
+            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
+            'metadata URL')
+        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
+        # Compare against None explicitly: an Element without children is
+        # falsy, so a plain truth test would reject a valid REF node.
+        ref = asx.find('.//REF')
+        video_url = ref.get('HREF') if ref is not None else None
+        if not video_url:
+            raise ExtractorError('Unable to find stream URL in ASX metadata')
+
+        # The third and fourth arguments of player.setClipDetails(...) are
+        # captured as one span and joined into a single title.
+        title = self._search_regex(
+            r'''(?x)player\.setClipDetails\(
+                (?:(?:[0-9]+|"[^"]+"),\s*){2}
+                "([^"]+",\s*"[^"]+)"
+                ''',
+            webpage, 'title').replace('", "', ', ')
+        description = self._html_search_regex(
+            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
+            webpage, 'description')
+
+        return {
+            'id': video_id,
+            'ext': 'asf',
+            'url': video_url,
+            'title': title,
+            'description': description,
+        }
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py

index e7e0042fb4e39a77061976078d4662a9cc17f522..64cded70789249746a5e2b6604d86563a6ad499c 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import (
+    US_RATINGS,
+)
  
  
  class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
              # Article with embedded player
             (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
             # Player
-           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
          )
      '''
  
@@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
          info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
          info = self._download_json(info_url, display_id)
  
+        rating_str = info.get('rating')
+        if rating_str is not None:
+            rating_str = rating_str.rpartition('-')[2]
+        age_limit = US_RATINGS.get(rating_str)
+
          return {
              'id': video_id,
              'title': info['title'],
@@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
              'description': info['program'].get('description'),
              'thumbnail': info.get('image_url'),
              'duration': info.get('duration'),
+            'age_limit': age_limit,
          }
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py

new file mode 100644 (file)

index 0000000..5c38cbc
--- /dev/null
+++ b/youtube_dl/extractor/rutv.py
@@ -0,0 +1,183 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none
+)
+
+
+class RUTVIE(InfoExtractor):
+    """Extractor for the player.rutv.ru / player.vgtrk.com embedded player."""
+
+    IE_DESC = 'RUTV.RU'
+    _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
+            'info_dict': {
+                'id': '774471',
+                'ext': 'mp4',
+                'title': 'Монологи на все времена',
+                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
+                'duration': 2906,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
+            'info_dict': {
+                'id': '774016',
+                'ext': 'mp4',
+                'title': 'Чужой в семье Сталина',
+                'description': '',
+                'duration': 2539,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
+            'info_dict': {
+                'id': '766888',
+                'ext': 'mp4',
+                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+                'duration': 279,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
+            'info_dict': {
+                'id': '771852',
+                'ext': 'mp4',
+                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
+                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
+                'duration': 3096,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
+            'info_dict': {
+                'id': '51499',
+                'ext': 'flv',
+                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Translation has finished',
+        },
+    ]
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return the first RUTV player URL embedded in *webpage*, or None.
+
+        An <iframe> pointing at the player is tried first, then an og:video
+        <meta> tag referencing the flash container.
+        """
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
+        # The closing quote (\2) sits outside the named group so that it is
+        # not returned as part of the URL.
+        mobj = re.search(
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?)\2',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+        return None
+
+    def _real_extract(self, url):
+        """Build the info dict for the player URL *url*.
+
+        Downloads the player JSON for the video id and turns every
+        transport/quality entry of the first media item into a format.
+
+        Raises ExtractorError when the JSON or the media item reports errors,
+        or when no format could be built.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('type')
+
+        # Flash container URLs (no type group) and 'swf' iframes are handled
+        # like regular videos.
+        if not video_type or video_type == 'swf':
+            video_type = 'video'
+
+        json_data = self._download_json(
+            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
+            video_id, 'Downloading JSON')
+
+        if json_data['errors']:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
+
+        playlist = json_data['data']['playlist']
+        medialist = playlist['medialist']
+        media = medialist[0]
+
+        if media['errors']:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
+
+        view_count = playlist.get('count_views')
+        priority_transport = playlist['priority_transport']
+
+        thumbnail = media['picture']
+        width = int_or_none(media['width'])
+        height = int_or_none(media['height'])
+        description = media['anons']
+        title = media['title']
+        duration = int_or_none(media.get('duration'))
+
+        formats = []
+
+        for transport, links in media['sources'].items():
+            # `media_url` rather than `url`, so the method argument is not
+            # shadowed inside the loop.
+            for quality, media_url in links.items():
+                if transport == 'rtmp':
+                    # Skip rtmp links that do not split into app / play path.
+                    mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', media_url)
+                    if not mobj:
+                        continue
+                    fmt = {
+                        'url': mobj.group('url'),
+                        'play_path': mobj.group('playpath'),
+                        'app': mobj.group('app'),
+                        'page_url': 'http://player.rutv.ru',
+                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
+                        'rtmp_live': True,
+                        'ext': 'flv',
+                        'vbr': int(quality),
+                    }
+                elif transport == 'm3u8':
+                    fmt = {
+                        'url': media_url,
+                        'ext': 'mp4',
+                    }
+                else:
+                    fmt = {
+                        'url': media_url
+                    }
+                fmt.update({
+                    'width': width,
+                    'height': height,
+                    'format_id': '%s-%s' % (transport, quality),
+                    # Formats on the transport named by priority_transport
+                    # get the higher preference value.
+                    'preference': -1 if priority_transport == transport else -2,
+                })
+                formats.append(fmt)
+
+        if not formats:
+            raise ExtractorError('No media links available for %s' % video_id)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'duration': duration,
+            'formats': formats,
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index cf10be2d02cb5bc538e2561eee2c3fb5e47643c6..ad1a46c3385713056b94d2f00e38558e8ea69b1c 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -11,7 +11,9 @@ from ..utils import (
  
  
  class TEDIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'''(?x)http://www\.ted\.com/
+    _VALID_URL = r'''(?x)
+        (?P<proto>https?://)
+        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
          (
              (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
              |
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
          )
          (/lang/(.*?))? # The url may contain the language
          /(?P<name>\w+) # Here goes the name and then ".html"
+        .*)$
          '''
      _TEST = {
          'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url, re.VERBOSE)
+        if m.group('type') == 'embed':
+            desktop_url = m.group('proto') + 'www' + m.group('urlmain')
+            return self.url_result(desktop_url, 'TED')
          name = m.group('name')
          if m.group('type_talk'):
              return self._talk_info(url, name)
@@ -93,11 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
              self._list_available_subtitles(video_id, talk_info)
              return
  
+        thumbnail = talk_info['thumb']
+        if not thumbnail.startswith('http'):
+            thumbnail = 'http://' + thumbnail
          return {
              'id': video_id,
              'title': talk_info['title'],
              'uploader': talk_info['speaker'],
-            'thumbnail': talk_info['thumb'],
+            'thumbnail': thumbnail,
              'description': self._og_search_description(webpage),
              'subtitles': video_subtitles,
              'formats': formats,
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py

index 35df918b879ced4b7f2d7e90f1e21d15479504ca..054f427252341306edd7698e6d150bcb619b64fc 100644 (file)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -16,7 +16,7 @@ class UdemyIE(InfoExtractor):
      _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
      _NETRC_MACHINE = 'udemy'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
          'md5': '98eda5b657e752cf945d8445e261b5c5',
          'info_dict': {
@@ -27,7 +27,7 @@ class UdemyIE(InfoExtractor):
              'duration': 579.29,
          },
          'skip': 'Requires udemy account credentials',
-    }
+    }]
  
      def _handle_error(self, response):
          if not isinstance(response, dict):
@@ -129,6 +129,7 @@ class UdemyCourseIE(UdemyIE):
      _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
      _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
      _ALREADY_ENROLLED = '>You are already taking this course.<'
+    _TESTS = []
  
      @classmethod
      def suitable(cls, url):
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py

new file mode 100644 (file)

index 0000000..27f9acb
--- /dev/null
+++ b/youtube_dl/extractor/vesti.py
@@ -0,0 +1,121 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .rutv import RUTVIE
+
+
+class VestiIE(InfoExtractor):
+    """Extractor for vesti.ru pages; playback is delegated to RUTVIE."""
+
+    IE_DESC = 'Вести.Ru'
+    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
+            'info_dict': {
+                'id': '765035',
+                'ext': 'mp4',
+                'title': 'Вести.net: биткоины в России не являются законными',
+                'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
+                'duration': 302,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.vesti.ru/doc.html?id=1349233',
+            'info_dict': {
+                'id': '773865',
+                'ext': 'mp4',
+                'title': 'Участники митинга штурмуют Донецкую областную администрацию',
+                'description': 'md5:1a160e98b3195379b4c849f2f4958009',
+                'duration': 210,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.vesti.ru/only_video.html?vid=576180',
+            'info_dict': {
+                'id': '766048',
+                'ext': 'mp4',
+                'title': 'США заморозило, Британию затопило',
+                'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
+                'duration': 87,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://hitech.vesti.ru/news/view/id/4000',
+            'info_dict': {
+                'id': '766888',
+                'ext': 'mp4',
+                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
+                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
+                'duration': 279,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
+            'info_dict': {
+                'id': '766403',
+                'ext': 'mp4',
+                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
+                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
+                'duration': 271,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'skip': 'Blocked outside Russia',
+        },
+        {
+            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
+            'info_dict': {
+                'id': '51499',
+                'ext': 'flv',
+                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
+                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Translation has finished'
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Locate the RUTV player embedded in the page at *url*.
+
+        Returns a url_result handing the player URL over to the RUTV
+        extractor; raises ExtractorError when no player URL is found.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id, 'Downloading page')
+
+        # When og:video references flvplayer_videoHost.swf, switch to the
+        # only_video.html page for the vid it names and search that instead.
+        og_video = re.search(
+            r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
+            webpage)
+        if og_video is not None:
+            video_id = og_video.group('id')
+            only_video_url = 'http://www.vesti.ru/only_video.html?vid=%s' % video_id
+            webpage = self._download_webpage(
+                only_video_url, video_id, 'Downloading video page')
+
+        rutv_url = RUTVIE._extract_url(webpage)
+        if not rutv_url:
+            raise ExtractorError('No video found', expected=True)
+
+        return self.url_result(rutv_url, 'RUTV')
+\ No newline at end of file
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index c3360f16676183b868eeb495b5552e9a459477c3..ee47c30bab9bd37a5d8e75109f29108864db68d0 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -21,6 +21,7 @@ class VevoIE(InfoExtractor):
             https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
             vevo:)
          (?P<id>[^&?#]+)'''
+
      _TESTS = [{
          'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
          "md5": "06bea460acb744eab74a9d7dcb4bfd61",
@@ -33,7 +34,8 @@ class VevoIE(InfoExtractor):
              "duration": 230.12,
              "width": 1920,
              "height": 1080,
-            'timestamp': 1372057200,
+            # timestamp and upload_date are often incorrect; seem to change randomly
+            'timestamp': int,
          }
      }, {
          'note': 'v3 SMIL format',
@@ -47,7 +49,7 @@ class VevoIE(InfoExtractor):
              'title': 'I Wish I Could Break Your Heart',
              'duration': 226.101,
              'age_limit': 0,
-            'timestamp': 1392796919,
+            'timestamp': int,
          }
      }, {
          'note': 'Age-limited video',
@@ -58,7 +60,6 @@ class VevoIE(InfoExtractor):
              'age_limit': 18,
              'title': 'Tunnel Vision (Explicit)',
              'uploader': 'Justin Timberlake',
-            # timestamp and upload_date are often incorrect; seem to change randomly
              'upload_date': 're:2013070[34]',
              'timestamp': int,
          },
diff --git a/youtube_dl/extractor/vgtrk.py b/youtube_dl/extractor/vgtrk.py

deleted file mode 100644 (file)

index 429b8bc..0000000
--- a/youtube_dl/extractor/vgtrk.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    int_or_none
-)
-
-
-class VGTRKIE(InfoExtractor):
-    IE_DESC = 'ВГТРК'
-    _VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
-            'info_dict': {
-                'id': '765035',
-                'ext': 'mp4',
-                'title': 'Вести.net: биткоины в России не являются законными',
-                'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
-                'duration': 302,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.vesti.ru/doc.html?id=1349233',
-            'info_dict': {
-                'id': '773865',
-                'ext': 'mp4',
-                'title': 'Участники митинга штурмуют Донецкую областную администрацию',
-                'description': 'md5:1a160e98b3195379b4c849f2f4958009',
-                'duration': 210,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.vesti.ru/only_video.html?vid=576180',
-            'info_dict': {
-                'id': '766048',
-                'ext': 'mp4',
-                'title': 'США заморозило, Британию затопило',
-                'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
-                'duration': 87,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://hitech.vesti.ru/news/view/id/4000',
-            'info_dict': {
-                'id': '766888',
-                'ext': 'mp4',
-                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
-                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
-                'duration': 279,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
-            'info_dict': {
-                'id': '766403',
-                'ext': 'mp4',
-                'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
-                'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
-                'duration': 271,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-            'skip': 'Blocked outside Russia',
-        },
-        {
-            'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
-            'info_dict': {
-                'id': '51499',
-                'ext': 'flv',
-                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
-                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
-            'skip': 'Translation has finished'
-        },
-        {
-            'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
-            'info_dict': {
-                'id': '771852',
-                'ext': 'mp4',
-                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
-                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
-                'duration': 3096,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://russia.tv/brand/show/brand_id/57638',
-            'info_dict': {
-                'id': '774016',
-                'ext': 'mp4',
-                'title': 'Чужой в семье Сталина',
-                'description': '',
-                'duration': 2539,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
-            'info_dict': {
-                'id': '775081',
-                'ext': 'mp4',
-                'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
-                'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
-                'duration': 101,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-            'skip': 'Blocked outside Russia',
-        },
-        {
-            'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
-            'info_dict': {
-                'id': '774471',
-                'ext': 'mp4',
-                'title': 'Монологи на все времена',
-                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
-                'duration': 2906,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
-            'info_dict': {
-                'id': '125521',
-                'ext': 'mp4',
-                'title': 'Грустная дама червей. Х/ф',
-                'description': '',
-                'duration': 4882,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id, 'Downloading page')
-
-        mobj = re.search(
-            r'<meta property="og:video" content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
-            page)
-        if mobj:
-            video_id = mobj.group('id')
-            page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
-                'Downloading video page')
-
-        mobj = re.search(
-            r'<meta property="og:video" content="http://player\.rutv\.ru/flash2v/container\.swf\?id=(?P<id>\d+)', page)
-        if mobj:
-            video_type = 'video'
-            video_id = mobj.group('id')
-        else:
-            mobj = re.search(
-                r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>',
-                page)
-
-            if not mobj:
-                raise ExtractorError('No media found', expected=True)
-
-            video_type = mobj.group('type')
-            video_id = mobj.group('id')
-
-        json_data = self._download_json(
-            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
-            video_id, 'Downloading JSON')
-
-        if json_data['errors']:
-            raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
-
-        playlist = json_data['data']['playlist']
-        medialist = playlist['medialist']
-        media = medialist[0]
-
-        if media['errors']:
-            raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
-
-        view_count = playlist.get('count_views')
-        priority_transport = playlist['priority_transport']
-
-        thumbnail = media['picture']
-        width = int_or_none(media['width'])
-        height = int_or_none(media['height'])
-        description = media['anons']
-        title = media['title']
-        duration = int_or_none(media.get('duration'))
-
-        formats = []
-
-        for transport, links in media['sources'].items():
-            for quality, url in links.items():
-                if transport == 'rtmp':
-                    mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
-                    if not mobj:
-                        continue
-                    fmt = {
-                        'url': mobj.group('url'),
-                        'play_path': mobj.group('playpath'),
-                        'app': mobj.group('app'),
-                        'page_url': 'http://player.rutv.ru',
-                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
-                        'rtmp_live': True,
-                        'ext': 'flv',
-                        'vbr': int(quality),
-                    }
-                elif transport == 'm3u8':
-                    fmt = {
-                        'url': url,
-                        'ext': 'mp4',
-                    }
-                else:
-                    fmt = {
-                        'url': url
-                    }
-                fmt.update({
-                    'width': width,
-                    'height': height,
-                    'format_id': '%s-%s' % (transport, quality),
-                    'preference': -1 if priority_transport == transport else -2,
-                })
-                formats.append(fmt)
-
-        if not formats:
-            raise ExtractorError('No media links available for %s' % video_id)
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'duration': duration,
-            'formats': formats,
-        }
-\ No newline at end of file
diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py

new file mode 100644 (file)

index 0000000..f8b946a
--- /dev/null
+++ b/youtube_dl/extractor/videolecturesnet.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+)
+
+
+class VideoLecturesNetIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    IE_NAME = 'videolectures.net'
+
+    _TEST = {
+        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+        'info_dict': {
+            'id': 'promogram_igor_mekjavic_eng',
+            'ext': 'mp4',
+            'title': 'Automatics, robotics and biocybernetics',
+            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+            'upload_date': '20130627',
+            'duration': 565,
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
+        smil = self._download_xml(smil_url, video_id)
+
+        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
+        description = find_xpath_attr(smil, './/meta', 'name', 'abstract').attrib['content']
+        upload_date = unified_strdate(
+            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+
+        switch = smil.find('.//switch')
+        duration = parse_duration(switch.attrib.get('dur'))
+        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
+        thumbnail = (
+            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+
+        formats = [{
+            'url': v.attrib['src'],
+            'width': int_or_none(v.attrib.get('width')),
+            'height': int_or_none(v.attrib.get('height')),
+            'filesize': int_or_none(v.attrib.get('size')),
+            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
+            'ext': v.attrib.get('ext'),
+        } for v in switch.findall('./video')
+            if v.attrib.get('proto') == 'http']
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

index 2206a06d59f57093f59135f6faa8d68381695a95..15f31529822bcba124cfb12bcb9e56566b3bfba7 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,29 +1,33 @@
+from __future__ import unicode_literals
+
  import re
  
  from ..utils import (
      ExtractorError,
      unescapeHTML,
      unified_strdate,
+    US_RATINGS,
  )
  from .subtitles import SubtitlesInfoExtractor
  
  
  class VikiIE(SubtitlesInfoExtractor):
-    IE_NAME = u'viki'
+    IE_NAME = 'viki'
  
      _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
      _TEST = {
-        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
-        u'file': u'1023585v.mp4',
-        u'md5': u'a21454021c2646f5433514177e2caa5f',
-        u'info_dict': {
-            u'title': u'Heirs Episode 14',
-            u'uploader': u'SBS',
-            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
-            u'upload_date': u'20131121',
-            u'age_limit': 13,
+        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+        'md5': 'a21454021c2646f5433514177e2caa5f',
+        'info_dict': {
+            'id': '1023585v',
+            'ext': 'mp4',
+            'title': 'Heirs Episode 14',
+            'uploader': 'SBS',
+            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            'upload_date': '20131121',
+            'age_limit': 13,
          },
-        u'skip': u'Blocked in the US',
+        'skip': 'Blocked in the US',
      }
  
      def _real_extract(self, url):
@@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
  
          rating_str = self._html_search_regex(
              r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
-            u'rating information', default='').strip()
-        RATINGS = {
-            'G': 0,
-            'PG': 10,
-            'PG-13': 13,
-            'R': 16,
-            'NC': 18,
-        }
-        age_limit = RATINGS.get(rating_str)
+            'rating information', default='').strip()
+        age_limit = US_RATINGS.get(rating_str)
  
          info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
          info_webpage = self._download_webpage(
-            info_url, video_id, note=u'Downloading info page')
+            info_url, video_id, note='Downloading info page')
          if re.match(r'\s*<div\s+class="video-error', info_webpage):
              raise ExtractorError(
-                u'Video %s is blocked from your location.' % video_id,
+                'Video %s is blocked from your location.' % video_id,
                  expected=True)
          video_url = self._html_search_regex(
-            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
+            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
  
          upload_date_str = self._html_search_regex(
-            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
+            r'"created_at":"([^"]+)"', info_webpage, 'upload date')
          upload_date = (
              unified_strdate(upload_date_str)
              if upload_date_str is not None
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py

index d3eefd086720a82aa5f78c52d941fc6aee35c655..cdc0059f6153714dbb7089f3135c01293edd3126 100644 (file)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -1,11 +1,10 @@
  from __future__ import unicode_literals
  
-import os
  import re
+import json
  
  from .common import InfoExtractor
  from ..utils import (
-    compat_urllib_parse_urlparse,
      compat_urllib_request,
      parse_duration,
      str_to_int,
@@ -42,7 +41,6 @@ class XTubeIE(InfoExtractor):
              r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
          video_description = self._html_search_regex(
              r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
-        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
          duration = parse_duration(self._html_search_regex(
              r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
          view_count = self._html_search_regex(
@@ -54,12 +52,18 @@ class XTubeIE(InfoExtractor):
          if comment_count:
              comment_count = str_to_int(comment_count)
  
-        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[5].split('_')[:2]
-        format[0] += 'p'
-        format[1] += 'k'
-        format = "-".join(format)
+        player_quality_option = json.loads(self._html_search_regex(
+            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
+
+        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
+        formats = [
+            {
+                'url': url,
+                'format_id': format_id,
+                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
+            } for format_id, url in player_quality_option.items()
+        ]
+        self._sort_formats(formats)
  
          return {
              'id': video_id,
@@ -69,9 +73,6 @@ class XTubeIE(InfoExtractor):
              'duration': duration,
              'view_count': view_count,
              'comment_count': comment_count,
-            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'formats': formats,
              'age_limit': 18,
-        }
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py

index 77ad423c44b38af655fc14a8918dfbcf677ca936..d456c4da522d689ac7bcbd33c5f8a3b1204c3b00 100644 (file)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -1,3 +1,6 @@
+from __future__ import unicode_literals
+
+
  import json
  import re
  import sys
@@ -17,24 +20,25 @@ from ..aes import (
  
  
  class YouPornIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
+    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
      _TEST = {
-        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        u'file': u'505835.mp4',
-        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
-        u'info_dict': {
-            u"upload_date": u"20101221",
-            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
-            u"uploader": u"Ask Dan And Jennifer",
-            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
-            u"age_limit": 18,
+        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
+        'info_dict': {
+            'id': '505835',
+            'ext': 'mp4',
+            'upload_date': '20101221',
+            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+            'uploader': 'Ask Dan And Jennifer',
+            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+            'age_limit': 18,
          }
      }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        url = mobj.group('proto') + 'www.' + mobj.group('url')
  
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
          age_limit = self._rta_search(webpage)
  
          # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
          try:
              params = json.loads(json_params)
          except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
          # Get all of the links from the page
          DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
          download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
-            webpage, u'download list').strip()
+            webpage, 'download list').strip()
          LINK_RE = r'<a href="([^"]+)">'
          links = re.findall(LINK_RE, download_list_html)
  
@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
              resolution = format_parts[0]
              height = int(resolution[:-len('p')])
              bitrate = int(format_parts[1][:-len('k')])
-            format = u'-'.join(format_parts) + u'-' + dn
+            format = '-'.join(format_parts) + '-' + dn
  
              formats.append({
                  'url': video_url,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index f7cb497a87071698bb5361c9695c121a9693f1d0..723e7b9e69ba5e1351019ec83872244755a5d292 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
  
      def _real_extract(self, url):
+        proto = (
+            u'http' if self._downloader.params.get('prefer_insecure', False)
+            else u'https')
+
          # Extract original video URL from URL with redirection, like age verification, using next_url parameter
          mobj = re.search(self._NEXT_URL_RE, url)
          if mobj:
-            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
          video_id = self.extract_id(url)
  
          # Get video webpage
-        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
          video_webpage = self._download_webpage(url, video_id)
  
          # Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                                    'asv': 3,
                                                    'sts':'1588',
                                                    })
-            video_info_url = 'https://www.youtube.com/get_video_info?' + data
+            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
              video_info_webpage = self._download_webpage(video_info_url, video_id,
                                      note=False,
                                      errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          else:
              age_gate = False
              for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                          % (video_id, el_type))
                  video_info_webpage = self._download_webpage(video_info_url, video_id,
                                          note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              'duration':     video_duration,
              'age_limit':    18 if age_gate else 0,
              'annotations':  video_annotations,
-            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
              'view_count':   view_count,
              'like_count': like_count,
              'dislike_count': dislike_count,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 3cf29e63a36a9210fefbd2c64e46839fb9434017..3574fc615c6eb54228c9b07c47ff0499b72333b7 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6,6 +6,7 @@ import ctypes
  import datetime
  import email.utils
  import errno
+import getpass
  import gzip
  import itertools
  import io
@@ -762,6 +763,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
  
  def unified_strdate(date_str):
      """Return a string with the date in the format YYYYMMDD"""
+
+    if date_str is None:
+        return None
+
      upload_date = None
      #Replace commas
      date_str = date_str.replace(',', ' ')
@@ -1279,3 +1284,21 @@ def parse_xml(s):
      parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
      kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
      return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+    def compat_getpass(prompt, *args, **kwargs):
+        if isinstance(prompt, compat_str):
+            prompt = prompt.encode(preferredencoding())
+        return getpass.getpass(prompt, *args, **kwargs)
+else:
+    compat_getpass = getpass.getpass
+
+
+US_RATINGS = {
+    'G': 0,
+    'PG': 10,
+    'PG-13': 13,
+    'R': 16,
+    'NC': 18,
+}
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index c038225f70e9785948e28691de69d7e9595cbfb0..266930a7f57e668cc50377164bee95ddf63cc53a 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.03.12'
+__version__ = '2014.03.21.5'
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 21 Mar 2014 21:14:24 +0000 (22:14 +0100)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 21 Mar 2014 21:14:24 +0000 (22:14 +0100)
README.md		patch \| blob \| history
devscripts/release.sh		patch \| blob \| history
setup.cfg	[new file with mode: 0644]	patch \| blob
test/helper.py		patch \| blob \| history
test/test_all_urls.py		patch \| blob \| history
test/test_download.py		patch \| blob \| history
youtube_dl/InfoExtractors.py	[deleted file]	patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/aol.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/arte.py		patch \| blob \| history
youtube_dl/extractor/comedycentral.py		patch \| blob \| history
youtube_dl/extractor/cspan.py		patch \| blob \| history
youtube_dl/extractor/daum.py		patch \| blob \| history
youtube_dl/extractor/engadget.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/fivemin.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/iprima.py		patch \| blob \| history
youtube_dl/extractor/kontrtube.py		patch \| blob \| history
youtube_dl/extractor/ninegag.py		patch \| blob \| history
youtube_dl/extractor/ooyala.py		patch \| blob \| history
youtube_dl/extractor/parliamentliveuk.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pbs.py		patch \| blob \| history
youtube_dl/extractor/rutv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/udemy.py		patch \| blob \| history
youtube_dl/extractor/vesti.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vevo.py		patch \| blob \| history
youtube_dl/extractor/vgtrk.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/videolecturesnet.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/viki.py		patch \| blob \| history
youtube_dl/extractor/xtube.py		patch \| blob \| history
youtube_dl/extractor/youporn.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history