Merge pull request #4647 from aajanki/hds_metadata
author Philipp Hagemeister <phihag@phihag.de>
Thu, 8 Jan 2015 15:37:49 +0000 (16:37 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 8 Jan 2015 15:37:49 +0000 (16:37 +0100)
[downloader/f4m] Improved metadata handling

36 files changed:
AUTHORS
CONTRIBUTING.md
Makefile
README.md
devscripts/gh-pages/update-sites.py
devscripts/make_supportedsites.py
test/helper.py
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_subtitles.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/common.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/gameone.py
youtube_dl/extractor/giga.py [new file with mode: 0644]
youtube_dl/extractor/huffpost.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/sexykarma.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index a63c97ae01d700095c3fed0a6c697f8410aee5bd..8f201080305d80473b85572e1429d480a9530874 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -100,3 +100,4 @@ Cédric Luthi
 Thijs Vermeir
 Joel Leclerc
 Christopher Krooss
+Ondřej Caletka
index 0ff7b395a9f71e2aa2a9e1f20417d34e1f0fbcf6..7917abfc6df2480eb779f22ab336dd82c10f3d13 100644 (file)
@@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services
 
 ###  Is anyone going to need the feature?
 
-Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
 
 ###  Is your question about youtube-dl?
 
index e53a367ef5ad1cbc5ed163ecba8f4a71ac02e8e1..5780798793cf2915807d28791a8cb59d40e9dcf2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
        chmod a+x youtube-dl
 
 README.md: youtube_dl/*.py youtube_dl/*/*.py
-       COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+       COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
 
 CONTRIBUTING.md: README.md
        python devscripts/make_contributing.py README.md CONTRIBUTING.md
index 915bcd0cd2469c70d20ebd469d9814298c5cc2c8..24b4d7970f25632e3482696d838300bd7f828e14 100644 (file)
--- a/README.md
+++ b/README.md
@@ -248,14 +248,15 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes: -f 22/17/18 .  -f
-                                     mp4 , -f m4a and  -f flv  are also
-                                     supported. You can also use the special
-                                     names "best", "bestvideo", "bestaudio",
-                                     "worst", "worstvideo" and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality. Use commas to download multiple
-                                     audio formats, such as -f
+                                     preference using slashes, as in -f 22/17/18
+                                     .  Instead of format codes, you can select
+                                     by extension for the extensions aac, m4a,
+                                     mp3, mp4, ogg, wav, webm. You can also use
+                                     the special names "best", "bestvideo",
+                                     "bestaudio", "worst".  By default, youtube-
+                                     dl will pick the best quality. Use commas
+                                     to download multiple audio formats, such as
+                                     -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -326,7 +327,7 @@ which means you can modify it, redistribute it or use it however you like.
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
 
 # OUTPUT TEMPLATE
 
index f0f0481c781ab40f8386de5c87a99c6468e00708..d3ef5f0b50daa56513118f55d5b636e5f46552a0 100755 (executable)
@@ -16,7 +16,7 @@ def main():
         template = tmplf.read()
 
     ie_htmls = []
-    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+    for ie in youtube_dl.list_extractors(age_limit=None):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
         ie_desc = getattr(ie, 'IE_DESC', None)
         if ie_desc is False:
index 14001064400f37b14feeb9a0fee6a898ef36ef4a..3df4385a6b09a520791f67f8f3958c0b635df1bf 100644 (file)
@@ -23,12 +23,12 @@ def main():
 
     def gen_ies_md(ies):
         for ie in ies:
-            ie_md = '**{}**'.format(ie.IE_NAME)
+            ie_md = '**{0}**'.format(ie.IE_NAME)
             ie_desc = getattr(ie, 'IE_DESC', None)
             if ie_desc is False:
                 continue
             if ie_desc is not None:
-                ie_md += ': {}'.format(ie.IE_DESC)
+                ie_md += ': {0}'.format(ie.IE_DESC)
             if not ie.working():
                 ie_md += ' (Currently broken)'
             yield ie_md
index 96d58b7c12fd9119b3b5f65eb9c41cfc3c97f500..77225e4f799755e2927a3105fa1974f08779fc8d 100644 (file)
@@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
 
 def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
-        t = getattr(ie, '_TEST', None)
-        if t:
-            assert not hasattr(ie, '_TESTS'), \
-                '%s has _TEST and _TESTS' % type(ie).__name__
-            tests = [t]
-        else:
-            tests = getattr(ie, '_TESTS', [])
-        for t in tests:
-            if not include_onlymatching and t.get('only_matching', False):
-                continue
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
+        for tc in ie.get_testcases(include_onlymatching):
+            yield tc
 
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
index 13c18ed95d4ea65111b6a5bc1406d0a5703336c2..be8d12997a1a5aba2cb62270068363f339a5eac6 100644 (file)
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
index 730f7ec260e9c4f1ef53273d67fc2d52f68e78b1..85d87f2c31e803aff668f1d71a6bbdfba33cdcd8 100644 (file)
@@ -8,6 +8,8 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import copy
+
 from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'vid-high')
 
+    def test_format_selection_audio_exts(self):
+        formats = [
+            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+        ]
+
+        info_dict = _make_result(formats)
+        ydl = YDL({'format': 'best'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'aac-64')
+
+        ydl = YDL({'format': 'mp3'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+        ydl = YDL({'prefer_free_formats': True})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'ogg-64')
+
     def test_format_selection_video(self):
         formats = [
             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
index d345651918f04aeeae5c6e38a023e17a9414a974..6336dd317ca5a77ebced2e55d3c49873b58ebda6 100644 (file)
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
     TEDIE,
     VimeoIE,
     WallaIE,
+    CeskaTelevizeIE,
 )
 
 
@@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.assertEqual(len(subtitles), 0)
 
 
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+    IE = CeskaTelevizeIE
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_allsubtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['cs']))
+        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning('video doesn\'t have subtitles')
+        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+
 if __name__ == '__main__':
     unittest.main()
index dd49a6d179dc7cea01935be7c019292b4f3559dc..16e1a1ddfdef5be7c0ba941d8793b3c8b88faf08 100644 (file)
@@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    age_restricted,
     args_to_str,
     clean_html,
     DateRange,
@@ -402,5 +403,12 @@ Trying to open render node...
 Success at /dev/dri/renderD128.
 ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
 
+    def test_age_restricted(self):
+        self.assertFalse(age_restricted(None, 10))  # unrestricted content
+        self.assertFalse(age_restricted(1, None))  # unrestricted policy
+        self.assertFalse(age_restricted(8, 10))
+        self.assertTrue(age_restricted(18, 14))
+        self.assertFalse(age_restricted(18, 18))
+
 if __name__ == '__main__':
     unittest.main()
index 806e7b239cec05e1724dc58e74aca1af671981cf..24d6c2de7670d2139259d1cdd5a068e81f5f0bb3 100755 (executable)
@@ -63,6 +63,7 @@ from .utils import (
     YoutubeDLHandler,
     prepend_extension,
     args_to_str,
+    age_restricted,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@@ -550,13 +551,8 @@ class YoutubeDL(object):
             max_views = self.params.get('max_views')
             if max_views is not None and view_count > max_views:
                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        age_limit = self.params.get('age_limit')
-        if age_limit is not None:
-            actual_age_limit = info_dict.get('age_limit')
-            if actual_age_limit is None:
-                actual_age_limit = 0
-            if age_limit < actual_age_limit:
-                return 'Skipping "' + title + '" because it is age restricted'
+        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+            return 'Skipping "%s" because it is age restricted' % title
         if self.in_download_archive(info_dict):
             return '%s has already been recorded in archive' % video_title
         return None
@@ -790,7 +786,7 @@ class YoutubeDL(object):
             if video_formats:
                 return video_formats[0]
         else:
-            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
+            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
             if format_spec in extensions:
                 filter_f = lambda f: f['ext'] == format_spec
             else:
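
The hunk above widens the list of bare extensions accepted as a format spec (adding mp3, ogg, aac and wav). A minimal sketch of the resulting filter, using hypothetical format dicts rather than anything from the codebase:

    # Hedged sketch of the extension-based filter added above; the real code
    # falls back to format_id matching when the spec is not a known extension.
    EXTENSIONS = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']

    def select_by_extension(formats, format_spec):
        if format_spec in EXTENSIONS:
            return [f for f in formats if f['ext'] == format_spec]
        return formats  # illustrative fallback only

    formats = [
        {'format_id': 'mp3-64', 'ext': 'mp3'},
        {'format_id': 'aac-64', 'ext': 'aac'},
    ]
    assert select_by_extension(formats, 'mp3') == [{'format_id': 'mp3-64', 'ext': 'mp3'}]
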
index 56f560d26c6afa5f366f207e21b3dd8ec50d20b0..4aa7fba6ab2e1fcd284f70c0a3e8e8052987d215 100644 (file)
@@ -38,7 +38,7 @@ from .update import update_self
 from .downloader import (
     FileDownloader,
 )
-from .extractor import gen_extractors
+from .extractor import gen_extractors, list_extractors
 from .YoutubeDL import YoutubeDL
 
 
@@ -95,17 +95,15 @@ def _real_main(argv=None):
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
-    extractors = gen_extractors()
-
     if opts.list_extractors:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 compat_print('  ' + mu)
         sys.exit(0)
     if opts.list_extractor_descriptions:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
@@ -365,3 +363,5 @@ def main(argv=None):
         sys.exit('ERROR: fixed output name but more than one file to download')
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
index 8e47bd60dce440f383720fde04e31679d6554cd5..8dacc2c54a24f39f67678a22be9511363e92b095 100644 (file)
@@ -159,6 +159,7 @@ from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
+from .giga import GigaIE
 from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
@@ -574,6 +575,17 @@ def gen_extractors():
     return [klass() for klass in _ALL_CLASSES]
 
 
+def list_extractors(age_limit):
+    """
+    Return a list of extractors that are suitable for the given age,
+    sorted by extractor ID.
+    """
+
+    return sorted(
+        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+        key=lambda ie: ie.IE_NAME.lower())
+
+
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name + 'IE']
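
The new list_extractors() defined above replaces the ad-hoc sorted(gen_extractors(), ...) calls and is re-exported from the package (see __all__ in youtube_dl/__init__.py earlier in this commit). A hedged usage sketch:

    # Usage sketch only; output shape mirrors the --list-extractors handling
    # shown in youtube_dl/__init__.py above.
    from youtube_dl.extractor import list_extractors

    # age_limit=None keeps every extractor; an integer drops extractors whose
    # test cases are all age-restricted for that limit (see is_suitable()).
    for ie in list_extractors(age_limit=None):
        print(ie.IE_NAME + ('' if ie._WORKING else ' (CURRENTLY BROKEN)'))
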
index 241b904a9e57f7cc3e61b6f086550578feb71b05..75d744852edc382721cee8556067f89ccb0092df 100644 (file)
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_parse_qs
 from ..utils import (
-    ExtractorError,
     int_or_none,
     unified_strdate,
 )
@@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
         thumbnail = self._html_search_meta(
             'thumbnailUrl', video_code, 'thumbnail', fatal=False)
 
-        player_params = compat_parse_qs(self._html_search_regex(
-            r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
-            webpage, 'player params'))
+        cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
 
-        if 'cid' in player_params:
-            cid = player_params['cid'][0]
+        lq_doc = self._download_xml(
+            'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading LQ video info'
+        )
+        lq_durl = lq_doc.find('./durl')
+        formats = [{
+            'format_id': 'lq',
+            'quality': 1,
+            'url': lq_durl.find('./url').text,
+            'filesize': int_or_none(
+                lq_durl.find('./size'), get_attr='text'),
+        }]
 
-            lq_doc = self._download_xml(
-                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
-                video_id,
-                note='Downloading LQ video info'
-            )
-            lq_durl = lq_doc.find('.//durl')
-            formats = [{
-                'format_id': 'lq',
-                'quality': 1,
-                'url': lq_durl.find('./url').text,
+        hq_doc = self._download_xml(
+            'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading HQ video info',
+            fatal=False,
+        )
+        if hq_doc is not False:
+            hq_durl = hq_doc.find('./durl')
+            formats.append({
+                'format_id': 'hq',
+                'quality': 2,
+                'ext': 'flv',
+                'url': hq_durl.find('./url').text,
                 'filesize': int_or_none(
-                    lq_durl.find('./size'), get_attr='text'),
-            }]
-
-            hq_doc = self._download_xml(
-                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
-                video_id,
-                note='Downloading HQ video info',
-                fatal=False,
-            )
-            if hq_doc is not False:
-                hq_durl = hq_doc.find('.//durl')
-                formats.append({
-                    'format_id': 'hq',
-                    'quality': 2,
-                    'ext': 'flv',
-                    'url': hq_durl.find('./url').text,
-                    'filesize': int_or_none(
-                        hq_durl.find('./size'), get_attr='text'),
-                })
-        else:
-            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+                    hq_durl.find('./size'), get_attr='text'),
+            })
 
         self._sort_formats(formats)
         return {
index ba8376338c6d2046951e18c34fb778c27d3e530c..f70e090bb5b01942713149493e48bc0e51f7f74b 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
 )
 
 
-class CeskaTelevizeIE(InfoExtractor):
+class CeskaTelevizeIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
@@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):
         duration = float_or_none(item.get('duration'))
         thumbnail = item.get('previewImageUrl')
 
+        subtitles = {}
+        subs = item.get('subtitles')
+        if subs:
+            subtitles['cs'] = subs[0]['url']
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+
         return {
             'id': episode_id,
             'title': title,
@@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
+
+    @staticmethod
+    def _fix_subtitles(subtitles):
+        """ Convert millisecond-based subtitles to SRT """
+        if subtitles is None:
+            return subtitles  # subtitles not requested
+
+        def _msectotimecode(msec):
+            """ Helper utility to convert milliseconds to timecode """
+            components = []
+            for divider in [1000, 60, 60, 100]:
+                components.append(msec % divider)
+                msec //= divider
+            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
+
+        def _fix_subtitle(subtitle):
+            for line in subtitle.splitlines():
+                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
+                if m:
+                    yield m.group(1)
+                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+                    yield "{0} --> {1}".format(start, stop)
+                else:
+                    yield line
+
+        fixed_subtitles = {}
+        for k, v in subtitles.items():
+            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
+        return fixed_subtitles
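
The timecode format string reads its components in reverse order, so the conversion is easiest to follow worked through once. A standalone restatement of the helper nested inside _fix_subtitles above:

    # Hedged standalone copy of the millisecond-to-SRT-timecode helper from
    # CeskaTelevizeIE._fix_subtitles, with two worked examples.
    def msec_to_timecode(msec):
        components = []
        for divider in [1000, 60, 60, 100]:
            components.append(msec % divider)
            msec //= divider
        return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

    assert msec_to_timecode(70200) == '00:01:10,200'    # 1 min 10.2 s
    assert msec_to_timecode(3601500) == '01:00:01,500'  # 1 h 0 min 1.5 s
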
index 562e656e0b24b2a43c69cae81994fed902eb2851..d703893dcfef1e772f1e294b0c08430ee6c15db3 100644 (file)
@@ -21,6 +21,7 @@ from ..compat import (
     compat_str,
 )
 from ..utils import (
+    age_restricted,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -593,7 +594,7 @@ class InfoExtractor(object):
         return self._html_search_regex(
             r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
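
The lazy quantifier and the corrected backreference (\2, the content quote, instead of \1, the lookahead quote) are what make mixed quote styles work; compare the new cases added to test/test_InfoExtractor.py earlier in this commit. A hedged standalone sketch, where only the regex itself comes from the hunk above:

    import re

    def html_search_meta(name, html):
        # Illustrative wrapper; the real method goes through _html_search_regex.
        pattern = r'''(?isx)<meta
                (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
                [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name)
        m = re.search(pattern, html)
        return m.group('content') if m else None

    # name uses double quotes, content uses single quotes -> now matched
    assert html_search_meta('c', '<meta name="c" content=\'3\'>') == '3'
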
@@ -877,6 +878,35 @@ class InfoExtractor(object):
             None, '/', True, False, expire_time, '', None, None, None)
         self._downloader.cookiejar.set_cookie(cookie)
 
+    def get_testcases(self, include_onlymatching=False):
+        t = getattr(self, '_TEST', None)
+        if t:
+            assert not hasattr(self, '_TESTS'), \
+                '%s has _TEST and _TESTS' % type(self).__name__
+            tests = [t]
+        else:
+            tests = getattr(self, '_TESTS', [])
+        for t in tests:
+            if not include_onlymatching and t.get('only_matching', False):
+                continue
+            t['name'] = type(self).__name__[:-len('IE')]
+            yield t
+
+    def is_suitable(self, age_limit):
+        """ Test whether the extractor is generally suitable for the given
+        age limit (i.e. pornographic sites are not, all others usually are) """
+
+        any_restricted = False
+        for tc in self.get_testcases(include_onlymatching=False):
+            if 'playlist' in tc:
+                tc = tc['playlist'][0]
+            is_restricted = age_restricted(
+                tc.get('info_dict', {}).get('age_limit'), age_limit)
+            if not is_restricted:
+                return True
+            any_restricted = any_restricted or is_restricted
+        return not any_restricted
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index d09d1c13a70cffb725329f69368f37359d7f7a08..190d9f9adc292bfc33d2085eb9bd057ec4c95502 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class FKTVIE(InfoExtractor):
     IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
 
     _TEST = {
         'url': 'http://fernsehkritik.tv/folge-1',
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = int(mobj.group('ep'))
+        episode = int(self._match_id(url))
 
-        server = random.randint(2, 4)
-        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
-        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
+        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
                                                episode)
         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
                                       'playlist', flags=re.DOTALL)
         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
-        # TODO: return a single multipart video
+
         videos = []
         for i, _ in enumerate(files, 1):
             video_id = '%04d%d' % (episode, i)
-            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+            video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
             videos.append({
+                'ext': 'flv',
                 'id': video_id,
                 'url': video_url,
                 'title': clean_html(get_element_by_id('eptitle', start_webpage)),
                 'description': clean_html(get_element_by_id('contentlist', start_webpage)),
                 'thumbnail': video_thumbnail
             })
-        return videos
+        return {
+            '_type': 'multi_video',
+            'entries': videos,
+            'id': 'folge-%s' % episode,
+        }
 
 
 class FKTVPosteckeIE(InfoExtractor):
index 75f180928e2c29f59d2d77c1d80e267f6bc8d0a0..a07d69841f9278b932754b603249dc9f8eea53a0 100644 (file)
@@ -57,8 +57,7 @@ class GameOneIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage, secure=False)
diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py
new file mode 100644 (file)
index 0000000..7758901
--- /dev/null
+++ b/youtube_dl/extractor/giga.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    qualities,
+    compat_str,
+    parse_duration,
+    parse_iso8601,
+    str_to_int,
+)
+
+
+class GigaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
+        'md5': '6bc5535e945e724640664632055a584f',
+        'info_dict': {
+            'id': '2622086',
+            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
+            'ext': 'mp4',
+            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
+            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 578,
+            'timestamp': 1414749706,
+            'upload_date': '20141031',
+            'uploader': 'Robin Schweiger',
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
+            webpage, 'video id')
+
+        playlist = self._download_json(
+            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
+            % video_id, video_id)[0]
+
+        quality = qualities(['normal', 'hd720'])
+
+        formats = []
+        for format_id in itertools.count(0):
+            fmt = playlist.get(compat_str(format_id))
+            if not fmt:
+                break
+            formats.append({
+                'url': fmt['src'],
+                'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
+                'quality': quality(fmt['quality']),
+            })
+        self._sort_formats(formats)
+
+        title = self._html_search_meta(
+            'title', webpage, 'title', fatal=True)
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        duration = parse_duration(self._search_regex(
+            r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
+            webpage, 'duration', fatal=False))
+
+        timestamp = parse_iso8601(self._search_regex(
+            r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
+
+        view_count = str_to_int(self._search_regex(
+            r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 4ccf6b9b8a82c3ef28c1d9d04dcc6f26ce2a8f8d..a38eae421a9199b578b3a724d205b13e6367c67a 100644 (file)
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
         data = self._download_json(api_url, video_id)['data']
 
         video_title = data['title']
-        duration = parse_duration(data['running_time'])
-        upload_date = unified_strdate(data['schedule']['starts_at'])
+        duration = parse_duration(data.get('running_time'))
+        upload_date = unified_strdate(
+            data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
         description = data.get('description')
 
         thumbnails = []
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
             'ext': 'mp4',
             'url': url,
             'vcodec': 'none' if key.startswith('audio/') else None,
-        } for key, url in data['sources']['live'].items()]
-        if data.get('fivemin_id'):
-            fid = data['fivemin_id']
-            fcat = str(int(fid) // 100 + 1)
-            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
-            formats.append({
-                'format': 'fivemin',
-                'url': furl,
-                'preference': 1,
-            })
+        } for key, url in data.get('sources', {}).get('live', {}).items()]
+
+        if not formats and data.get('fivemin_id'):
+            return self.url_result('5min:%s' % data['fivemin_id'])
+
         self._sort_formats(formats)
 
         return {
index 13a53a0cb39f70ed1aaf1713283852cdc3cebeb4..f29df36b5bf6bd7e732ad84cbfd7d3eeb412f5ff 100644 (file)
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
-        'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
index 78787e8f1cbb067527bd56e3167a921a3541e6d9..3c61a850f296c32861cdfd35095746c2cf1ef4ad 100644 (file)
@@ -105,6 +105,9 @@ class OCWMITIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+                'upload_date': '20121109',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
                 # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
@@ -114,6 +117,9 @@ class OCWMITIE(InfoExtractor):
                 'id': '7K1sB05pE0A',
                 'ext': 'mp4',
                 'title': 'Session 1: Introduction to Derivatives',
+                'upload_date': '20090818',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
                 # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
index f5ca74e976bc10ff896bf7e6134a14332c7b131c..c1a482dba39fb98efdb28e85b681565eb58e3f9e 100644 (file)
@@ -1,63 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import hashlib
-import json
-import time
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
-    compat_str,
-)
-from ..utils import (
-    int_or_none,
+    compat_urlparse,
 )
 
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
-        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
         'info_dict': {
-            'id': '7063',
+            'id': '2-T3WuR-KMM',
             'ext': 'mp4',
             'title': 'Red Bull Racing: 2014 Rules Explained',
-            'duration': 207,
+            'duration': 208,
             'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
-            'uploader': 'rainiere',
-            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
-        }
+            'uploader': 'mcomstaff',
+            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+            'upload_date': '20140903',
+            'thumbnail': r're:^https?://.+\.jpg$'
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        flashvars_code = self._html_search_regex(
-            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
-        flashvars = compat_parse_qs(flashvars_code)
-        params = json.loads(flashvars['parameters'][0])
-
-        e = compat_str(int(time.time()) + 24 * 60 * 60)
-        base_video_url = params['location'] + '?e=' + e
-        s = 'h3hg713fh32'
-        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
-        video_url = base_video_url + '&h=' + h
-
-        uploader = self._html_search_regex(
-            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
-            'uploader', fatal=False)
+        iframe_path = self._html_search_regex(
+            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+            'iframe path')
+        iframe = self._download_webpage(
+            compat_urlparse.urljoin(url, iframe_path), display_id,
+            'Downloading iframe')
+        youtube_id = self._search_regex(
+            r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
 
         return {
-            'id': params['video_id'],
+            '_type': 'url_transparent',
             'display_id': display_id,
-            'title': params['title'],
-            'url': video_url,
-            'description': params.get('description'),
-            'thumbnail': params.get('main_thumb'),
-            'duration': int_or_none(params.get('duration')),
-            'uploader': uploader,
+            'url': 'https://youtube.com/watch?v=%s' % youtube_id,
         }
index 43e8e619f6d0562982097a0e36da5eee58e5c713..321ce5ce707c7006ad1a8f5979e01afab1bddd23 100644 (file)
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
 
 
 class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
         {
@@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                 'upload_date': '20140523',
                 'duration': 1741.52,
-            }
+            },
         },
         {
             'url': 'http://tv.nrk.no/program/mdfp15000514',
@@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
                 'upload_date': '20140524',
                 'duration': 4605.0,
-            }
+            },
         },
+        {
+            # single playlist video
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+            'md5': 'adbd1dbd813edaf532b0a253780719c2',
+            'info_dict': {
+                'id': 'MSPO40010515-part2',
+                'ext': 'flv',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+            },
+            'skip': 'Only works from Norway',
+        },
+        {
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+            'playlist': [
+                {
+                    'md5': '9480285eff92d64f06e02a5367970a7a',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part1',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+                {
+                    'md5': 'adbd1dbd813edaf532b0a253780719c2',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part2',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+            ],
+            'info_dict': {
+                'id': 'MSPO40010515',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+                'duration': 6947.5199999999995,
+            },
+            'skip': 'Only works from Norway',
+        }
     ]
 
+    def _extract_f4m(self, manifest_url, video_id):
+        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        title = self._html_search_meta('title', page, 'title')
-        description = self._html_search_meta('description', page, 'description')
-        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
-        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
-        duration = float_or_none(
-            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
+        part_id = mobj.group('part_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            'title', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = self._html_search_regex(
+            r'data-posterimage="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        upload_date = unified_strdate(self._html_search_meta(
+            'rightsfrom', webpage, 'upload date', fatal=False))
+        duration = float_or_none(self._html_search_regex(
+            r'data-duration="([^"]+)"',
+            webpage, 'duration', fatal=False))
+
+        # playlist
+        parts = re.findall(
+            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
+        if parts:
+            entries = []
+            for current_part_id, stream_url, part_title in parts:
+                if part_id and current_part_id != part_id:
+                    continue
+                video_part_id = '%s-part%s' % (video_id, current_part_id)
+                formats = self._extract_f4m(stream_url, video_part_id)
+                entries.append({
+                    'id': video_part_id,
+                    'title': part_title,
+                    'description': description,
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'formats': formats,
+                })
+            if part_id:
+                if entries:
+                    return entries[0]
+            else:
+                playlist = self.playlist_result(entries, video_id, title, description)
+                playlist.update({
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'duration': duration,
+                })
+                return playlist
 
         formats = []
 
-        f4m_url = re.search(r'data-media="([^"]+)"', page)
+        f4m_url = re.search(r'data-media="([^"]+)"', webpage)
         if f4m_url:
-            formats.append({
-                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
-                'format_id': 'f4m',
-                'ext': 'flv',
-            })
+            formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
 
-        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
         if m3u8_url:
-            formats.append({
-                'url': m3u8_url.group(1),
-                'format_id': 'm3u8',
-            })
+            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
 
         self._sort_formats(formats)
 
index c833fc8ee817bce2ce09fce724f67633a54ba459..6446d26dc416703da688386a578f904d24b102a4 100644 (file)
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
             'title': 'Taking a quick pee.',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'wildginger7',
-            'upload_date': '20141007',
+            'upload_date': '20141008',
             'duration': 22,
             'view_count': int,
             'comment_count': int,
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }, {
         'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }]
 
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': view_count,
             'comment_count': comment_count,
             'categories': categories,
+            'age_limit': 18,
         }
index 6c3445d792206395b7a36d016b8a42ad255ea9cc..82675431f863fded8768241e2ad21c4874f8525d 100644 (file)
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_meta('title', webpage, 'title', fatal=True)
index 4ce5aeeba242b94b78d71e3c9d033aa318b588fb..b6b1f2568f23a6ea9fe8e12c86deb6b30d44a809 100644 (file)
@@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):
     _INFO_DICT = {
         'id': '34682',
         'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
-        'ext': 'AAC',
+        'ext': 'aac',
         'thumbnail': 're:^https?://.*\.png$',
         'location': 'Tacoma, WA',
     }
@@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):
         for stream in streams:
             if stream.get('Type') == 'Live':
                 is_live = True
+            reliability = stream.get('Reliability')
+            format_note = (
+                'Reliability: %d%%' % reliability
+                if reliability is not None else None)
             formats.append({
+                'preference': (
+                    0 if reliability is None or reliability > 90
+                    else 1),
                 'abr': stream.get('Bandwidth'),
-                'ext': stream.get('MediaType'),
+                'ext': stream.get('MediaType').lower(),
                 'acodec': stream.get('MediaType'),
                 'vcodec': 'none',
                 'url': stream.get('Url'),
-                # Sometimes streams with the highest quality do not exist
-                'preference': stream.get('Reliability'),
+                'source_preference': reliability,
+                'format_note': format_note,
             })
         self._sort_formats(formats)
 
index 15f31529822bcba124cfb12bcb9e56566b3bfba7..944901e1482a666ae90cc5e1c0f86e325ec2aecc 100644 (file)
@@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
     _TEST = {
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
-        'md5': 'a21454021c2646f5433514177e2caa5f',
         'info_dict': {
             'id': '1023585v',
             'ext': 'mp4',
@@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         title = self._og_search_title(webpage)
index 88bbbb21967c6807c536ecc5b06d8d8f41095219..c17bebd6e919673d9011de3ac37dfff2929b2cc8 100644 (file)
@@ -10,14 +10,14 @@ from ..utils import (
 
 
 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
     _TEST = {
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
-            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+            'md5': '79132cc09ec5309fa590ae46e4cc31bc',
             'info_dict': {
                 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
                 'upload_date': '20140322',
             },
         }, {
-            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+            'md5': 'e1d5734c06865cc504ad99dc2de0d443',
             'info_dict': {
                 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        page_id = mobj.group('id')
-
+        page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
+
         title = self._og_search_title(webpage)
         uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
         entries = []
index 95f1c8f3cf20bdd83b3e524808e5c5fb20cde680..e8490b028e53080b8e685be13577a05603a4af9e 100644 (file)
@@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
         'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
         'info_dict': {
             'id': 'greenshowers',
+            'age_limit': 18,
         },
         'playlist_mincount': 155,
     }
@@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
         return {
             '_type': 'playlist',
             'id': username,
+            'age_limit': 18,
             'entries': [{
                 '_type': 'url',
                 'url': eurl,
index d1bbf0b01ae7ccb7c67bb468a904b9659e6fd14b..e719560711dbdd9d3f31ccc7432ee705132a91d4 100644 (file)
@@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
+        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'id': 'HtVdAasjOgU',
                 'ext': 'mp4',
                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
-                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'uploader': 'The Witcher',
                 'uploader_id': 'WitcherGame',
                 'upload_date': '20140605',
@@ -1046,7 +1048,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     for f in formats:
                         if f['format_id'] in dash_keys:
                             f['format_id'] = 'nondash-%s' % f['format_id']
-                            f['preference'] -= 10000
+                            f['preference'] = f.get('preference', 0) - 10000
                     formats.extend(dash_formats)
 
         self._sort_formats(formats)
index 74c76a9a0446482c303f3b4182f3ef2bd4942c0d..98f15177bd6665bd1c6b96a071d59d4b67e5d918 100644 (file)
@@ -119,7 +119,7 @@ class ZDFChannelIE(InfoExtractor):
         'info_dict': {
             'id': '1586442',
         },
-        'playlist_count': 4,
+        'playlist_count': 3,
     }
     _PAGE_SIZE = 50
 
index a018c1d711ffcb67997a15d4fdcec528cb7f99ba..058342dec9990bcad43d2a5744ea1fc5e974b40a 100644 (file)
@@ -267,10 +267,12 @@ def parseOpts(overrideArguments=None):
         action='store', dest='format', metavar='FORMAT', default=None,
         help=(
             'video format code, specify the order of preference using'
-            ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also'
-            ' supported. You can also use the special names "best",'
-            ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
-            ' "worstaudio". By default, youtube-dl will pick the best quality.'
+            ' slashes, as in -f 22/17/18 . '
+            ' Instead of format codes, you can select by extension for the '
+            'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
+            'You can also use the special names "best",'
+            ' "bestvideo", "bestaudio", "worst". '
+            ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
             ' You can merge the video and audio of two formats into a single'
index d4951c406c73d8216803f5d9149200787883676b..29739a4833de0a782b359d958d331316cbaf1c24 100644 (file)
@@ -1560,3 +1560,13 @@ def urlhandle_detect_ext(url_handle):
         getheader = url_handle.info().getheader
 
     return getheader('Content-Type').split("/")[1]
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked """
+
+    if age_limit is None:  # No limit set
+        return False
+    if content_limit is None:
+        return False  # Content available for everyone
+    return age_limit < content_limit
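
A short usage sketch of the new helper; the expected values mirror the test_age_restricted cases added in test/test_utils.py earlier in this commit:

    from youtube_dl.utils import age_restricted

    assert age_restricted(None, 10) is False  # content carries no restriction
    assert age_restricted(1, None) is False   # user set no --age-limit
    assert age_restricted(18, 14) is True     # 18+ content, user limit of 14
    assert age_restricted(18, 18) is False    # exactly at the limit is allowed
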
index 086f0ebf06d5fb01176ede63ab0b1e5a72b213ce..32019e3623d26fcbb41b47c9f4c3b12cbdf5c5c5 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.01.05'
+__version__ = '2015.01.08'