Merge branch 'gamekings' of https://github.com/robin007bond/youtube-dl into robin007b...
authorSergey M․ <dstftw@gmail.com>
Sun, 8 Feb 2015 16:46:43 +0000 (22:46 +0600)
committerSergey M․ <dstftw@gmail.com>
Sun, 8 Feb 2015 16:46:43 +0000 (22:46 +0600)
17 files changed:
docs/supportedsites.md
test/test_YoutubeDL.py
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/aftenposten.py [new file with mode: 0644]
youtube_dl/extractor/aparat.py
youtube_dl/extractor/common.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/rtp.py
youtube_dl/extractor/rts.py
youtube_dl/extractor/soulanime.py [deleted file]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tweakers.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/version.py

index b188be63690adf4b45fd78ba0252fa70b7e2cb45..2d8f9c316b83780f56d6a99d59a457610cfe05ea 100644 (file)
  - **tvp.pl**
  - **tvp.pl:Series**
  - **TVPlay**: TV3Play and related services
+ - **Tweakers**
  - **twitch:bookmarks**
  - **twitch:chapter**
  - **twitch:past_broadcasts**
index 678b9f7d15ee66892ef010552753f0a9577b67a7..b1cd6a69f3ab0b909c1b328d83395aebee60ff44 100644 (file)
@@ -13,6 +13,7 @@ import copy
 from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
+from youtube_dl.postprocessor.common import PostProcessor
 
 
 class YDL(FakeYDL):
@@ -370,5 +371,35 @@ class TestFormatSelection(unittest.TestCase):
             'vbr': 10,
         }), '^\s*10k$')
 
+    def test_postprocessors(self):
+        filename = 'post-processor-testfile.mp4'
+        audiofile = filename + '.mp3'
+
+        class SimplePP(PostProcessor):
+            def run(self, info):
+                with open(audiofile, 'wt') as f:
+                    f.write('EXAMPLE')
+                info['filepath']
+                return False, info
+
+        def run_pp(params):
+            with open(filename, 'wt') as f:
+                f.write('EXAMPLE')
+            ydl = YoutubeDL(params)
+            ydl.add_post_processor(SimplePP())
+            ydl.post_process(filename, {'filepath': filename})
+
+        run_pp({'keepvideo': True})
+        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        os.unlink(filename)
+        os.unlink(audiofile)
+
+        run_pp({'keepvideo': False})
+        self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        os.unlink(audiofile)
+
+
 if __name__ == '__main__':
     unittest.main()
index 1730df4cd40b174404168ed73f4438e6d02d2847..633e3d8a141e2cd249a69da2137709f2f1858040 100755 (executable)
@@ -938,6 +938,9 @@ class YoutubeDL(object):
             def has_header(self, h):
                 return h in self.headers
 
+            def get_header(self, h, default=None):
+                return self.headers.get(h, default)
+
         pr = _PseudoRequest(info_dict['url'])
         self.cookiejar.add_cookie_header(pr)
         return pr.headers.get('Cookie')
@@ -1076,7 +1079,8 @@ class YoutubeDL(object):
                                 else self.params['merge_output_format'])
                             selected_format = {
                                 'requested_formats': formats_info,
-                                'format': rf,
+                                'format': '%s+%s' % (formats_info[0].get('format'),
+                                                     formats_info[1].get('format')),
                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                         formats_info[1].get('format_id')),
                                 'width': formats_info[0].get('width'),
index 047f7002a3e8dafaac1c4368ddbcc94f6550bc94..0d7a120bcd7244f38df056311d564090080f7ba3 100644 (file)
@@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
+from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
new file mode 100644 (file)
index 0000000..2b257ed
--- /dev/null
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    xpath_with_ns,
+    xpath_text,
+    find_xpath_attr,
+)
+
+
+class AftenpostenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+
+    _TEST = {
+        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+        'md5': 'fd828cd29774a729bf4d4425fe192972',
+        'info_dict': {
+            'id': '21039',
+            'ext': 'mov',
+            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
+            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+            'timestamp': 1416927969,
+            'upload_date': '20141125',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._html_search_regex(
+            r'data-xs-id="(\d+)"', webpage, 'video id')
+
+        data = self._download_xml(
+            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
+
+        NS_MAP = {
+            'atom': 'http://www.w3.org/2005/Atom',
+            'xt': 'http://xstream.dk/',
+            'media': 'http://search.yahoo.com/mrss/',
+        }
+
+        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+        title = xpath_text(
+            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+        description = xpath_text(
+            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+        timestamp = parse_iso8601(xpath_text(
+            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+        formats = []
+        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+            media_url = media_content.get('url')
+            if not media_url:
+                continue
+            tbr = int_or_none(media_content.get('bitrate'))
+            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+            if mobj:
+                formats.append({
+                    'url': mobj.group('url'),
+                    'play_path': 'mp4:%s' % mobj.group('playpath'),
+                    'app': mobj.group('app'),
+                    'ext': 'flv',
+                    'tbr': tbr,
+                    'format_id': 'rtmp-%d' % tbr,
+                })
+            else:
+                formats.append({
+                    'url': media_url,
+                    'tbr': tbr,
+                })
+        self._sort_formats(formats)
+
+        link = find_xpath_attr(
+            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+        if link is not None:
+            formats.append({
+                'url': link.get('href'),
+                'format_id': link.get('rel'),
+            })
+
+        thumbnails = [{
+            'url': splash.get('url'),
+            'width': int_or_none(splash.get('width')),
+            'height': int_or_none(splash.get('height')),
+        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
index 15006336faacb0c7f6ab9c24263726776866dbb6..63429780e8abf528165daf7e50a6317bce9a6c7d 100644 (file)
@@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
             'id': 'wP8On',
             'ext': 'mp4',
             'title': 'تیم گلکسی 11 - زومیت',
+            'age_limit': 0,
         },
         # 'skip': 'Extremely unreliable',
     }
@@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
                      video_id + '/vt/frame')
         webpage = self._download_webpage(embed_url, video_id)
 
-        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+        video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
+            r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
         for i, video_url in enumerate(video_urls):
             req = HEADRequest(video_url)
             res = self._request_webpage(
@@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
         thumbnail = self._search_regex(
-            r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+            r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
@@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'thumbnail': thumbnail,
+            'age_limit': self._family_friendly_search(webpage),
         }
index 602601b24360766c4d37e33300ee01df0eaf122a..2f5ba7aee37fa7b4f26c6ebc68bc1a5abe31296c 100644 (file)
@@ -656,6 +656,21 @@ class InfoExtractor(object):
         }
         return RATING_TABLE.get(rating.lower(), None)
 
+    def _family_friendly_search(self, html):
+        # See http://schema.org/VideoObject
+        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+        if not family_friendly:
+            return None
+
+        RATING_TABLE = {
+            '1': 0,
+            'true': 0,
+            '0': 18,
+            'false': 18,
+        }
+        return RATING_TABLE.get(family_friendly.lower(), None)
+
     def _twitter_search_player(self, html):
         return self._html_search_meta('twitter:player', html,
                                       'twitter card player')
@@ -707,9 +722,9 @@ class InfoExtractor(object):
                 f.get('quality') if f.get('quality') is not None else -1,
                 f.get('tbr') if f.get('tbr') is not None else -1,
                 f.get('vbr') if f.get('vbr') is not None else -1,
-                ext_preference,
                 f.get('height') if f.get('height') is not None else -1,
                 f.get('width') if f.get('width') is not None else -1,
+                ext_preference,
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('fps') if f.get('fps') is not None else -1,
@@ -765,7 +780,7 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
-    def _extract_f4m_formats(self, manifest_url, video_id):
+    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
             'Unable to download f4m manifest')
@@ -778,26 +793,28 @@ class InfoExtractor(object):
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
-                manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
+                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
+                                + (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
-            format_id = 'f4m-%d' % (i if tbr is None else tbr)
             formats.append({
-                'format_id': format_id,
+                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
                 'width': int_or_none(media_el.attrib.get('width')),
                 'height': int_or_none(media_el.attrib.get('height')),
+                'preference': preference,
             })
         self._sort_formats(formats)
 
         return formats
 
     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
-                              entry_protocol='m3u8', preference=None):
+                              entry_protocol='m3u8', preference=None,
+                              m3u8_id=None):
 
         formats = [{
-            'format_id': 'm3u8-meta',
+            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
@@ -833,9 +850,8 @@ class InfoExtractor(object):
                     formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
-
                 f = {
-                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                    'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
                     'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
index b116d251d5d3f30c6affc852454e7e326d14f660..1d9166455aae935f1eb51777d170e0f6259ffd4e 100644 (file)
@@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
         duration = parse_duration(self._html_search_regex(
             r'<span class="duration">\s*-?\s*(.*?)</span>',
             webpage, 'duration', fatal=False))
-        family_friendly = self._html_search_meta(
-            'isFamilyFriendly', webpage, default='false')
 
         flashvars = compat_parse_qs(self._html_search_regex(
             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
@@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
-            'age_limit': 0 if family_friendly == 'true' else 18,
+            'age_limit': self._family_friendly_search(webpage),
         }
index d16d483eeb0d533debe041b7cd6c7b4826d41dde..99a1361f844c15520c842cd9fffa1e5c2e9b6974 100644 (file)
@@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor):
             r'comment_count\s*=\s*\'([^\']+)\';',
             webpage, 'comment_count', fatal=False)
 
-        family_friendly = self._html_search_meta(
-            'isFamilyFriendly', webpage, 'age limit', fatal=False)
-
         content_url = self._html_search_meta(
             'contentURL', webpage, 'content URL', fatal=False)
         ext = determine_ext(content_url, 'mp4')
@@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor):
             'duration': duration,
             'view_count': int_or_none(view_count),
             'comment_count': int_or_none(comment_count),
-            'age_limit': 18 if family_friendly == 'False' else 0,
+            'age_limit': self._family_friendly_search(webpage),
             'formats': formats,
         }
index 7736cabbac6b855be739b8c7521a073ea630f32f..ecf4939cdc031683eca7ddd7240a2439f803947d 100644 (file)
@@ -1,16 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
+import re
 
 from .common import InfoExtractor
-from ..utils import js_to_json
 
 
 class RTPIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
     _TESTS = [{
         'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'md5': 'e736ce0c665e459ddb818546220b4ef8',
         'info_dict': {
             'id': 'e174042',
             'ext': 'mp3',
@@ -18,9 +18,6 @@ class RTPIE(InfoExtractor):
             'description': 'As paixões musicais de António Cartaxo e António Macedo',
             'thumbnail': 're:^https?://.*\.jpg',
         },
-        'params': {
-            'skip_download': True,  # RTMP download
-        },
     }, {
         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
         'only_matching': True,
@@ -37,20 +34,48 @@ class RTPIE(InfoExtractor):
 
         player_config = self._search_regex(
             r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
-        config = json.loads(js_to_json(player_config))
+        config = self._parse_json(player_config, video_id)
 
         path, ext = config.get('file').rsplit('.', 1)
         formats = [{
+            'format_id': 'rtmp',
+            'ext': ext,
+            'vcodec': config.get('type') == 'audio' and 'none' or None,
+            'preference': -2,
+            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
             'app': config.get('application'),
             'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
             'page_url': url,
-            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
             'rtmp_live': config.get('live', False),
-            'ext': ext,
-            'vcodec': config.get('type') == 'audio' and 'none' or None,
             'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+            'rtmp_real_time': True,
         }]
 
+        # Construct regular HTTP download URLs
+        replacements = {
+            'audio': {
+                'format_id': 'mp3',
+                'pattern': r'^nas2\.share/wavrss/',
+                'repl': 'http://rsspod.rtp.pt/podcasts/',
+                'vcodec': 'none',
+            },
+            'video': {
+                'format_id': 'mp4_h264',
+                'pattern': r'^nas2\.share/h264/',
+                'repl': 'http://rsspod.rtp.pt/videocasts/',
+                'vcodec': 'h264',
+            },
+        }
+        r = replacements[config['type']]
+        if re.match(r['pattern'], config['file']) is not None:
+            formats.append({
+                'format_id': r['format_id'],
+                'url': re.sub(r['pattern'], r['repl'], config['file']),
+                'vcodec': r['vcodec'],
+            })
+
+        self._sort_formats(formats)
+
         return {
             'id': video_id,
             'title': title,
index 5e84c109802e34ce8f57496ee3b7e2cd409c0788..d0981115da9c64f1addf3108b8a9e6acf0c6508e 100644 (file)
@@ -6,12 +6,14 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
+    xpath_text,
 )
 
 
@@ -159,11 +161,27 @@ class RTSIE(InfoExtractor):
             return int_or_none(self._search_regex(
                 r'-([0-9]+)k\.', url, 'bitrate', default=None))
 
-        formats = [{
-            'format_id': fid,
-            'url': furl,
-            'tbr': extract_bitrate(furl),
-        } for fid, furl in info['streams'].items()]
+        formats = []
+        for format_id, format_url in info['streams'].items():
+            if format_url.endswith('.f4m'):
+                token = self._download_xml(
+                    'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
+                    video_id, 'Downloading %s token' % format_id)
+                auth_params = xpath_text(token, './/authparams', 'auth params')
+                if not auth_params:
+                    continue
+                formats.extend(self._extract_f4m_formats(
+                    '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
+                    video_id, f4m_id=format_id))
+            elif format_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id))
+            else:
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'tbr': extract_bitrate(format_url),
+                })
 
         if 'media' in info:
             formats.extend([{
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
deleted file mode 100644 (file)
index feef33e..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    HEADRequest,
-    urlhandle_detect_ext,
-)
-
-
-class SoulAnimeWatchingIE(InfoExtractor):
-    IE_NAME = "soulanime:watching"
-    IE_DESC = "SoulAnime video"
-    _TEST = {
-        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
-        'md5': '05fae04abf72298098b528e98abf4298',
-        'info_dict': {
-            'id': 'seirei-tsukai-no-blade-dance-episode-9',
-            'ext': 'mp4',
-            'title': 'seirei-tsukai-no-blade-dance-episode-9',
-            'description': 'seirei-tsukai-no-blade-dance-episode-9'
-        }
-    }
-    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        domain = mobj.group('domain')
-
-        page = self._download_webpage(url, video_id)
-
-        video_url_encoded = self._html_search_regex(
-            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
-        video_url = "http://www.soul-anime." + domain + video_url_encoded
-
-        ext_req = HEADRequest(video_url)
-        ext_handle = self._request_webpage(
-            ext_req, video_id, note='Determining extension')
-        ext = urlhandle_detect_ext(ext_handle)
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'ext': ext,
-            'title': video_id,
-            'description': video_id
-        }
-
-
-class SoulAnimeSeriesIE(InfoExtractor):
-    IE_NAME = "soulanime:series"
-    IE_DESC = "SoulAnime Series"
-
-    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
-
-    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
-
-    _TEST = {
-        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
-        'info_dict': {
-            'id': 'black-rock-shooter-tv'
-        },
-        'playlist_count': 8
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        series_id = mobj.group('id')
-        domain = mobj.group('domain')
-
-        pattern = re.compile(self._EPISODE_REGEX)
-
-        page = self._download_webpage(url, series_id, "Downloading series page")
-        mobj = pattern.findall(page)
-
-        entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
-
-        return self.playlist_result(entries, series_id)
index 18a8237197ca4f017252fa181b08bfacf67c44b2..e85d452a31277af6a298296bbe07958fec8aac86 100644 (file)
@@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor):
                 'id': '80187',
                 'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
-                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+                'age_limit': 0,
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
@@ -24,7 +25,8 @@ class TeamcocoIE(InfoExtractor):
                 'id': '19705',
                 'ext': 'mp4',
                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
-                "title": "Louis C.K. Interview Pt. 1 11/3/11"
+                "title": "Louis C.K. Interview Pt. 1 11/3/11",
+                'age_limit': 0,
             }
         }
     ]
@@ -83,4 +85,5 @@ class TeamcocoIE(InfoExtractor):
             'title': self._og_search_title(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
+            'age_limit': self._family_friendly_search(webpage),
         }
index ba65996dc01646e019cfd5820aa36c1934365d9b..102362b295450f58ff085ec9be7d21921a1ac494 100644 (file)
@@ -1,6 +1,8 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
@@ -11,7 +13,7 @@ from ..utils import (
 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
+    _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
 
     _TESTS = [
         {
@@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor):
                 'duration': 186.080,
                 'age_limit': 0,
             },
-        },
+        }, {
+            'url': 'https://cloud.tvigle.ru/video/5267604/',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
-        video_id = self._html_search_regex(
-            r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
+        if not video_id:
+            webpage = self._download_webpage(url, display_id)
+            video_id = self._html_search_regex(
+                r'<li class="video-preview current_playing" id="(\d+)">',
+                webpage, 'video id')
 
         video_data = self._download_json(
             'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
index e332d469487263746a91d08ab5f3c831413e7113..c80ec15cf1170d2e307dfc851cd5a91c4126afc2 100644 (file)
@@ -1,35 +1,65 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    float_or_none,
+)
 
 
 class TweakersIE(InfoExtractor):
-    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>[0-9]+).*'
+    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
     _TEST = {
         'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
-        'md5': 'f7f7f3027166a7f32f024b4ae6571ced',
+        'md5': '1b5afa817403bb5baa08359dca31e6df',
         'info_dict': {
             'id': '9926',
             'ext': 'mp4',
-            'title': 'New-Nintendo-3Ds-Xl-Op-Alle-Fronten-Beter',
+            'title': 'New Nintendo 3DS XL - Op alle fronten beter',
+            'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'duration': 386,
         }
     }
 
     def _real_extract(self, url):
-        splitted_url = re.split('.html|/', url)
-        del splitted_url[-1]  # To remove extra '/' at the end
         video_id = self._match_id(url)
-        title = splitted_url[5].title()  # Retrieve title for URL and capitalize
-        splitted_url[3] = splitted_url[3] + '/player'  # Add /player to get the player page
-        player_url = '/'.join(splitted_url) + '.html'
-        player_page = self._download_webpage(player_url, video_id)
+
+        playlist = self._download_xml(
+            'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
+            video_id)
+
+        NS_MAP = {
+            'xspf': 'http://xspf.org/ns/0/',
+            's1': 'http://static.streamone.nl/player/ns/0',
+        }
+
+        track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
+
+        title = xpath_text(
+            track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
+        description = xpath_text(
+            track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+        thumbnail = xpath_text(
+            track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+        duration = float_or_none(
+            xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
+            1000)
+
+        formats = [{
+            'url': location.text,
+            'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+            'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+            'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+        } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
 
         return {
             'id': video_id,
-            'ext': 'mp4',
             'title': title,
-            'url': re.findall('http.*mp4', player_page)[0],
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
         }
index 4a4422c5a6132cd9a3dfe3116fe554ac2a374ca5..01d25f760963454561ab2ae81888dff7499279e4 100644 (file)
@@ -166,14 +166,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         if filecodec is None:
             raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
 
-        uses_avconv = self._uses_avconv()
         more_opts = []
         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                 # Lossless, but in another container
                 acodec = 'copy'
                 extension = 'm4a'
-                more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+                more_opts = ['-bsf:a', 'aac_adtstoasc']
             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                 # Lossless if possible
                 acodec = 'copy'
@@ -189,9 +188,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 more_opts = []
                 if self._preferredquality is not None:
                     if int(self._preferredquality) < 10:
-                        more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+                        more_opts += ['-q:a', self._preferredquality]
                     else:
-                        more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+                        more_opts += ['-b:a', self._preferredquality + 'k']
         else:
             # We convert the audio (lossy)
             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -200,13 +199,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             if self._preferredquality is not None:
                 # The opus codec doesn't support the -aq option
                 if int(self._preferredquality) < 10 and extension != 'opus':
-                    more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+                    more_opts += ['-q:a', self._preferredquality]
                 else:
-                    more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+                    more_opts += ['-b:a', self._preferredquality + 'k']
             if self._preferredcodec == 'aac':
                 more_opts += ['-f', 'adts']
             if self._preferredcodec == 'm4a':
-                more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+                more_opts += ['-bsf:a', 'aac_adtstoasc']
             if self._preferredcodec == 'vorbis':
                 extension = 'ogg'
             if self._preferredcodec == 'wav':
index 36591a43d389119f883f50e92a5310403e1917dd..1091ae61bbf7599ef59390978f893bc2cdc1f28f 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.04'
+__version__ = '2015.02.06'