Merge branch 'sbs_website_changes' of https://github.com/seamusphelan/youtube-dl...
author Sergey M․ <dstftw@gmail.com>
Fri, 17 Jul 2015 20:13:55 +0000 (02:13 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 17 Jul 2015 20:13:55 +0000 (02:13 +0600)
56 files changed:
AUTHORS
README.md
test/test_compat.py
youtube_dl/compat.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bet.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/dfb.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/ehow.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/hostingbulk.py
youtube_dl/extractor/howstuffworks.py
youtube_dl/extractor/infoq.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/karaoketv.py
youtube_dl/extractor/malemotion.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mofosex.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/myvideo.py
youtube_dl/extractor/nowtv.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/openfilm.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/played.py
youtube_dl/extractor/playvid.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/primesharetv.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/veehd.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/ynet.py
youtube_dl/extractor/youtube.py
youtube_dl/postprocessor/ffmpeg.py

diff --git a/AUTHORS b/AUTHORS
index c0201d8758d5f2806fbd2be509b81bb6bd572b24..531ec5767c9b976bebfc77c760e7e971f405f9be 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -131,3 +131,4 @@ jackyzy823
 George Brighton
 Remita Amine
 Aurélio A. Heckert
+Bernhard Minks
diff --git a/README.md b/README.md
index 9779c2058a302503f20e652b25fe60231d12ae58..a2cc89cdb392cddb9dcd2a552ed2c3d864973cfb 100644 (file)
--- a/README.md
+++ b/README.md
@@ -238,6 +238,26 @@ which means you can modify it, redistribute it or use it however you like.
 
 You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
 
+### Authentication with `.netrc` file ###
+
+You may also want to configure automatic credential storage for extractors that support authentication (i.e. those that accept a login and password via `--username` and `--password`), so that you do not have to pass credentials as command-line arguments on every youtube-dl run and do not leave plain-text passwords in your shell history. You can achieve this with a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. To do so, create a `.netrc` file in your `$HOME` directory and restrict its permissions so that only you can read and write it:
+```
+touch $HOME/.netrc
+chmod a-rwx,u+rw $HOME/.netrc
+```
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
+```
+machine <extractor> login <login> password <password>
+```
+For example:
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with the `.netrc` file, pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
+
+On Windows you may also need to set up the `%HOME%` environment variable manually.
+
 # OUTPUT TEMPLATE
 
 The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
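
When `--netrc` is active, an extractor that defines `_NETRC_MACHINE` looks up the matching `machine` entry in that file. A minimal sketch of that lookup using Python's standard `netrc` module (the machine name `'twitch'` below is just an example entry, not a requirement):

```python
# Minimal sketch: resolve credentials from $HOME/.netrc by machine name.
import netrc

def get_login_info(machine):
    try:
        info = netrc.netrc().authenticators(machine)  # reads $HOME/.netrc
    except (IOError, netrc.NetrcParseError):
        return None, None
    if info is None:  # no entry for this machine
        return None, None
    login, _account, password = info
    return login, password

username, password = get_login_info('twitch')
```
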
diff --git a/test/test_compat.py b/test/test_compat.py
index 1eb454e068970eb1a0d48cc3cd881e0bf71f9463..c3ba8ad2e3aa1f5cd33dd5a61a184d52cc0c07a9 100644 (file)
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -14,6 +14,8 @@ from youtube_dl.utils import get_filesystem_encoding
 from youtube_dl.compat import (
     compat_getenv,
     compat_expanduser,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
 )
 
 
@@ -42,5 +44,28 @@ class TestCompat(unittest.TestCase):
             dir(youtube_dl.compat))) - set(['unicode_literals'])
         self.assertEqual(all_names, sorted(present_names))
 
+    def test_compat_urllib_parse_unquote(self):
+        self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
+        self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
+        self.assertEqual(compat_urllib_parse_unquote(''), '')
+        self.assertEqual(compat_urllib_parse_unquote('%'), '%')
+        self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
+        self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
+        self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
+        self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
+        self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
+        self.assertEqual(
+            compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
+            '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
+        self.assertEqual(
+            compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%86%B6%I%Break%25Things%'''),
+            '''(^◣_◢^)っ︻デ═一    ⇀    ⇀    ⇀    ⇀    ⇀    ↶%I%Break%Things%''')
+
+    def test_compat_urllib_parse_unquote_plus(self):
+        self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
+        self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
+
 if __name__ == '__main__':
     unittest.main()
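
The semantics these new tests pin down are exactly Python 3's `urllib.parse` behaviour, which the compat backport below mirrors; a quick illustrative check that runs as-is on Python 3:

```python
from urllib.parse import unquote, unquote_plus

assert unquote('%E6%B4%A5%E6%B3%A2') == '津波'     # multi-byte UTF-8 decodes as one sequence
assert unquote('%') == '%'                         # stray percent signs pass through
assert unquote('%2f') == unquote('%2F') == '/'     # hex digits are case-insensitive
assert unquote_plus('%7e/abc+def') == '~/abc def'  # '+' becomes a space only in unquote_plus
```
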
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index c3783337a5a801cd5230f2ae2f5d82112d4b6cea..db0da5828dd317eb5eef0b0945dec9300c7c1f75 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -75,42 +75,73 @@ except ImportError:
     import BaseHTTPServer as compat_http_server
 
 try:
+    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
-except ImportError:
-    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
-        if string == '':
+    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+except ImportError:  # Python 2
+    _asciire = re.compile('([\x00-\x7f]+)') if sys.version_info < (2, 7) else compat_urllib_parse._asciire
+
+    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
+    # implementations from cpython 3.4.3's stdlib. Python 2's version
+    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+
+    def compat_urllib_parse_unquote_to_bytes(string):
+        """unquote_to_bytes('abc%20def') -> b'abc def'."""
+        # Note: strings are encoded as UTF-8. This is only an issue if it contains
+        # unescaped non-ASCII characters, which URIs should not.
+        if not string:
+            # Is it a string-like object?
+            string.split
+            return b''
+        if isinstance(string, unicode):
+            string = string.encode('utf-8')
+        bits = string.split(b'%')
+        if len(bits) == 1:
             return string
-        res = string.split('%')
-        if len(res) == 1:
+        res = [bits[0]]
+        append = res.append
+        for item in bits[1:]:
+            try:
+                append(compat_urllib_parse._hextochr[item[:2]])
+                append(item[2:])
+            except KeyError:
+                append(b'%')
+                append(item)
+        return b''.join(res)
+
+    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+        """Replace %xx escapes by their single-character equivalent. The optional
+        encoding and errors parameters specify how to decode percent-encoded
+        sequences into Unicode characters, as accepted by the bytes.decode()
+        method.
+        By default, percent-encoded sequences are decoded with UTF-8, and invalid
+        sequences are replaced by a placeholder character.
+
+        unquote('abc%20def') -> 'abc def'.
+        """
+        if '%' not in string:
+            string.split
             return string
         if encoding is None:
             encoding = 'utf-8'
         if errors is None:
             errors = 'replace'
-        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
-        pct_sequence = b''
-        string = res[0]
-        for item in res[1:]:
-            try:
-                if not item:
-                    raise ValueError
-                pct_sequence += item[:2].decode('hex')
-                rest = item[2:]
-                if not rest:
-                    # This segment was just a single percent-encoded character.
-                    # May be part of a sequence of code units, so delay decoding.
-                    # (Stored in pct_sequence).
-                    continue
-            except ValueError:
-                rest = '%' + item
-            # Encountered non-percent-encoded characters. Flush the current
-            # pct_sequence.
-            string += pct_sequence.decode(encoding, errors) + rest
-            pct_sequence = b''
-        if pct_sequence:
-            # Flush the final pct_sequence
-            string += pct_sequence.decode(encoding, errors)
-        return string
+        bits = _asciire.split(string)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
+        """Like unquote(), but also replace plus signs by spaces, as required for
+        unquoting HTML form values.
+
+        unquote_plus('%7e/abc+def') -> '~/abc def'
+        """
+        string = string.replace('+', ' ')
+        return compat_urllib_parse_unquote(string, encoding, errors)
 
 try:
     compat_str = unicode  # Python 2
@@ -422,6 +453,8 @@ __all__ = [
     'compat_urllib_error',
     'compat_urllib_parse',
     'compat_urllib_parse_unquote',
+    'compat_urllib_parse_unquote_plus',
+    'compat_urllib_parse_unquote_to_bytes',
     'compat_urllib_parse_urlparse',
     'compat_urllib_request',
     'compat_urlparse',
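
The core of the backport is the `_asciire` split: the input is cut into alternating non-ASCII and ASCII runs, and only the ASCII runs (the only place a valid `%xx` escape can occur) are percent-decoded to bytes and then decoded with the requested encoding. A condensed, self-contained rendering of the same algorithm (Python 3 syntax for brevity, with the stdlib `unquote_to_bytes` standing in for the backported one):

```python
import re
from urllib.parse import unquote_to_bytes

_asciire = re.compile('([\x00-\x7f]+)')  # captures runs of ASCII characters

def unquote(string, encoding='utf-8', errors='replace'):
    """Condensed mirror of the CPython 3.4 unquote backported above."""
    if '%' not in string:
        return string
    bits = _asciire.split(string)  # even slots: non-ASCII, odd slots: ASCII runs
    res = [bits[0]]
    for i in range(1, len(bits), 2):
        res.append(unquote_to_bytes(bits[i]).decode(encoding, errors))
        res.append(bits[i + 1])
    return ''.join(res)

print(unquote('abc%20def'))           # abc def
print(unquote('%E6%B4%A5%E6%B3%A2'))  # 津波
```
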
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3f4f23521b15f9f43c77d0723c385615d2b9da2c..06f21064b699fe8b3012836949c655e6e284ef29 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -389,7 +389,8 @@ from .npo import (
     NPOLiveIE,
     NPORadioIE,
     NPORadioFragmentIE,
-    TegenlichtVproIE,
+    VPROIE,
+    WNLIE
 )
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 26b934543a7ac0d28cabf0ca70610460fc253a2f..03dad4636afdf0443735fde8f1d643aea553ba10 100644 (file)
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -1,7 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     xpath_text,
     xpath_with_ns,
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        media_url = compat_urllib_parse.unquote(self._search_regex(
+        media_url = compat_urllib_parse_unquote(self._search_regex(
             [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
             webpage, 'media URL'))
 
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index fb56cd78d07ab396ae26ff5a15ac7ebdef933237..a69ee482b122be066ded94702bcd28e3ddec1794 100644 (file)
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -14,6 +14,8 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     unescapeHTML,
+    xpath_text,
+    xpath_with_ns,
 )
 
 
@@ -23,10 +25,10 @@ class BlipTVIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
-            'md5': 'c6934ad0b6acf2bd920720ec888eb812',
+            'md5': '80baf1ec5c3d2019037c1c707d676b9f',
             'info_dict': {
                 'id': '5779306',
-                'ext': 'mov',
+                'ext': 'm4v',
                 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                 'timestamp': 1323138843,
@@ -100,6 +102,20 @@ class BlipTVIE(InfoExtractor):
                 'vcodec': 'none',
             }
         },
+        {
+            # missing duration
+            'url': 'http://blip.tv/rss/flash/6700880',
+            'info_dict': {
+                'id': '6684191',
+                'ext': 'm4v',
+                'title': 'Cowboy Bebop: Gateway Shuffle Review',
+                'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
+                'timestamp': 1386639757,
+                'upload_date': '20131210',
+                'uploader': 'sfdebris',
+                'uploader_id': '706520',
+            }
+        }
     ]
 
     @staticmethod
@@ -128,35 +144,34 @@ class BlipTVIE(InfoExtractor):
 
         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
 
-        def blip(s):
-            return '{http://blip.tv/dtd/blip/1.0}%s' % s
-
-        def media(s):
-            return '{http://search.yahoo.com/mrss/}%s' % s
-
-        def itunes(s):
-            return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
+        def _x(p):
+            return xpath_with_ns(p, {
+                'blip': 'http://blip.tv/dtd/blip/1.0',
+                'media': 'http://search.yahoo.com/mrss/',
+                'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+            })
 
         item = rss.find('channel/item')
 
-        video_id = item.find(blip('item_id')).text
-        title = item.find('./title').text
-        description = clean_html(compat_str(item.find(blip('puredescription')).text))
-        timestamp = parse_iso8601(item.find(blip('datestamp')).text)
-        uploader = item.find(blip('user')).text
-        uploader_id = item.find(blip('userid')).text
-        duration = int(item.find(blip('runtime')).text)
-        media_thumbnail = item.find(media('thumbnail'))
-        thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
-        categories = [category.text for category in item.findall('category')]
+        video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
+        title = xpath_text(item, 'title', 'title', fatal=True)
+        description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
+        timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
+        uploader = xpath_text(item, _x('blip:user'), 'uploader')
+        uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
+        duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
+        media_thumbnail = item.find(_x('media:thumbnail'))
+        thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
+                     else xpath_text(item, 'image', 'thumbnail'))
+        categories = [category.text for category in item.findall('category') if category is not None]
 
         formats = []
         subtitles_urls = {}
 
-        media_group = item.find(media('group'))
-        for media_content in media_group.findall(media('content')):
+        media_group = item.find(_x('media:group'))
+        for media_content in media_group.findall(_x('media:content')):
             url = media_content.get('url')
-            role = media_content.get(blip('role'))
+            role = media_content.get(_x('blip:role'))
             msg = self._download_webpage(
                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                 video_id, 'Resolving URL for %s' % role)
@@ -175,8 +190,8 @@ class BlipTVIE(InfoExtractor):
                     'url': real_url,
                     'format_id': role,
                     'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')) or 'none',
-                    'acodec': media_content.get(blip('acodec')),
+                    'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
+                    'acodec': media_content.get(_x('blip:acodec')),
                     'filesize': media_content.get('filesize'),
                     'width': int_or_none(media_content.get('width')),
                     'height': int_or_none(media_content.get('height')),
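
The three ad-hoc closures (`blip`, `media`, `itunes`) are replaced by the shared `xpath_with_ns`/`xpath_text` helpers, which also make missing elements non-fatal via `int_or_none` and default values. A standalone sketch of what the prefix expansion does (a simplified re-implementation for illustration, not the `youtube_dl.utils` original):

```python
import xml.etree.ElementTree as ET

NS = {'blip': 'http://blip.tv/dtd/blip/1.0',
      'media': 'http://search.yahoo.com/mrss/'}

def xpath_with_ns(path, ns_map):
    # 'blip:item_id' -> '{http://blip.tv/dtd/blip/1.0}item_id'
    parts = []
    for part in path.split('/'):
        if ':' in part:
            prefix, tag = part.split(':', 1)
            part = '{%s}%s' % (ns_map[prefix], tag)
        parts.append(part)
    return '/'.join(parts)

item = ET.fromstring(
    '<item><blip:item_id xmlns:blip="http://blip.tv/dtd/blip/1.0">'
    '5779306</blip:item_id></item>')
print(item.find(xpath_with_ns('blip:item_id', NS)).text)  # 5779306
```
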
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 65f6be62313dfc623cf1f9aa7adc52282872aade..dda583680a03ba3cb420beb74a99af2ec60cbc83 100644 (file)
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
         if playlist_url == 'error_region':
             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
 
-        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
+        req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
         req.add_header('Referer', url)
 
         playlist = self._download_json(req, video_id)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 82f5de2d898c4238839b8ada7c43aea6b166fd5f..5a2d0d995c75cbbc94709bb8669928de84f46a29 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -27,7 +27,9 @@ from ..utils import (
     bug_reports_message,
     clean_html,
     compiled_regex_type,
+    determine_ext,
     ExtractorError,
+    fix_xml_ampersands,
     float_or_none,
     int_or_none,
     RegexNotFoundError,
@@ -706,10 +708,23 @@ class InfoExtractor(object):
                                       'twitter card player')
 
     @staticmethod
-    def _form_hidden_inputs(html):
-        return dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            html))
+    def _hidden_inputs(html):
+        return dict([
+            (input.group('name'), input.group('value')) for input in re.finditer(
+                r'''(?x)
+                    <input\s+
+                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+                ''', html)
+        ])
+
+    def _form_hidden_inputs(self, form_id, html):
+        form = self._search_regex(
+            r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+            html, '%s form' % form_id, group='form')
+        return self._hidden_inputs(form)
 
     def _sort_formats(self, formats, field_preference=None):
         if not formats:
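
The rewritten `_hidden_inputs` is deliberately more permissive than the regex it replaces: quotes may be single or double (enforced with backreferences), and an optional `id` attribute between `name` and `value` no longer breaks the match. A minimal standalone version:

```python
import re

def hidden_inputs(html):
    """Simplified standalone copy of the _hidden_inputs helper above."""
    return dict(
        (m.group('name'), m.group('value'))
        for m in re.finditer(
            r'''(?x)<input\s+
                type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
                name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
                (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
                value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)''',
            html))

html = ('<input type="hidden" name="op" value="download1">\n'
        "<input type='hidden' name='token' value='abc123'>")
print(hidden_inputs(html))  # {'op': 'download1', 'token': 'abc123'}
```
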
@@ -821,10 +836,14 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
-    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
+    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+                             transform_source=lambda s: fix_xml_ampersands(s).strip()):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
-            'Unable to download f4m manifest')
+            'Unable to download f4m manifest',
+            # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+            # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+            transform_source=transform_source)
 
         formats = []
         manifest_version = '1.0'
@@ -834,8 +853,19 @@ class InfoExtractor(object):
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
-                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
-                                (media_el.attrib.get('href') or media_el.attrib.get('url')))
+                media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
+                if not media_url:
+                    continue
+                manifest_url = (
+                    media_url if media_url.startswith('http://') or media_url.startswith('https://')
+                    else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+                # If media_url is itself a f4m manifest do the recursive extraction
+                # since bitrates in parent manifest (this one) and media_url manifest
+                # may differ leading to inability to resolve the format by requested
+                # bitrate in f4m downloader
+                if determine_ext(manifest_url) == 'f4m':
+                    formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
+                    continue
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
                 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
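
Two behaviours added to `_extract_f4m_formats` are worth spelling out: manifests are run through `fix_xml_ampersands` before parsing because some servers emit malformed XML, and a media entry that itself points at another `.f4m` manifest is expanded recursively, since the child's bitrates may differ from the parent's and the f4m downloader resolves formats by bitrate. A sketch of just the URL handling (simplified: no query strings, no network I/O):

```python
from posixpath import splitext
from urllib.parse import urlparse

def resolve_media_url(manifest_url, media_url):
    """Mirror of the resolution added above: keep absolute URLs,
    join relative ones onto the manifest's directory (simplified)."""
    if media_url.startswith(('http://', 'https://')):
        return media_url
    return manifest_url.rsplit('/', 1)[0] + '/' + media_url

def is_nested_manifest(url):
    # An entry ending in .f4m must itself be extracted recursively.
    return splitext(urlparse(url).path)[1] == '.f4m'

child = resolve_media_url('http://cdn.example/a/b/manifest.f4m', 'hd.f4m')
print(child, is_nested_manifest(child))  # http://cdn.example/a/b/hd.f4m True
```
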
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 73f1e22efdc5040d55042fdc1eb47a78c4e56468..d1b6d7366e847015af4581160c918d4a1ee6e11f 100644 (file)
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -12,6 +12,7 @@ from math import pow, sqrt, floor
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_request,
 )
 from ..utils import (
@@ -254,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             video_upload_date = unified_strdate(video_upload_date)
         video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
 
-        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
+        playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
         playerdata_req = compat_urllib_request.Request(playerdata_url)
         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 8852f0add793ebee0bf0efae4fd0db70d54b8210..1a41c0db181c43c9c35d24988b9fd118c248f399 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -53,6 +53,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'uploader': 'IGN',
                 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
                 'upload_date': '20150306',
+                'duration': 74,
             }
         },
         # Vevo video
@@ -164,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             'thumbnail': info['thumbnail_url'],
             'age_limit': age_limit,
             'view_count': view_count,
+            'duration': info['duration']
         }
 
     def _get_subtitles(self, video_id, webpage):
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index 8049779b0a31049f704bae256a3752a9a22ad789..263532cc6e66a94c79670caa5e1600444ce909da 100644 (file)
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import unified_strdate
 
 
 class DFBIE(InfoExtractor):
     IE_NAME = 'tv.dfb.de'
-    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+        'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
         # The md5 is different each time
         'info_dict': {
-            'id': '9070',
+            'id': '11633',
+            'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
             'ext': 'flv',
-            'title': 'Highlights des Empfangs in Berlin',
-            'upload_date': '20140716',
+            'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+            'upload_date': '20150714',
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)
         player_info = self._download_xml(
             'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
-            video_id)
+            display_id)
         video_info = player_info.find('video')
 
-        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
+        f4m_info = self._download_xml(
+            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
         token_el = f4m_info.find('token')
         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
+        formats = self._extract_f4m_formats(manifest_url, display_id)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': video_info.find('title').text,
-            'url': manifest_url,
-            'ext': 'flv',
             'thumbnail': self._og_search_thumbnail(webpage),
-            'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
+            'upload_date': unified_strdate(video_info.find('time_date').text),
+            'formats': formats,
         }
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index ca41a3abf324a9c2237c02047f7384f5ecc9cff6..38e6597c80f203b30a90a13c92027a4a5a305bd7 100644 (file)
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -23,8 +23,23 @@ class DramaFeverBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
     _NETRC_MACHINE = 'dramafever'
 
+    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
+
+    _consumer_secret = None
+
+    def _get_consumer_secret(self):
+        mainjs = self._download_webpage(
+            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
+            None, 'Downloading main.js', fatal=False)
+        if not mainjs:
+            return self._CONSUMER_SECRET
+        return self._search_regex(
+            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
+            'consumer secret', default=self._CONSUMER_SECRET)
+
     def _real_initialize(self):
         self._login()
+        self._consumer_secret = self._get_consumer_secret()
 
     def _login(self):
         (username, password) = self._get_login_info()
@@ -119,6 +134,23 @@ class DramaFeverIE(DramaFeverBaseIE):
                 'url': href,
             }]
 
+        series_id, episode_number = video_id.split('.')
+        episode_info = self._download_json(
+            # We only need a single episode info, so restricting page size to one episode
+            # and dealing with page number as with episode number
+            r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
+            % (self._consumer_secret, series_id, episode_number),
+            video_id, 'Downloading episode info JSON', fatal=False)
+        if episode_info:
+            value = episode_info.get('value')
+            if value:
+                subfile = value[0].get('subfile') or value[0].get('new_subfile')
+                if subfile and subfile != 'http://www.dramafever.com/st/':
+                    subtitles.setdefault('English', []).append({
+                        'ext': 'srt',
+                        'url': subfile,
+                    })
+
         return {
             'id': video_id,
             'title': title,
@@ -152,27 +184,14 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
         'playlist_count': 20,
     }]
 
-    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
     _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
 
-    def _get_consumer_secret(self, video_id):
-        mainjs = self._download_webpage(
-            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
-            video_id, 'Downloading main.js', fatal=False)
-        if not mainjs:
-            return self._CONSUMER_SECRET
-        return self._search_regex(
-            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
-            'consumer secret', default=self._CONSUMER_SECRET)
-
     def _real_extract(self, url):
         series_id = self._match_id(url)
 
-        consumer_secret = self._get_consumer_secret(series_id)
-
         series = self._download_json(
             'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
-            % (consumer_secret, series_id),
+            % (self._consumer_secret, series_id),
             series_id, 'Downloading series JSON')['series'][series_id]
 
         title = clean_html(series['name'])
@@ -182,7 +201,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
         for page_num in itertools.count(1):
             episodes = self._download_json(
                 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
-                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
+                % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
                 series_id, 'Downloading episodes JSON page #%d' % page_num)
             for episode in episodes.get('value', []):
                 episode_url = episode.get('episode_url')
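
Moving `_get_consumer_secret` into the base class lets the episode and series extractors share one pattern: scrape the live value of `cs` from the site's `main.js`, and fall back to the hard-coded constant when the download or the regex fails. A sketch of the pattern with the fetcher injected so it runs offline:

```python
import re

FALLBACK_CS = 'DA59dtVXYLxajktV'

def get_consumer_secret(download_webpage):
    mainjs = download_webpage(
        'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js')
    if not mainjs:  # download failed -> fall back to the known constant
        return FALLBACK_CS
    m = re.search(r"var\s+cs\s*=\s*'([^']+)'", mainjs)
    return m.group(1) if m else FALLBACK_CS

print(get_consumer_secret(lambda url: "var cs = 'NEWSECRET'"))  # NEWSECRET
print(get_consumer_secret(lambda url: None))                    # DA59dtVXYLxajktV
```
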
diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py
index 9cb1bf301b9ae3e327e4831bdb8a7d2437b43803..b1cd4f5d4e6fe1dbaaf6ec230aad75947887301d 100644 (file)
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-from ..compat import (
-    compat_urllib_parse,
-)
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
 
 
 class EHowIE(InfoExtractor):
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
             r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
-        final_url = compat_urllib_parse.unquote(video_url)
+        final_url = compat_urllib_parse_unquote(video_url)
         uploader = self._html_search_meta('uploader', webpage)
         title = self._og_search_title(webpage).replace(' | eHow', '')
 
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 82dc27bc6ff3ed2edd3b318f3ed3d14e360ef22d..e17bb9aeac51e2e10e2b68b4391d3022af35bcd5 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -9,7 +9,7 @@ from ..compat import (
     compat_http_client,
     compat_str,
     compat_urllib_error,
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_request,
 )
 from ..utils import (
@@ -136,7 +136,7 @@ class FacebookIE(InfoExtractor):
             else:
                 raise ExtractorError('Cannot parse data')
         data = dict(json.loads(m.group(1)))
-        params_raw = compat_urllib_parse.unquote(data['params'])
+        params_raw = compat_urllib_parse_unquote(data['params'])
         params = json.loads(params_raw)
         video_data = params['video_data'][0]
 
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 2d33fa7f50d203a4522229d319467595881b138a..b3f1bafcc37ee98f1c5b89a644909f3ee0a32049 100644 (file)
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -5,7 +5,7 @@ import json
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urlparse,
 )
 from ..utils import (
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
         return {
             'id': data_video['guid'],
             'display_id': page_id,
-            'title': compat_urllib_parse.unquote(data_video['title']),
+            'title': compat_urllib_parse_unquote(data_video['title']),
             'formats': formats,
             'description': self._html_search_meta('description', webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 392ad36486c8e953f4d3bed341353a3b5d11e65a..a62287e50d34f36840aab34ad3cb3988f85b0193 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,7 +8,6 @@ import re
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..compat import (
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_request,
     compat_urlparse,
@@ -1115,7 +1114,7 @@ class GenericIE(InfoExtractor):
         # Sometimes embedded video player is hidden behind percent encoding
         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
         # Unescaping the whole page allows to handle those cases in a generic way
-        webpage = compat_urllib_parse.unquote(webpage)
+        webpage = compat_urllib_parse_unquote(webpage)
 
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
@@ -1369,7 +1368,7 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'))
         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
         if mobj is not None:
-            return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
 
         # Look for funnyordie embed
         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -1682,7 +1681,7 @@ class GenericIE(InfoExtractor):
         entries = []
         for video_url in found:
             video_url = compat_urlparse.urljoin(url, video_url)
-            video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
 
             # Sometimes, jwplayer extraction will result in a YouTube URL
             if YoutubeIE.suitable(video_url):
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
index aabf07a20677c6e940d609565e44dfd330512cd7..f006f0cb105dc9d7b0c1f495dbcd4c840597858a 100644 (file)
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor):
         if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         if fields['op'] == 'download1':
             countdown = int_or_none(self._search_regex(
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py
index 63f579592cf4d9c78c00ef30176aceade4a50594..a3154cfdeccf9b4c18cf5a5b01f7944243fb1509 100644 (file)
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor):
             r'<img src="([^"]+)".+?class="pic"',
             webpage, 'thumbnail', fatal=False)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         request = compat_urllib_request.Request(url, urlencode_postdata(fields))
         request.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index e9733912132798d99be18bb935dcd3c3b190525d..663e6632a194d8ee271a0c031a921d7eed139005 100644 (file)
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class HowStuffWorksIE(InfoExtractor):
-    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
+    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
     _TESTS = [
         {
             'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
+        {
+            'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 91a1b3ccb70d41027d7ecfa41f1b606ead8f056a..71cfd12c56549d0be540c9daee6a2732959039de 100644 (file)
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -4,7 +4,7 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urlparse,
 )
 
@@ -39,7 +39,7 @@ class InfoQIE(InfoExtractor):
         # Extract video URL
         encoded_id = self._search_regex(
             r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
-        real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
+        real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
         playpath = 'mp4:' + real_id
 
         video_filename = playpath.split('/')[-1]
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index d0720ff561c16e8c0816c5ff7ab333e54c297dbc..1df084d87ae4c712d9bcfa1aac6d6367641287b5 100644 (file)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -8,9 +8,9 @@ from .common import InfoExtractor
 
 
 class JeuxVideoIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
+    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
         'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
         'info_dict': {
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
             'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
             'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
         },
-    }
+    }, {
+        'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py
index e3b43ff8dbfec5e065aa069a0fb140dcfb5822c9..06daf5a89ce3ffde4d71d7dc8ceee9441840b72b 100644 (file)
--- a/youtube_dl/extractor/karaoketv.py
+++ b/youtube_dl/extractor/karaoketv.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
 from ..utils import (
     js_to_json,
 )
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         page_video_url = self._og_search_video_url(webpage, video_id)
-        config_json = compat_urllib_parse.unquote_plus(self._search_regex(
+        config_json = compat_urllib_parse_unquote_plus(self._search_regex(
             r'config=(.*)', page_video_url, 'configuration'))
 
         urls_info_json = self._download_json(
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
index 0b85a59d1c644d7d04e573aae0bdd03ebd4f6c80..92511a671ae300287fe6eb57b91b9c708dba45c5 100644 (file)
--- a/youtube_dl/extractor/malemotion.py
+++ b/youtube_dl/extractor/malemotion.py
@@ -2,9 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
 
 
 class MalemotionIE(InfoExtractor):
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_url = compat_urllib_parse.unquote(self._search_regex(
+        video_url = compat_urllib_parse_unquote(self._search_regex(
             r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
         video_title = self._html_search_regex(
             r'<title>(.*?)</title', webpage, 'title')
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 8bc333b0277e27e6fd8f3d4f11b3c9c7eabdd7d7..6e2e73a5162f10ea5818b636da579c932b4f2e7d 100644 (file)
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_request,
 )
 from ..utils import (
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
         video_url = None
         mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
         if mobj is not None:
-            mediaURL = compat_urllib_parse.unquote(mobj.group(1))
+            mediaURL = compat_urllib_parse_unquote(mobj.group(1))
             video_ext = mediaURL[-3:]
 
             # Extract gdaKey if available
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 7091f3335e8223ea0a089ba7cff127a983b12d7d..852d722664a3d63aafed0f8246949335b4150c09 100644 (file)
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -5,6 +5,7 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urlparse,
 )
 from ..utils import (
@@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor):
             domain = 'http://' + domain
         info_url = compat_urlparse.urljoin(
             domain,
-            compat_urllib_parse.unquote(embed_data['flashvars']['host'])
+            compat_urllib_parse_unquote(embed_data['flashvars']['host'])
         )
         info_el = self._download_xml(info_url, episode).find('./video/info')
 
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 425a4ccf16fff96b1bface874748b93762d2194b..d47aecedae388829babaed8642611c5a6b7d29fe 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     ExtractorError,
     HEADRequest,
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group(1)
         cloudcast_name = mobj.group(2)
-        track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
+        track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
 
         webpage = self._download_webpage(url, track_id)
 
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
index 2cec12d35ec1797dd7612ad49c5739e87f77e6c9..9bf99a54a98c4838c2b878db3ec165c867602110 100644 (file)
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -5,9 +5,9 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
 )
 
 
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
+        video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[5].split('_')[:2]
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 5b9b9fbcd0844897d6d63305ed00729e70c7f4fb..4557a2b13b3e47a75242ebd4a5c095bf17cbaacf 100644 (file)
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
 
         # get metadata
         metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata = self._download_xml(metadata_url, video_id)
+        metadata = self._download_xml(
+            metadata_url, video_id, transform_source=lambda s: s.strip())
 
         # extract values from metadata
         url_flv_el = metadata.find('url_flv')
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 5e754fcffb6403cbd359b9b358ad3879ef279f53..c96f472a39e569c7dfb88682d36fad9ed6ce2c10 100644 (file)
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -10,6 +10,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_ord,
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_request,
 )
 from ..utils import (
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
             if not a == '_encxml':
                 params[a] = b
             else:
-                encxml = compat_urllib_parse.unquote(b)
+                encxml = compat_urllib_parse_unquote(b)
         if not params.get('domain'):
             params['domain'] = 'www.myvideo.de'
         xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
         video_url = None
         mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
         if mobj:
-            video_url = compat_urllib_parse.unquote(mobj.group(1))
+            video_url = compat_urllib_parse_unquote(mobj.group(1))
             if 'myvideo2flash' in video_url:
                 self.report_warning(
                     'Rewriting URL to use unencrypted rtmp:// ...',
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
             if mobj is None:
                 raise ExtractorError('unable to extract url')
-            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
+            video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
 
         video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
-        video_file = compat_urllib_parse.unquote(video_file)
+        video_file = compat_urllib_parse_unquote(video_file)
 
         if not video_file.endswith('f4m'):
             ppath, prefix = video_file.split('.')
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
             video_playpath = ''
 
         video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
-        video_swfobj = compat_urllib_parse.unquote(video_swfobj)
+        video_swfobj = compat_urllib_parse_unquote(video_swfobj)
 
         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
                                               webpage, 'title')
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 173e46cd8b78dfa0dcdf33b6d3c280bf40a70681..0b5ff47600559e50a2282b184b23d3fc7263d46d 100644 (file)
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor):
         station = mobj.group('station')
 
         info = self._download_json(
-            'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+            'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
             display_id)
 
         video_id = compat_str(info['id'])
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 62d12b7a66c7722c0b51d15092a26aa9e1f03416..0c2d02c108ed425cf4688c34aa2a826ddaa400b4 100644 (file)
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
@@ -7,7 +9,6 @@ from ..utils import (
     qualities,
     strip_jsonp,
     unified_strdate,
-    url_basename,
 )
 
 
@@ -37,8 +38,21 @@ class NPOBaseIE(InfoExtractor):
 
 
 class NPOIE(NPOBaseIE):
-    IE_NAME = 'npo.nl'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
+    IE_NAME = 'npo'
+    IE_DESC = 'npo.nl and ntr.nl'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        npo:|
+                        https?://
+                            (?:www\.)?
+                            (?:
+                                npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+                                ntr\.nl/(?:[^/]+/){2,}|
+                                omroepwnl\.nl/video/fragment/[^/]+__
+                            )
+                        )
+                        (?P<id>[^/?#]+)
+                '''
 
     _TESTS = [
         {
@@ -58,7 +72,7 @@ class NPOIE(NPOBaseIE):
             'info_dict': {
                 'id': 'VARA_101191800',
                 'ext': 'm4v',
-                'title': 'De Mega Mike & Mega Thomas show',
+                'title': 'De Mega Mike & Mega Thomas show: The best of.',
                 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
                 'upload_date': '20090227',
                 'duration': 2400,
@@ -70,8 +84,8 @@ class NPOIE(NPOBaseIE):
             'info_dict': {
                 'id': 'VPWON_1169289',
                 'ext': 'm4v',
-                'title': 'Tegenlicht',
-                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'title': 'Tegenlicht: De toekomst komt uit Afrika',
+                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                 'upload_date': '20130225',
                 'duration': 3000,
             },
@@ -100,6 +114,30 @@ class NPOIE(NPOBaseIE):
                 'title': 'Hoe gaat Europa verder na Parijs?',
             },
         },
+        {
+            'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+            'md5': '01c6a2841675995da1f0cf776f03a9c3',
+            'info_dict': {
+                'id': 'VPWON_1233944',
+                'ext': 'm4v',
+                'title': 'Aap, poot, pies',
+                'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+                'upload_date': '20150508',
+                'duration': 599,
+            },
+        },
+        {
+            'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+            'md5': 'd30cd8417b8b9bca1fdff27428860d08',
+            'info_dict': {
+                'id': 'POW_00996502',
+                'ext': 'm4v',
+                'title': '''"Dit is wel een 'landslide'..."''',
+                'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+                'upload_date': '20150508',
+                'duration': 462,
+            },
+        }
     ]
 
     def _real_extract(self, url):
@@ -114,6 +152,18 @@ class NPOIE(NPOBaseIE):
             transform_source=strip_jsonp,
         )
 
+        # For some videos actual video id (prid) is different (e.g. for
+        # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+        # video id is POMS_WNL_853698 but prid is POW_00996502)
+        video_id = metadata.get('prid') or video_id
+
+        # titel is too generic in some cases so utilize aflevering_titel as well
+        # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+        title = metadata['titel']
+        sub_title = metadata.get('aflevering_titel')
+        if sub_title and sub_title != title:
+            title += ': %s' % sub_title
+
         token = self._get_token(video_id)
 
         formats = []
@@ -186,8 +236,8 @@ class NPOIE(NPOBaseIE):
 
         return {
             'id': video_id,
-            'title': metadata['titel'],
-            'description': metadata['info'],
+            'title': title,
+            'description': metadata.get('info'),
             'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
             'upload_date': unified_strdate(metadata.get('gidsdatum')),
             'duration': parse_duration(metadata.get('tijdsduur')),
@@ -356,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor):
         }
 
 
-class TegenlichtVproIE(NPOIE):
-    IE_NAME = 'tegenlicht.vpro.nl'
-    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+class VPROIE(NPOIE):
+    _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
 
     _TESTS = [
         {
@@ -367,17 +416,72 @@ class TegenlichtVproIE(NPOIE):
             'info_dict': {
                 'id': 'VPWON_1169289',
                 'ext': 'm4v',
-                'title': 'Tegenlicht',
-                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'title': 'De toekomst komt uit Afrika',
+                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                 'upload_date': '20130225',
             },
         },
+        {
+            'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+            'info_dict': {
+                'id': 'sergio-herman',
+                'title': 'Sergio Herman: Fucking perfect',
+            },
+            'playlist_count': 2,
+        },
+        {
+            # playlist with youtube embed
+            'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+            'info_dict': {
+                'id': 'education-education',
+                'title': '2Doc',
+            },
+            'playlist_count': 2,
+        }
     ]
 
     def _real_extract(self, url):
-        name = url_basename(url)
-        webpage = self._download_webpage(url, name)
-        urn = self._html_search_meta('mediaurn', webpage)
-        info_page = self._download_json(
-            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
-        return self._get_info(info_page['mid'])
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+            for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
+        ]
+
+        playlist_title = self._search_regex(
+            r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
+            webpage, 'playlist title', default=None) or self._og_search_title(webpage)
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class WNLIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+
+    _TEST = {
+        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+        'info_dict': {
+            'id': 'vandaag-de-dag-6-mei',
+            'title': 'Vandaag de Dag 6 mei',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id, 'NPO')
+            for video_id, part in re.findall(
+                r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
+        ]
+
+        playlist_title = self._html_search_regex(
+            r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
+            webpage, 'playlist title')
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
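
One detail worth noting in the NPO rewrite: the metadata's `titel` field can be too generic (often just the series name), so the episode-level `aflevering_titel` is appended when it is present and different; likewise the real video id comes from `prid` when it differs from the id in the URL. A minimal sketch of the title handling:

```python
def build_title(metadata):
    # 'titel' alone may just be the series name; append the episode
    # title ('aflevering_titel') when it adds information.
    title = metadata['titel']
    sub_title = metadata.get('aflevering_titel')
    if sub_title and sub_title != title:
        title += ': %s' % sub_title
    return title

print(build_title({'titel': 'Tegenlicht',
                   'aflevering_titel': 'De toekomst komt uit Afrika'}))
# Tegenlicht: De toekomst komt uit Afrika
```
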
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 6c7149fe3859732978d34356f8c18aca14e74f63..215ffe87b55db126300f0c18c98d9c5bfd920ed7 100644 (file)
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     unified_strdate,
     int_or_none,
@@ -62,7 +62,7 @@ class OdnoklassnikiIE(InfoExtractor):
             metadata = self._parse_json(metadata, video_id)
         else:
             metadata = self._download_json(
-                compat_urllib_parse.unquote(flashvars['metadataUrl']),
+                compat_urllib_parse_unquote(flashvars['metadataUrl']),
                 video_id, 'Downloading metadata JSON')
 
         movie = metadata['movie']
diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py
index 2249657eb1b796970c155f4baf3445fef9b60681..d2ceedd018fe1f0237aefa17330d5dbe3d94f68c 100644 (file)
--- a/youtube_dl/extractor/openfilm.py
+++ b/youtube_dl/extractor/openfilm.py
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
 from ..utils import (
     parse_iso8601,
-    compat_urllib_parse,
     parse_age_limit,
     int_or_none,
 )
@@ -37,7 +37,7 @@ class OpenFilmIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        player = compat_urllib_parse.unquote_plus(
+        player = compat_urllib_parse_unquote_plus(
             self._og_search_video_url(webpage))
 
         video = json.loads(self._search_regex(
diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py
index c66db3cdc84e55a6a3a904ddf3ff7c09aaac9573..788411ccc18082f59588d40704900c26dba1fe21 100644 (file)
--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dl/extractor/photobucket.py
@@ -4,7 +4,7 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote
 
 
 class PhotobucketIE(InfoExtractor):
@@ -34,7 +34,7 @@ class PhotobucketIE(InfoExtractor):
         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
                                        webpage, 'info json')
         info = json.loads(info_json)
-        url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
+        url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
         return {
             'id': video_id,
             'url': url,
index 9fe1524f2505b16b13b020fa89b791bd504b3ed4..8a1c296dda8b57611a0e464387be43ab0fc9a370 100644 (file)
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor):
         if m_error:
             raise ExtractorError(m_error.group('msg'), expected=True)
 
-        data = self._form_hidden_inputs(orig_webpage)
+        data = self._hidden_inputs(orig_webpage)
 
         self._sleep(2, video_id)
 
index c3e667e9e72ea0aaf6e5db731f630816e6a2861d..2eb4fd96dcbc071c1c2ecfb596ab20c4526018bd 100644 (file)
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -4,7 +4,8 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
 )
 from ..utils import (
     clean_html,
@@ -44,7 +45,7 @@ class PlayvidIE(InfoExtractor):
         flashvars = self._html_search_regex(
             r'flashvars="(.+?)"', webpage, 'flashvars')
 
-        infos = compat_urllib_parse.unquote(flashvars).split(r'&')
+        infos = compat_urllib_parse_unquote(flashvars).split(r'&')
         for info in infos:
             videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
             if videovars_match:
@@ -52,7 +53,7 @@ class PlayvidIE(InfoExtractor):
                 val = videovars_match.group(2)
 
                 if key == 'title':
-                    video_title = compat_urllib_parse.unquote_plus(val)
+                    video_title = compat_urllib_parse_unquote_plus(val)
                 if key == 'duration':
                     try:
                         duration = int(val)
index 8172bc9976755f7cc4361e1f6dba8d9d7b53d5fd..0b7886840fbced3d9fa6fb219050f40ac709c080 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -5,7 +5,8 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
 )
@@ -69,7 +70,7 @@ class PornHubIE(InfoExtractor):
             webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
-            thumbnail = compat_urllib_parse.unquote(thumbnail)
+            thumbnail = compat_urllib_parse_unquote(thumbnail)
 
         view_count = self._extract_count(
             r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
@@ -80,9 +81,9 @@ class PornHubIE(InfoExtractor):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
-        video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+        video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
-            password = compat_urllib_parse.unquote_plus(
+            password = compat_urllib_parse_unquote_plus(
                 self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
 
index 9aa0c862a4643873fddc0e560c35030c5b223aeb..304359dc5b189b8ce27c967c2d369b26db334532 100644 (file)
--- a/youtube_dl/extractor/primesharetv.py
+++ b/youtube_dl/extractor/primesharetv.py
@@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
         if '>File not exist<' in webpage:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         headers = {
             'Referer': url,
index 81a63c7fc40b21135b2b307ecb075967448a2d44..8190ed6766ce5c878fc82700524ec6d012d70a57 100644 (file)
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor):
             raise ExtractorError('Video %s does not exist' % video_id,
                                  expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
         post = compat_urllib_parse.urlencode(fields)
         req = compat_urllib_request.Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
index 536a42dc88a4e17bbd039289508521d1ea13e282..fec008ce7687a0360d2e54e3130425245f30b4fc 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -9,8 +9,9 @@ from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
-    unified_strdate,
+    determine_ext,
     int_or_none,
+    unified_strdate,
 )
 
 
@@ -21,6 +22,11 @@ class ProSiebenSat1IE(InfoExtractor):
 
     _TESTS = [
         {
+            # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
+            # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+            # - malformed f4m manifest support
+            # - proper handling of URLs starting with `https?://` in 2.0 manifests
+            # - recursive child f4m manifests extraction
             'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
             'info_dict': {
                 'id': '2104602',
@@ -208,7 +214,7 @@ class ProSiebenSat1IE(InfoExtractor):
         clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
 
         access_token = 'prosieben'
-        client_name = 'kolibri-1.12.6'
+        client_name = 'kolibri-2.0.19-splec4'
         client_location = url
 
         videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
@@ -275,8 +281,9 @@ class ProSiebenSat1IE(InfoExtractor):
 
         for source in urls_sources:
             protocol = source['protocol']
+            source_url = source['url']
             if protocol == 'rtmp' or protocol == 'rtmpe':
-                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
+                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
                 if not mobj:
                     continue
                 path = mobj.group('path')
@@ -293,9 +300,11 @@ class ProSiebenSat1IE(InfoExtractor):
                     'ext': 'mp4',
                     'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
                 })
+            elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+                formats.extend(self._extract_f4m_formats(source_url, clip_id))
             else:
                 formats.append({
-                    'url': source['url'],
+                    'url': source_url,
                     'vbr': fix_bitrate(source['bitrate']),
                 })
 
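
The new `elif` branch routes f4m sources through `_extract_f4m_formats`, whose malformed-manifest and recursive-child handling the updated test exercises (per the comment block above); the dispatch hinges on recognizing an f4m URL. A small illustration of that check, assuming youtube-dl is importable; the example URLs are invented:
```
# Illustration of the f4m dispatch added above; example URLs are made up.
from youtube_dl.utils import determine_ext

def is_f4m_source(source_url):
    # f4mgenerator endpoints have no .f4m extension, so both tests are needed.
    return 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m'

assert is_f4m_source('http://example.com/f4mgenerator/stream')
assert is_f4m_source('http://example.com/video.f4m?hdcore=3.2.0')
assert not is_f4m_source('http://example.com/video.mp4')
```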
index 6e2b94e7d5b7f30076b910d12e7bf1b371062dc0..a07677686a4ecc2923b310c3aeeeaab610bb0868 100644 (file)
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -34,7 +34,7 @@ class SharedIE(InfoExtractor):
             raise ExtractorError(
                 'Video %s does not exist' % video_id, expected=True)
 
-        download_form = self._form_hidden_inputs(webpage)
+        download_form = self._hidden_inputs(webpage)
         request = compat_urllib_request.Request(
             url, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
index bff75d6b2945584e0193b50ff8915b91fec26f1f..5fa6faf18b738aa32e384972bf65ad56188ad9b4 100644 (file)
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -4,7 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
 )
@@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor):
             webpage, 'comment count', fatal=False))
 
         video_urls = list(map(
-            compat_urllib_parse.unquote,
+            compat_urllib_parse_unquote,
             re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
             password = self._search_regex(
index af2b798fb41467ddb7c6db1a82f2a6d7e940ec30..92b6dc1b820eef8e6a7a0b42bd48bfc5d2422e38 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
-        login_form = self._form_hidden_inputs(login_page)
+        login_form = self._hidden_inputs(login_page)
 
         login_form.update({
             'login': username.encode('utf-8'),
index 4667ed83b71f4aec5f081741834e2c9cca010e82..e2bab52fef3451596ec1cf0de19e3131e378b5dd 100644 (file)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -15,7 +15,8 @@ from ..utils import (
 class UdemyIE(InfoExtractor):
     IE_NAME = 'udemy'
     _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
-    _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
+    _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+    _ORIGIN_URL = 'https://www.udemy.com'
     _NETRC_MACHINE = 'udemy'
 
     _TESTS = [{
@@ -74,29 +75,33 @@ class UdemyIE(InfoExtractor):
                 expected=True)
 
         login_popup = self._download_webpage(
-            'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
-            'Downloading login popup')
+            self._LOGIN_URL, None, 'Downloading login popup')
 
         if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
             return
 
-        csrf = self._html_search_regex(
-            r'<input type="hidden" name="csrf" value="(.+?)"',
-            login_popup, 'csrf token')
+        login_form = self._form_hidden_inputs('login-form', login_popup)
+
+        login_form.update({
+            'email': username.encode('utf-8'),
+            'password': password.encode('utf-8'),
+        })
 
-        login_form = {
-            'email': username,
-            'password': password,
-            'csrf': csrf,
-            'displayType': 'json',
-            'isSubmitted': '1',
-        }
         request = compat_urllib_request.Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        response = self._download_json(
+        request.add_header('Referer', self._ORIGIN_URL)
+        request.add_header('Origin', self._ORIGIN_URL)
+
+        response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
 
-        if 'returnUrl' not in response:
+        if all(logout_pattern not in response
+               for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+            error = self._html_search_regex(
+                r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+                response, 'error message', default=None)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
             raise ExtractorError('Unable to log in')
 
     def _real_extract(self, url):
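
Success detection also changes here: instead of expecting `returnUrl` in a JSON response, the extractor now fetches the result as HTML, treats the presence of a logout link as proof of login, and surfaces the `form-errors` block otherwise. The check reduces to something like this (toy snippets, not real Udemy markup):
```
# Toy illustration of the logout-marker check above; the HTML snippets
# are invented, not real Udemy markup.
def logged_in(response):
    logout_patterns = [
        'href="https://www.udemy.com/user/logout/',
        '>Logout<',
    ]
    return any(pattern in response for pattern in logout_patterns)

assert logged_in('<a href="https://www.udemy.com/user/logout/">Logout</a>')
assert not logged_in('<div class="form-errors">Incorrect email or password</div>')
```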
index 346edf485998e973f4a56f8dee3213093f90aae1..0d8d832cc0890a8cb99c8388b1b9fc1964396d23 100644 (file)
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -5,6 +5,7 @@ import json
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_parse_unquote,
     compat_urlparse,
 )
 from ..utils import (
@@ -76,7 +77,7 @@ class VeeHDIE(InfoExtractor):
 
         if config_json:
             config = json.loads(config_json)
-            video_url = compat_urlparse.unquote(config['clip']['url'])
+            video_url = compat_urllib_parse_unquote(config['clip']['url'])
 
         if not video_url:
             video_url = self._html_search_regex(
index d63c03183ce98c9c306677fbf92e3bd5921cb2ea..10d6745af703e00d6962d3e14c8b01f2419ad955 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor):
         password = self._downloader.params.get('videopassword', None)
         if password is None:
             raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
-        fields = self._form_hidden_inputs(login_form)
+        fields = self._hidden_inputs(login_form)
         token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
         fields['token'] = token
         fields['password'] = password
index 8ac3aeac0ca5a4ebf41654a88953c5b975cfef4a..8f677cae3a503ef34230cf205c12378e3c2ecb66 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -168,7 +168,7 @@ class VKIE(InfoExtractor):
         login_page = self._download_webpage(
             'https://vk.com', None, 'Downloading login page')
 
-        login_form = self._form_hidden_inputs(login_page)
+        login_form = self._hidden_inputs(login_page)
 
         login_form.update({
             'email': username.encode('cp1251'),
index 4804692bfda542c0e4175a67d230ad89698a3d33..ccf1928b5d323f277b4e8a47bd4d008e821b147c 100644 (file)
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         if fields['op'] == 'download1':
             self._sleep(3, video_id)  # they do detect when requests happen too fast!
index 80c48c37d32c0849e689d626811ee34c5b414ee0..4ff99e5ca37fb8f4f0b663cc99761c31e75f1cf4 100644 (file)
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
 
 
 class XBefIE(InfoExtractor):
@@ -30,7 +28,7 @@ class XBefIE(InfoExtractor):
         config_url_enc = self._download_webpage(
             'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
             note='Retrieving config URL')
-        config_url = compat_urllib_parse.unquote(config_url_enc)
+        config_url = compat_urllib_parse_unquote(config_url_enc)
         config = self._download_xml(
             config_url, video_id, note='Retrieving config')
 
index 79ed6c744242bf132afd033ae35949cc1e2263b5..5a41f8ffa0c5a46a3d0431a6aac8e93ba8ca1cb9 100644 (file)
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -2,9 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_unquote
 
 
 class XNXXIE(InfoExtractor):
@@ -26,7 +24,7 @@ class XNXXIE(InfoExtractor):
 
         video_url = self._search_regex(r'flv_url=(.*?)&amp;',
                                        webpage, 'video URL')
-        video_url = compat_urllib_parse.unquote(video_url)
+        video_url = compat_urllib_parse_unquote(video_url)
 
         video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
                                               webpage, 'title')
index 1644f53c876329f053406be3d3dc1aa463cddc1b..779e4f46a1dd5315c6a9be3dad09e65c07a205b2 100644 (file)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -5,7 +5,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_request,
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
 )
 from ..utils import (
     parse_duration,
@@ -59,7 +59,7 @@ class XTubeIE(InfoExtractor):
         for format_id, video_url in re.findall(
                 r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
             fmt = {
-                'url': compat_urllib_parse.unquote(video_url),
+                'url': compat_urllib_parse_unquote(video_url),
                 'format_id': format_id,
             }
             m = re.search(r'^(?P<height>\d+)[pP]', format_id)
@@ -68,7 +68,7 @@ class XTubeIE(InfoExtractor):
             formats.append(fmt)
 
         if not formats:
-            video_url = compat_urllib_parse.unquote(self._search_regex(
+            video_url = compat_urllib_parse_unquote(self._search_regex(
                 r'flashvars\.video_url\s*=\s*"([^"]+)"',
                 webpage, 'video URL'))
             formats.append({'url': video_url})
index d8415bed49ea8bab3bbc30f7136f31a69ac357c2..5dcf2fdd12f9140f0bd373fd5db41c93f4b18b38 100644 (file)
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -4,7 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_unquote,
     compat_urllib_request,
 )
 from ..utils import (
@@ -37,7 +37,7 @@ class XVideosIE(InfoExtractor):
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        video_url = compat_urllib_parse.unquote(
+        video_url = compat_urllib_parse_unquote(
             self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
         video_title = self._html_search_regex(
             r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
index 894678a23dac9d1b03e07f0cd9b2eecc7e690e18..869f3e8190ca0b751366a85f142a0b49fe294fa1 100644 (file)
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@@ -5,7 +5,7 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_unquote_plus
 
 
 class YnetIE(InfoExtractor):
@@ -34,7 +34,7 @@ class YnetIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
+        content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
         config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
         f4m_url = config['clip']['url']
         title = self._og_search_title(webpage)
index 3c629d38a1c7cea5f0b45d600c41f9d9ff658873..e7f5c7861fdda48ff309dbfd96eae8b2b5dcefc0 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -17,6 +17,8 @@ from ..compat import (
     compat_chr,
     compat_parse_qs,
     compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
@@ -865,7 +867,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
-            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
         video_id = self.extract_id(url)
 
         # Get video webpage
@@ -973,7 +975,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # uploader
         if 'author' not in video_info:
             raise ExtractorError('Unable to extract uploader name')
-        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
+        video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
 
         # uploader_id
         video_uploader_id = None
@@ -1000,7 +1002,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._downloader.report_warning('unable to extract video thumbnail')
             video_thumbnail = None
         else:   # don't panic if we can't find it
-            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
+            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
 
         # upload date
         upload_date = self._html_search_meta(
@@ -1062,7 +1064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._downloader.report_warning('unable to extract video duration')
             video_duration = None
         else:
-            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
+            video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
 
         # annotations
         video_annotations = None
@@ -1609,7 +1611,7 @@ class YoutubeSearchURLIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
index 1ecce22e7eb149ed470eea6ac05b6022e640f3b1..1f723908be8d4ff0247affc5aed9ffc44e777602 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -265,7 +265,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
         if (new_path == path or
                 (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
-            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
             return [], information
 
         try: