Merge branch 'vlive' of https://github.com/ping/youtube-dl into ping-vlive
author Yen Chi Hsuan <yan12125@gmail.com>
Sat, 5 Sep 2015 07:24:09 +0000 (15:24 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Sat, 5 Sep 2015 07:24:09 +0000 (15:24 +0800)
53 files changed:
CONTRIBUTING.md
README.md
docs/supportedsites.md
test/test_subtitles.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/downloader/external.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dumpert.py
youtube_dl/extractor/eroprofile.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/folketinget.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/imgur.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/krasview.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nowtv.py
youtube_dl/extractor/nowvideo.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/pluralsight.py [new file with mode: 0644]
youtube_dl/extractor/rtl2.py
youtube_dl/extractor/rtp.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/screenwavemedia.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/yandexmusic.py
youtube_dl/extractor/youku.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/common.py
youtube_dl/utils.py
youtube_dl/version.py

index 42333c4506d32c79e1dbb41ad93cb3bd6b433ca1..f8ab29631e1edad8c45ef3c6fbe9eaa24b5c6ea4 100644 (file)
@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
     ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
-7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
index 25844eb6dd756e9f15566266d93d99b57bd3c76d..24bfe38a242f3e798a225710187cde8a56d4c157 100644 (file)
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ You can also use pip:
 
     sudo pip install youtube-dl
 
-Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
+Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
 
 # DESCRIPTION
 **youtube-dl** is a small command-line program to download videos from
@@ -207,7 +207,7 @@ which means you can modify it, redistribute it or use it however you like.
     -p, --password PASSWORD          Account password. If this option is left out, youtube-dl will ask interactively.
     -2, --twofactor TWOFACTOR        Two-factor auth code
     -n, --netrc                      Use .netrc authentication data
-    --video-password PASSWORD        Video password (vimeo, smotri)
+    --video-password PASSWORD        Video password (vimeo, smotri, youku)
 
 ## Post-processing Options:
     -x, --extract-audio              Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
@@ -552,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
     ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
-7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
index 9099e2da44127978c2d47fd49908538ea6dc7d07..04b9959ac61b0f77da6cf46b07661414b81083c1 100644 (file)
  - **Folketinget**: Folketinget (ft.dk; Danish parliament)
  - **FootyRoom**
  - **Foxgay**
- - **FoxNews**
+ - **FoxNews**: Fox News and Fox Business Video
  - **FoxSports**
  - **france2.fr:generation-quoi**
  - **FranceCulture**
  - **imdb**: Internet Movie Database trailers
  - **imdb:list**: Internet Movie Database lists
  - **Imgur**
+ - **ImgurAlbum**
  - **Ina**
  - **Indavideo**
  - **IndavideoEmbed**
  - **Moviezine**
  - **movshare**: MovShare
  - **MPORA**
+ - **MSNBC**
  - **MTV**
+ - **mtv.de**
  - **mtviggy.com**
  - **mtvservices:embedded**
  - **MuenchenTV**: münchen.tv
  - **MusicPlayOn**
  - **MusicVault**
  - **muzu.tv**
+ - **Mwave**
  - **MySpace**
  - **MySpace:album**
  - **MySpass**
  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
  - **Playvid**
  - **Playwire**
+ - **pluralsight**
+ - **pluralsight:course**
  - **plus.google**: Google Plus
  - **pluzz.francetv.fr**
  - **podomatic**
  - **Sexu**
  - **SexyKarma**: Sexy Karma and Watch Indian Porn
  - **Shahid**
- - **Shared**
+ - **Shared**: shared.sx and vivo.sx
  - **ShareSix**
  - **Sina**
  - **Slideshare**
  - **TF1**
  - **TheOnion**
  - **ThePlatform**
+ - **ThePlatformFeed**
  - **TheSixtyOne**
  - **ThisAmericanLife**
  - **ThisAV**
  - **Viddler**
  - **video.google:search**: Google Video search
  - **video.mit.edu**
- - **VideoBam**
  - **VideoDetective**
  - **videofy.me**
  - **videolectures.net**
index c4e3adb67b7d1034b36cdd3c45969fe321351c64..0343967d9d35c26f869825f9a3a41b1b0016f67a 100644 (file)
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    ThePlatformFeedIE,
     RTVEALaCartaIE,
     FunnyOrDieIE,
 )
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+class TestThePlatformFeedSubtitles(BaseTestSubtitles):
+    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
+    IE = ThePlatformFeedIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+
+
 class TestRtveSubtitles(BaseTestSubtitles):
     url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
     IE = RTVEALaCartaIE
index a759b2da99ea9f2c7676171d3bc0d5d93aa2149d..a5f164c493a505667dcaa5bd54c1a2394e4e32b5 100644 (file)
@@ -57,11 +57,16 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    xpath_element,
     xpath_text,
+    xpath_attr,
     render_table,
     match_str,
     parse_dfxp_time_expr,
     dfxp2srt,
+    cli_option,
+    cli_valueless_option,
+    cli_bool_option,
 )
 
 
@@ -264,6 +269,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/media:author').text, 'The Author')
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
+    def test_xpath_element(self):
+        doc = xml.etree.ElementTree.Element('root')
+        div = xml.etree.ElementTree.SubElement(doc, 'div')
+        p = xml.etree.ElementTree.SubElement(div, 'p')
+        p.text = 'Foo'
+        self.assertEqual(xpath_element(doc, 'div/p'), p)
+        self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
+        self.assertTrue(xpath_element(doc, 'div/bar') is None)
+        self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
+
     def test_xpath_text(self):
         testxml = '''<root>
             <div>
@@ -272,9 +287,25 @@ class TestUtil(unittest.TestCase):
         </root>'''
         doc = xml.etree.ElementTree.fromstring(testxml)
         self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+        self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
         self.assertTrue(xpath_text(doc, 'div/bar') is None)
         self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
 
+    def test_xpath_attr(self):
+        testxml = '''<root>
+            <div>
+                <p x="a">Foo</p>
+            </div>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
+        self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
+        self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
+        self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
+        self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
+        self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
+        self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
+
     def test_smuggle_url(self):
         data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
@@ -646,6 +677,51 @@ The first line
 '''
         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
 
+    def test_cli_option(self):
+        self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
+        self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
+        self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
+
+    def test_cli_valueless_option(self):
+        self.assertEqual(cli_valueless_option(
+            {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
+        self.assertEqual(cli_valueless_option(
+            {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
+        self.assertEqual(cli_valueless_option(
+            {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
+        self.assertEqual(cli_valueless_option(
+            {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
+        self.assertEqual(cli_valueless_option(
+            {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
+        self.assertEqual(cli_valueless_option(
+            {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
+
+    def test_cli_bool_option(self):
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
+            ['--no-check-certificate', 'true'])
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
+            ['--no-check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+            ['--check-certificate', 'false'])
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            ['--check-certificate=false'])
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
+            ['--check-certificate', 'true'])
+        self.assertEqual(
+            cli_bool_option(
+                {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            ['--check-certificate=true'])
+
 
 if __name__ == '__main__':
     unittest.main()
index cad6b026e81936e1f80cc527cc43c16bd4b9896d..982e658cea98dc55edfacea3e9eca4399b5ceb1a 100755 (executable)
@@ -2009,7 +2009,7 @@ class YoutubeDL(object):
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
                 try:
                     uf = self.urlopen(t['url'])
-                    with open(thumb_filename, 'wb') as thumbf:
+                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
index 6c310346c7db946c2393185a50c416a72bfad4bb..2bc01126693fa4b520a34afe0ad0a67a61370829 100644 (file)
@@ -5,6 +5,10 @@ import subprocess
 
 from .common import FileDownloader
 from ..utils import (
+    cli_option,
+    cli_valueless_option,
+    cli_bool_option,
+    cli_configuration_args,
     encodeFilename,
     encodeArgument,
 )
@@ -46,19 +50,16 @@ class ExternalFD(FileDownloader):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
 
     def _option(self, command_option, param):
-        param = self.params.get(param)
-        if param is None:
-            return []
-        if isinstance(param, bool):
-            return [command_option]
-        return [command_option, param]
+        return cli_option(self.params, command_option, param)
+
+    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
+        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
+
+    def _valueless_option(self, command_option, param, expected_value=True):
+        return cli_valueless_option(self.params, command_option, param, expected_value)
 
     def _configuration_args(self, default=[]):
-        ex_args = self.params.get('external_downloader_args')
-        if ex_args is None:
-            return default
-        assert isinstance(ex_args, list)
-        return ex_args
+        return cli_configuration_args(self.params, 'external_downloader_args', default)
 
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
@@ -80,6 +81,8 @@ class CurlFD(ExternalFD):
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._option('--interface', 'source_address')
+        cmd += self._option('--proxy', 'proxy')
+        cmd += self._valueless_option('--insecure', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -102,7 +105,7 @@ class WgetFD(ExternalFD):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._option('--bind-address', 'source_address')
         cmd += self._option('--proxy', 'proxy')
-        cmd += self._option('--no-check-certificate', 'nocheckcertificate')
+        cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -121,6 +124,7 @@ class Aria2cFD(ExternalFD):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
+        cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += ['--', info_dict['url']]
         return cmd
 
index 275564b5976b9d28a7d67f839c81467029aa5c18..174180db5bd4e5e42fca7a9feecdf8af4ea5e155 100644 (file)
@@ -13,6 +13,8 @@ from ..compat import (
     compat_urllib_error,
 )
 from ..utils import (
+    encodeFilename,
+    sanitize_open,
     struct_pack,
     struct_unpack,
     xpath_text,
@@ -343,18 +345,19 @@ class F4mFD(FragmentFD):
                 success = ctx['dl'].download(frag_filename, {'url': url})
                 if not success:
                     return False
-                with open(frag_filename, 'rb') as down:
-                    down_data = down.read()
-                    reader = FlvReader(down_data)
-                    while True:
-                        _, box_type, box_data = reader.read_box_info()
-                        if box_type == b'mdat':
-                            dest_stream.write(box_data)
-                            break
+                (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
+                down_data = down.read()
+                down.close()
+                reader = FlvReader(down_data)
+                while True:
+                    _, box_type, box_data = reader.read_box_info()
+                    if box_type == b'mdat':
+                        dest_stream.write(box_data)
+                        break
                 if live:
-                    os.remove(frag_filename)
+                    os.remove(encodeFilename(frag_sanitized))
                 else:
-                    frags_filenames.append(frag_filename)
+                    frags_filenames.append(frag_sanitized)
             except (compat_urllib_error.HTTPError, ) as err:
                 if live and (err.code == 404 or err.code == 410):
                     # We didn't keep up with the live window. Continue
@@ -375,6 +378,6 @@ class F4mFD(FragmentFD):
         self._finish_frag_download(ctx)
 
         for frag_file in frags_filenames:
-            os.remove(frag_file)
+            os.remove(encodeFilename(frag_file))
 
         return True
index 2b6c3370f5c16a0998e8125223a51c53e0ef5c90..71aafdc73e17a29bd6784f5200b193126fb35fe4 100644 (file)
@@ -12,6 +12,7 @@ from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from ..utils import (
     encodeArgument,
     encodeFilename,
+    sanitize_open,
 )
 
 
@@ -89,13 +90,13 @@ class NativeHlsFD(FragmentFD):
             success = ctx['dl'].download(frag_filename, {'url': frag_url})
             if not success:
                 return False
-            with open(frag_filename, 'rb') as down:
-                ctx['dest_stream'].write(down.read())
-            frags_filenames.append(frag_filename)
+            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+            ctx['dest_stream'].write(down.read())
+            frags_filenames.append(frag_sanitized)
 
         self._finish_frag_download(ctx)
 
         for frag_file in frags_filenames:
-            os.remove(frag_file)
+            os.remove(encodeFilename(frag_file))
 
         return True
index 6bee5b63cc4e0679ecd5d54b5619837843aa08c8..5d2ea39d0abfdc5d83d04e172e87edbdc54de44a 100644 (file)
@@ -241,7 +241,10 @@ from .imdb import (
     ImdbIE,
     ImdbListIE
 )
-from .imgur import ImgurIE
+from .imgur import (
+    ImgurIE,
+    ImgurAlbumIE,
+)
 from .ina import InaIE
 from .indavideo import (
     IndavideoIE,
@@ -340,6 +343,7 @@ from .mtv import (
     MTVIE,
     MTVServicesEmbeddedIE,
     MTVIggyIE,
+    MTVDEIE,
 )
 from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
@@ -454,6 +458,10 @@ from .playfm import PlayFMIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
+from .pluralsight import (
+    PluralsightIE,
+    PluralsightCourseIE,
+)
 from .podomatic import PodomaticIE
 from .porn91 import Porn91IE
 from .pornhd import PornHdIE
index dc0fb85d6048962505d1d207ae590940d69f52e6..f9a389f674560347f9d3bc93aeb4ef2aba990941 100644 (file)
@@ -1,16 +1,20 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    js_to_json,
+    int_or_none,
+)
 
 
 class ABCIE(InfoExtractor):
     IE_NAME = 'abc.net.au'
     _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
         'md5': 'cb3dd03b18455a661071ee1e28344d9f',
         'info_dict': {
@@ -19,22 +23,47 @@ class ABCIE(InfoExtractor):
             'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
             'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
         },
-    }
+    }, {
+        'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
+        'md5': 'db2a5369238b51f9811ad815b69dc086',
+        'info_dict': {
+            'id': 'NvqvPeNZsHU',
+            'ext': 'mp4',
+            'upload_date': '20150816',
+            'uploader': 'ABC News (Australia)',
+            'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
+            'uploader_id': 'NewsOnABC',
+            'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
+        },
+        'add_ie': ['Youtube'],
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        urls_info_json = self._search_regex(
-            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
-            flags=re.DOTALL)
-        urls_info = json.loads(urls_info_json.replace('\'', '"'))
+        mobj = re.search(
+            r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
+            webpage)
+        if mobj is None:
+            raise ExtractorError('Unable to extract video urls')
+
+        urls_info = self._parse_json(
+            mobj.group('json_data'), video_id, transform_source=js_to_json)
+
+        if not isinstance(urls_info, list):
+            urls_info = [urls_info]
+
+        if mobj.group('type') == 'YouTube':
+            return self.playlist_result([
+                self.url_result(url_info['url']) for url_info in urls_info])
+
         formats = [{
             'url': url_info['url'],
-            'width': int(url_info['width']),
-            'height': int(url_info['height']),
-            'tbr': int(url_info['bitrate']),
-            'filesize': int(url_info['filesize']),
+            'width': int_or_none(url_info.get('width')),
+            'height': int_or_none(url_info.get('height')),
+            'tbr': int_or_none(url_info.get('bitrate')),
+            'filesize': int_or_none(url_info.get('filesize')),
         } for url_info in urls_info]
         self._sort_formats(formats)
 
index c949a481477c187d9433f39e6e851b87c97edf79..fd1770dac958c477c4fb29e80e1e8722e459c45e 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 from .bliptv import BlipTVIE
+from .screenwavemedia import ScreenwaveMediaIE
 
 
 class CinemassacreIE(InfoExtractor):
@@ -83,10 +84,10 @@ class CinemassacreIE(InfoExtractor):
 
         playerdata_url = self._search_regex(
             [
-                r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
-                r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                ScreenwaveMediaIE.EMBED_PATTERN,
+                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
             ],
-            webpage, 'player data URL', default=None)
+            webpage, 'player data URL', default=None, group='url')
         if not playerdata_url:
             playerdata_url = BlipTVIE._extract_url(webpage)
         if not playerdata_url:
index ce2030d28264eb33b53571c0e53ef24ddac09502..39cef9c5b99d1c47545455970344bc836c77494e 100644 (file)
@@ -510,6 +510,12 @@ class InfoExtractor(object):
         """Report attempt to log in."""
         self.to_screen('Logging in')
 
+    @staticmethod
+    def raise_login_required(msg='This video is only available for registered users'):
+        raise ExtractorError(
+            '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+            expected=True)
+
     # Methods for following #608
     @staticmethod
     def url_result(url, ie=None, video_id=None, video_title=None):
@@ -1151,7 +1157,7 @@ class InfoExtractor(object):
                 }
                 if type_ in SUBTITLES_TYPES:
                     ext = SUBTITLES_TYPES[type_]
-            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or subtitles_lang
+            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
             subtitles.setdefault(lang, []).append({
                 'url': src,
                 'ext': ext,
@@ -1279,6 +1285,23 @@ class InfoExtractor(object):
     def _get_subtitles(self, *args, **kwargs):
         raise NotImplementedError("This method must be implemented by subclasses")
 
+    @staticmethod
+    def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+        """ Merge subtitle items for one language. Items with duplicated URLs
+        will be dropped. """
+        list1_urls = set([item['url'] for item in subtitle_list1])
+        ret = list(subtitle_list1)
+        ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+        return ret
+
+    @classmethod
+    def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+        """ Merge two subtitle dictionaries, language by language. """
+        ret = dict(subtitle_dict1)
+        for lang in subtitle_dict2:
+            ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+        return ret
+
     def extract_automatic_captions(self, *args, **kwargs):
         if (self._downloader.params.get('writeautomaticsub', False) or
                 self._downloader.params.get('listsubtitles')):
index 33a033a7f3175fec3b85725841581d2ac252033d..ce123482e8036bc50c39a65e6036ee8eb534b83a 100644 (file)
@@ -20,9 +20,11 @@ from ..utils import (
     ExtractorError,
     bytes_to_intlist,
     intlist_to_bytes,
+    int_or_none,
     remove_end,
     unified_strdate,
     urlencode_postdata,
+    xpath_text,
 )
 from ..aes import (
     aes_cbc_decrypt,
@@ -237,7 +239,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             webpage_url = 'http://www.' + mobj.group('url')
 
         webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
-        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
+        note_m = self._html_search_regex(
+            r'<div class="showmedia-trailer-notice">(.+?)</div>',
+            webpage, 'trailer-notice', default='')
         if note_m:
             raise ExtractorError(note_m)
 
@@ -247,6 +251,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             if msg.get('type') == 'error':
                 raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
 
+        if 'To view this, please log in to verify you are 18 or older.' in webpage:
+            self.raise_login_required()
+
         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
         video_title = re.sub(r' {2,}', ' ', video_title)
         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
@@ -281,6 +288,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             stream_info = streamdata.find('./{default}preload/stream_info')
             video_url = stream_info.find('./host').text
             video_play_path = stream_info.find('./file').text
+            metadata = stream_info.find('./metadata')
+            format_info = {
+                'format': video_format,
+                'format_id': video_format,
+                'height': int_or_none(xpath_text(metadata, './height')),
+                'width': int_or_none(xpath_text(metadata, './width')),
+            }
 
             if '.fplive.net/' in video_url:
                 video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
@@ -289,19 +303,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                     netloc='v.lvlt.crcdn.net',
                     path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
                 if self._is_valid_url(direct_video_url, video_id, video_format):
-                    formats.append({
+                    format_info.update({
                         'url': direct_video_url,
-                        'format_id': video_format,
                     })
+                    formats.append(format_info)
                     continue
 
-            formats.append({
+            format_info.update({
                 'url': video_url,
                 'play_path': video_play_path,
                 'ext': 'flv',
-                'format': video_format,
-                'format_id': video_format,
             })
+            formats.append(format_info)
 
         subtitles = self.extract_subtitles(video_id, webpage)
 
index 999fb5620df2976073122fb95fbad1bb133f357a..1f00386feae15d00a4421b3166335c15f3b01aa9 100644 (file)
@@ -9,8 +9,8 @@ from ..utils import qualities
 
 
 class DumpertIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _TESTS = [{
         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
         'info_dict': {
@@ -20,11 +20,15 @@ class DumpertIE(InfoExtractor):
             'description': 'Niet schrikken hoor',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
-    }
+    }, {
+        'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        url = 'https://www.dumpert.nl/mediabase/' + video_id
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'nsfw=1; cpc=10')
         webpage = self._download_webpage(req, video_id)
index 316033cf18b42cefead780ceca15b361ebbddac7..7fcd0151d8efdfd2b2378c0097b363e563e3171b 100644 (file)
@@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor):
 
         m = re.search(r'You must be logged in to view this video\.', webpage)
         if m:
-            raise ExtractorError(
-                'This video requires login. Please specify a username and password and try again.', expected=True)
+            self.raise_login_required('This video requires login')
 
         video_id = self._search_regex(
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
index e4f7195a8ab345175b8d474a17955c5d015071d1..5c1137e94c4445605d678b2bf7e6c3d12e96280e 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _NETRC_MACHINE = 'fc2'
     _TESTS = [{
@@ -37,6 +37,9 @@ class FC2IE(InfoExtractor):
             'password': '(snip)',
             'skip': 'requires actual password'
         }
+    }, {
+        'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
+        'only_matching': True,
     }]
 
     def _login(self):
@@ -80,7 +83,7 @@ class FC2IE(InfoExtractor):
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
-        refer = url.replace('/content/', '/a/content/')
+        refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
 
         mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
 
index 0fb29de75228f0133c0b8d54a015fbd5d90954c1..75399fa7d2a3164c67f2d72c24628a861ed77806 100644 (file)
@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
             'upload_date': '20141120',
             'duration': 3960,
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
index 917f76b1effb3a2fff9d4f4c17c1cca348280132..3a4a59135da5b8b813090fffaf3f7cb9477f3743 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
@@ -8,7 +10,8 @@ from ..utils import (
 
 
 class FoxNewsIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    IE_DESC = 'Fox News and Fox Business Video'
+    _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor):
             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
             'only_matching': True,
         },
+        {
+            'url': 'http://video.foxbusiness.com/v/4442309889001',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         video = self._download_json(
-            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
+            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
 
         item = video['channel']['item']
         title = item['title']
index 75723c00dc9e96c018e3b6771e634ff93c293ba1..129984a5fb1ea793d49937dd251d460b509c60c7 100644 (file)
@@ -78,9 +78,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                 })
         self._sort_formats(formats)
 
+        title = info['titre']
+        subtitle = info.get('sous_titre')
+        if subtitle:
+            title += ' - %s' % subtitle
+
         return {
             'id': video_id,
-            'title': info['titre'],
+            'title': title,
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
             'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
@@ -214,15 +219,15 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
         },
         # france5
         {
-            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
-            'md5': '78f0f4064f9074438e660785bbf2c5d9',
+            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
+            'md5': 'f6c577df3806e26471b3d21631241fd0',
             'info_dict': {
-                'id': '108961659',
+                'id': '123327454',
                 'ext': 'flv',
-                'title': 'C à dire ?!',
-                'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
-                'upload_date': '20140915',
-                'timestamp': 1410795000,
+                'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
+                'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
+                'upload_date': '20150831',
+                'timestamp': 1441035120,
             },
         },
         # franceo
index 376feecae7a5ef59ae8e4cf674d984deefc0d6b0..953ec32c3f506ad8674a2c44a0a650a13cc56bf4 100644 (file)
@@ -48,6 +48,7 @@ from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
+from .screenwavemedia import ScreenwaveMediaIE
 
 
 class GenericIE(InfoExtractor):
@@ -1001,6 +1002,16 @@ class GenericIE(InfoExtractor):
                 'description': 'New experience with Acrobat DC',
                 'duration': 248.667,
             },
+        },
+        # ScreenwaveMedia embed
+        {
+            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
+            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
+            'info_dict': {
+                'id': 'cinemasnob-55d26273809dd',
+                'ext': 'mp4',
+                'title': 'cinemasnob',
+            },
         }
     ]
 
@@ -1718,6 +1729,11 @@ class GenericIE(InfoExtractor):
         if snagfilms_url:
             return self.url_result(snagfilms_url)
 
+        # Look for ScreenwaveMedia embeds
+        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
+        if mobj is not None:
+            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
+
         # Look for AdobeTVVideo embeds
         mobj = re.search(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
index 8a95793cae07734e67340bf49db088cdb043d1cb..33d6432a6f29942d2bf7e53e6cf9adf353d79b25 100644 (file)
@@ -13,6 +13,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     float_or_none,
+    int_or_none,
 )
 
 
@@ -359,13 +360,8 @@ class GloboIE(InfoExtractor):
             self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
 
         title = video['title']
-        duration = float_or_none(video['duration'], 1000)
-        like_count = video['likes']
-        uploader = video['channel']
-        uploader_id = video['channel_id']
 
         formats = []
-
         for resource in video['resources']:
             resource_id = resource.get('_id')
             if not resource_id:
@@ -407,6 +403,11 @@ class GloboIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        duration = float_or_none(video.get('duration'), 1000)
+        like_count = int_or_none(video.get('likes'))
+        uploader = video.get('channel')
+        uploader_id = video.get('channel_id')
+
         return {
             'id': video_id,
             'title': title,
index d692ea79ab493174038c9649445e6a592a86687c..70c8ca64e6346d86ef5ecea02722597829db1316 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -97,3 +97,28 @@ class ImgurIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'title': self._og_search_title(webpage),
         }
+
+
+class ImgurAlbumIE(InfoExtractor):
+    """Expose an imgur gallery page as a playlist of its individual images."""
+
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+
+    _TEST = {
+        'url': 'http://imgur.com/gallery/Q95ko',
+        'info_dict': {
+            'id': 'Q95ko',
+        },
+        'playlist_count': 25,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one URL entry per gallery image that has a hash."""
+        gallery_id = self._match_id(url)
+
+        images_url = 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % gallery_id
+        response = self._download_json(images_url, gallery_id)
+
+        entries = []
+        for item in response['data']['images']:
+            image_hash = item.get('hash')
+            # Images without a hash cannot be turned into an imgur URL; skip them.
+            if not image_hash:
+                continue
+            entries.append(self.url_result('http://imgur.com/%s' % image_hash))
+
+        return self.playlist_result(entries, gallery_id)
index d2873049202813be7587067b629503dc2da0f877..3dca0e566f886888987bfb68360b9a0a51b04575 100644 (file)
@@ -13,12 +13,24 @@ from ..utils import (
 
 class KalturaIE(InfoExtractor):
     _VALID_URL = r'''(?x)
-    (?:kaltura:|
-       https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
-    )(?P<partner_id>\d+)
-    (?::|
-       /(?:[^/]+/)*?entry_id/
-    )(?P<id>[0-9a-z_]+)'''
+                (?:
+                    kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
+                    https?://
+                        (:?(?:www|cdnapisec)\.)?kaltura\.com/
+                        (?:
+                            (?:
+                                # flash player
+                                index\.php/kwidget/
+                                (?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
+                                (?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
+                                # html5 player
+                                html5/html5lib/
+                                (?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
+                                .*\?.*\bwid=_(?P<partner_id_html5>\d+)
+                            )
+                        )
+                )
+                '''
     _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
     _TESTS = [
         {
@@ -43,6 +55,10 @@ class KalturaIE(InfoExtractor):
             'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
             'only_matching': True,
         },
+        {
+            'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
+            'only_matching': True,
+        }
     ]
 
     def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
@@ -105,9 +121,9 @@ class KalturaIE(InfoExtractor):
             video_id, actions, note='Downloading video info JSON')
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
         mobj = re.match(self._VALID_URL, url)
-        partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
+        partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
+        entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
 
         info, source_data = self._get_video_info(entry_id, partner_id)
 
@@ -126,7 +142,7 @@ class KalturaIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_id,
+            'id': entry_id,
             'title': info['name'],
             'formats': formats,
             'description': info.get('description'),
index 720bc939bfd4c3a30c9a3709968c6008e6472067..a59c529f4c90d8f3211a783ec5e4e2d3f7be9d84 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
 
 
 class KontrTubeIE(InfoExtractor):
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
         webpage = self._download_webpage(
             url, display_id, 'Downloading page')
 
-        video_url = self._html_search_regex(
+        video_url = self._search_regex(
             r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        thumbnail = self._search_regex(
+            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
         title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'video title')
+            r'(?s)<h2>(.+?)</h2>', webpage, 'title')
         description = self._html_search_meta(
-            'description', webpage, 'video description')
+            'description', webpage, 'description')
 
-        mobj = re.search(
-            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
-            webpage)
-        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+        duration = self._search_regex(
+            r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
+        if duration:
+            duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
 
-        view_count = self._html_search_regex(
-            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+        view_count = self._search_regex(
+            r'Просмотров: <em>([^<]+)</em>',
             webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = int_or_none(view_count.replace(' ', ''))
 
-        comment_count = None
-        comment_str = self._html_search_regex(
-            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
-        if comment_str.startswith('комментариев нет'):
-            comment_count = 0
-        else:
-            mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
-            if mobj:
-                comment_count = mobj.group('total')
+        comment_count = int_or_none(self._search_regex(
+            r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False))
 
         return {
             'id': video_id,
index 96f95979a22429d2a19af3575ad1ca25c463b13e..0ae8ebd687034343c364dbc968d90d84f5bc37df 100644 (file)
@@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor):
             'duration': 27,
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'params': {
+            'skip_download': 'Not accessible from Travis CI server',
+        },
     }
 
     def _real_extract(self, url):
index 5b9157ed43efb0169baac327ce37ec7d46eb69f5..378117270439e7ce2669c46422f3df63caa33051 100644 (file)
@@ -118,9 +118,7 @@ class LyndaIE(LyndaBaseIE):
                 'lynda returned error: %s' % video_json['Message'], expected=True)
 
         if video_json['HasAccess'] is False:
-            raise ExtractorError(
-                'Video %s is only available for members. '
-                % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
+            self.raise_login_required('Video %s is only available for members' % video_id)
 
         video_id = compat_str(video_json['ID'])
         duration = video_json['DurationInSeconds']
index 54a14cb94c93dad587a83c58d58ec3d262f0eed8..ab1300185099a20835fdde08b88635ceacafe191 100644 (file)
@@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor):
                 'uploader_id': 'sonypicturesrus@mail.ru',
                 'duration': 184,
             },
+            'skip': 'Not accessible from Travis CI server',
         },
         {
             'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
@@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor):
                 'uploader_id': 'hitech@corp.mail.ru',
                 'duration': 245,
             },
+            'skip': 'Not accessible from Travis CI server',
         },
     ]
 
index b48fac5e3e434569642284d0b6388cab34696b01..a597714e97c2cc7313449d6b52a64978d4023d9d 100644 (file)
@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return [{'url': url, 'ext': 'mp4'}]
 
     def _extract_video_formats(self, mdoc, mtvn_id):
-        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
+        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
                 self.to_screen('The normal version is not available from your '
                                'country, trying with the mobile version')
@@ -114,7 +114,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
         # Remove the templates, like &device={device}
         mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
         if 'acceptMethods' not in mediagen_url:
-            mediagen_url += '&acceptMethods=fms'
+            mediagen_url += '&' if '?' in mediagen_url else '?'
+            mediagen_url += 'acceptMethods=fms'
 
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')
@@ -141,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if title_el is None:
             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
         if title_el is None:
-            title_el = itemdoc.find('.//title')
+            title_el = itemdoc.find('.//title') or itemdoc.find('./title')
             if title_el.text is None:
                 title_el = None
 
@@ -174,8 +175,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if self._LANG:
             info_url += 'lang=%s&' % self._LANG
         info_url += data
+        return self._get_videos_info_from_url(info_url, video_id)
+
+    def _get_videos_info_from_url(self, url, video_id):
         idoc = self._download_xml(
-            info_url, video_id,
+            url, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
         return self.playlist_result(
             [self._get_video_info(item) for item in idoc.findall('.//item')])
@@ -288,3 +292,65 @@ class MTVIggyIE(MTVServicesInfoExtractor):
         }
     }
     _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
+
+
+class MTVDEIE(MTVServicesInfoExtractor):
+    """Extractor for mtv.de artist, show and news pages.
+
+    The page embeds a ``window.pagePlaylist`` JSON array; the entry that
+    belongs to the numeric id in the URL is resolved through its ``mrss``
+    feed URL.
+    """
+    IE_NAME = 'mtv.de'
+    _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
+    _TESTS = [{
+        'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
+        'info_dict': {
+            'id': 'music_video-a50bc5f0b3aa4b3190aa',
+            'ext': 'mp4',
+            'title': 'MusicVideo_cro-traum',
+            'description': 'Cro - Traum',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
+        'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
+        'info_dict': {
+            'id': 'local_playlist-f5ae778b9832cc837189',
+            'ext': 'mp4',
+            'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        # single video in pagePlaylist with different id
+        'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
+        'info_dict': {
+            'id': 'local_playlist-4e760566473c4c8c5344',
+            'ext': 'mp4',
+            'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
+            'description': 'MTV Movies Supercut',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Resolve an mtv.de page URL to the result built from its MRSS feed.
+
+        Raises ExtractorError when the page playlist holds several entries
+        and none of them carries the id taken from the URL, instead of
+        falling off the end and returning None.
+        """
+        # Imported locally so this method does not rely on the module-level
+        # import list, which may not include ExtractorError.
+        from ..utils import ExtractorError
+
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
+            video_id)
+
+        # news pages contain single video in playlist with different id
+        if len(playlist) == 1:
+            return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
+
+        for item in playlist:
+            item_id = item.get('id')
+            if item_id and compat_str(item_id) == video_id:
+                return self._get_videos_info_from_url(item['mrss'], video_id)
+
+        # No entry matched: report it explicitly rather than returning None.
+        raise ExtractorError(
+            'Unable to find video %s in page playlist' % video_id)
index 66c627becc845650cfb0fc5d712a07e79fdbb8ce..c8257719f8428c1f3e735c58846574ee7af895cd 100644 (file)
@@ -130,10 +130,16 @@ class NowTVIE(InfoExtractor):
     }, {
         'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+        display_id_split = display_id.split('/')
+        if len(display_id) > 2:
+            display_id = '/'.join((display_id_split[0], display_id_split[-1]))
 
         info = self._download_json(
             'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
index dec09cdfef0087ee3400394b82750daf434ed29d..17baa96796fafbf5d70b220d2f796cdc707d216b 100644 (file)
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
     IE_NAME = 'nowvideo'
     IE_DESC = 'NowVideo'
 
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
 
     _HOST = 'www.nowvideo.ch'
 
index 003d27de783b926e68657f7ad8eefe9b2c75d5cf..66520c2c56f102e53fe84c5e9970558b4714247b 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
     _TESTS = [{
         # metadata in JSON
         'url': 'http://ok.ru/video/20079905452',
@@ -43,9 +43,27 @@ class OdnoklassnikiIE(InfoExtractor):
             'like_count': int,
             'age_limit': 0,
         },
+    }, {
+        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
+        'url': 'http://ok.ru/video/64211978996595-1',
+        'md5': '5d7475d428845cd2e13bae6f1a992278',
+        'info_dict': {
+            'id': '64211978996595-1',
+            'ext': 'mp4',
+            'title': 'Космическая среда от 26 августа 2015',
+            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
+            'duration': 440,
+            'upload_date': '20150826',
+            'uploader_id': '750099571',
+            'uploader': 'Алина П',
+            'age_limit': 0,
+        },
     }, {
         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
         'only_matching': True,
+    }, {
+        'url': 'http://www.ok.ru/video/20648036891',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -56,7 +74,8 @@ class OdnoklassnikiIE(InfoExtractor):
 
         player = self._parse_json(
             unescapeHTML(self._search_regex(
-                r'data-attributes="([^"]+)"', webpage, 'player')),
+                r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
+                webpage, 'player', group='player')),
             video_id)
 
         flashvars = player['flashvars']
@@ -89,16 +108,7 @@ class OdnoklassnikiIE(InfoExtractor):
 
         like_count = int_or_none(metadata.get('likeCount'))
 
-        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
-
-        formats = [{
-            'url': f['url'],
-            'ext': 'mp4',
-            'format_id': f['name'],
-            'quality': quality(f['name']),
-        } for f in metadata['videos']]
-
-        return {
+        info = {
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
@@ -108,5 +118,24 @@ class OdnoklassnikiIE(InfoExtractor):
             'uploader_id': uploader_id,
             'like_count': like_count,
             'age_limit': age_limit,
-            'formats': formats,
         }
+
+        if metadata.get('provider') == 'USER_YOUTUBE':
+            info.update({
+                '_type': 'url_transparent',
+                'url': movie['contentId'],
+            })
+            return info
+
+        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
+
+        formats = [{
+            'url': f['url'],
+            'ext': 'mp4',
+            'format_id': f['name'],
+            'quality': quality(f['name']),
+        } for f in metadata['videos']]
+        self._sort_formats(formats)
+
+        info['formats'] = formats
+        return info
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
new file mode 100644 (file)
index 0000000..fd32836
--- /dev/null
@@ -0,0 +1,207 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_duration,
+)
+
+
+class PluralsightIE(InfoExtractor):
+    """Extractor for a single clip opened in the Pluralsight training player.
+
+    Initialization always runs the login step, so credentials are needed
+    before any extraction happens.
+    """
+    IE_NAME = 'pluralsight'
+    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
+    _LOGIN_URL = 'https://www.pluralsight.com/id/'
+    _NETRC_MACHINE = 'pluralsight'
+
+    _TEST = {
+        'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
+        'md5': '4d458cf5cf4c593788672419a8dd4cf8',
+        'info_dict': {
+            'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
+            'ext': 'mp4',
+            'title': 'Management of SQL Server - Demo Monitoring',
+            'duration': 338,
+        },
+        'skip': 'Requires pluralsight account credentials',
+    }
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        """Post the login form and raise ExtractorError if the site reports a validation error."""
+        username, password = self._get_login_info()
+        if username is None:
+            self.raise_login_required('Pluralsight account is required')
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        # Start from the hidden inputs of the page, then add the credentials.
+        form_fields = self._hidden_inputs(login_page)
+        form_fields['Username'] = username.encode('utf-8')
+        form_fields['Password'] = password.encode('utf-8')
+
+        action_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_URL, group='url')
+        # A relative form action is resolved against the login page URL.
+        if not action_url.startswith('http'):
+            action_url = compat_urlparse.urljoin(self._LOGIN_URL, action_url)
+
+        payload = compat_urllib_parse.urlencode(form_fields).encode('utf-8')
+        login_request = compat_urllib_request.Request(action_url, payload)
+        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+
+        login_response = self._download_webpage(
+            login_request, None, 'Logging in as %s' % username)
+
+        failure = self._search_regex(
+            r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
+            login_response, 'error message', default=None)
+        if failure:
+            raise ExtractorError('Unable to login: %s' % failure, expected=True)
+
+    def _real_extract(self, url):
+        """Locate the clip named by the URL and request one media URL per allowed format."""
+        match = re.match(self._VALID_URL, url)
+        author, name, clip_id, course = match.group(
+            'author', 'name', 'clip', 'course')
+
+        display_id = '%s-%s' % (name, clip_id)
+
+        webpage = self._download_webpage(url, display_id)
+
+        modules_json = self._search_regex(
+            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
+            webpage, 'modules')
+        collection = self._parse_json(modules_json, display_id)
+
+        module, clip = None, None
+
+        for candidate in collection:
+            if candidate.get('moduleName') != name:
+                continue
+            module = candidate
+            for entry in candidate.get('clips', []):
+                index = entry.get('clipIndex')
+                # clipIndex may be numeric in the JSON; compare as text.
+                if index is not None and compat_str(index) == clip_id:
+                    clip = entry
+                    break
+
+        if not clip:
+            raise ExtractorError('Unable to resolve clip')
+
+        QUALITIES = {
+            'low': {'width': 640, 'height': 480},
+            'medium': {'width': 848, 'height': 640},
+            'high': {'width': 1024, 'height': 768},
+        }
+
+        ALLOWED_QUALITIES = (
+            ('webm', ('high',)),
+            ('mp4', ('low', 'medium', 'high',)),
+        )
+
+        formats = []
+        for ext, allowed in ALLOWED_QUALITIES:
+            for quality in allowed:
+                format_id = '%s-%s' % (ext, quality)
+                fmt = QUALITIES[quality].copy()
+                clip_post = {
+                    'a': author,
+                    'cap': 'false',
+                    'cn': clip_id,
+                    'course': course,
+                    'lc': 'en',
+                    'm': name,
+                    'mt': ext,
+                    'q': '%dx%d' % (fmt['width'], fmt['height']),
+                }
+                body = json.dumps(clip_post).encode('utf-8')
+                request = compat_urllib_request.Request(
+                    'http://www.pluralsight.com/training/Player/ViewClip', body)
+                request.add_header('Content-Type', 'application/json;charset=utf-8')
+                # Non-fatal download: a format without a usable URL is left out.
+                clip_url = self._download_webpage(
+                    request, display_id, 'Downloading %s URL' % format_id, fatal=False)
+                if clip_url:
+                    fmt.update({
+                        'url': clip_url,
+                        'ext': ext,
+                        'format_id': format_id,
+                    })
+                    formats.append(fmt)
+        self._sort_formats(formats)
+
+        # TODO: captions
+        # http://www.pluralsight.com/training/Player/ViewClip + cap = true
+        # or
+        # http://www.pluralsight.com/training/Player/Captions
+        # { a = author, cn = clip_id, lc = end, m = name }
+
+        # Prefer the numeric duration; fall back to the formatted one.
+        duration = int_or_none(clip.get('duration'))
+        if not duration:
+            duration = parse_duration(clip.get('formattedDuration'))
+
+        return {
+            'id': clip['clipName'],
+            'title': '%s - %s' % (module['title'], clip['title']),
+            'duration': duration,
+            'creator': author,
+            'formats': formats
+        }
+
+
+class PluralsightCourseIE(InfoExtractor):
+    """Expose a Pluralsight course page as a playlist of player URLs."""
+    IE_NAME = 'pluralsight:course'
+    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
+    _TEST = {
+        # Free course from Pluralsight Starter Subscription for Microsoft TechNet
+        # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
+        'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
+        'info_dict': {
+            'id': 'hosting-sql-server-windows-azure-iaas',
+            'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
+            'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
+        },
+        'playlist_count': 31,
+    }
+
+    def _real_extract(self, url):
+        """Download the course metadata and content, then list every clip that has player parameters."""
+        course_id = self._match_id(url)
+
+        # TODO: PSM cookie
+
+        course = self._download_json(
+            'http://www.pluralsight.com/data/course/%s' % course_id,
+            course_id, 'Downloading course JSON')
+
+        title = course['title']
+        description = course.get('description')
+        if not description:
+            description = course.get('shortDescription')
+
+        modules = self._download_json(
+            'http://www.pluralsight.com/data/course/content/%s' % course_id,
+            course_id, 'Downloading course data JSON')
+
+        # Clips without player parameters cannot be turned into a player URL.
+        player_urls = [
+            'http://www.pluralsight.com/training/player?%s' % clip['playerParameters']
+            for module in modules
+            for clip in module.get('clips', [])
+            if clip.get('playerParameters')]
+
+        entries = [
+            self.url_result(player_url, 'Pluralsight')
+            for player_url in player_urls]
+
+        return self.playlist_result(entries, course_id, title, description)
index 72cd80498328ca4af9a9ac008afb16c5f0300c30..25f7faf76ea9783281157183329594f74b7678ed 100644 (file)
@@ -1,6 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
 from .common import InfoExtractor
 
 
@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
     _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
     _TESTS = [{
         'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
-        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
         'info_dict': {
             'id': 'folge-203-0',
             'ext': 'f4v',
             'title': 'GRIP sucht den Sommerkönig',
             'description': 'Matthias, Det und Helge treten gegeneinander an.'
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
-        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
         'info_dict': {
             'id': '21040-anna-erwischt-alex',
             'ext': 'mp4',
             'title': 'Anna erwischt Alex!',
             'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        vico_id = self._html_search_regex(
-            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
-        vivi_id = self._html_search_regex(
-            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+        mobj = re.search(
+            r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+            webpage)
+        if mobj:
+            vico_id = mobj.group('vico_id')
+            vivi_id = mobj.group('vivi_id')
+        else:
+            vico_id = self._html_search_regex(
+                r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+            vivi_id = self._html_search_regex(
+                r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
         info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
-        webpage = self._download_webpage(info_url, '')
 
         info = self._download_json(info_url, video_id)
         video_info = info['video']
index ecf4939cdc031683eca7ddd7240a2439f803947d..82b323cdd4e40b027d3a6c2c06e9ea9d58b171e2 100644 (file)
@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
             'description': 'As paixões musicais de António Cartaxo e António Macedo',
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
         'only_matching': True,
index 82cd98ac742bf436b24fbbc77cac9a6fb8a44ff6..5b97d33caec2a08c79e648483ec67a1c288b6e4f 100644 (file)
@@ -6,7 +6,7 @@ import re
 import time
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import compat_urllib_request, compat_urlparse
 from ..utils import (
     ExtractorError,
     float_or_none,
@@ -102,7 +102,9 @@ class RTVEALaCartaIE(InfoExtractor):
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
+        png_request = compat_urllib_request.Request(png_url)
+        png_request.add_header('Referer', url)
+        png = self._download_webpage(png_request, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
         if not video_url.endswith('.f4m'):
             auth_url = video_url.replace(
index 4e22628d031bc462f66394384f2d398465105baf..c67ad25ce510c888566748cffc7cb2639e4e7fe0 100644 (file)
@@ -6,19 +6,19 @@ from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     determine_ext,
     int_or_none,
+    xpath_attr,
     xpath_text,
 )
 
 
 class RuutuIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
     _TESTS = [
         {
-            'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
+            'url': 'http://www.ruutu.fi/video/2058907',
             'md5': 'ab2093f39be1ca8581963451b3c0234f',
             'info_dict': {
                 'id': '2058907',
-                'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
                 'ext': 'mp4',
                 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
                 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
@@ -28,14 +28,13 @@ class RuutuIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
+            'url': 'http://www.ruutu.fi/video/2057306',
             'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
             'info_dict': {
                 'id': '2057306',
-                'display_id': 'superpesis-katso-koko-kausi-ruudussa',
                 'ext': 'mp4',
                 'title': 'Superpesis: katso koko kausi Ruudussa',
-                'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
+                'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 40,
                 'age_limit': 0,
@@ -44,29 +43,10 @@ class RuutuIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'data-media-id="(\d+)"', webpage, 'media id')
-
-        video_xml_url = None
-
-        media_data = self._search_regex(
-            r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
-            'media data', default=None)
-        if media_data:
-            media_json = self._parse_json(media_data, display_id, fatal=False)
-            if media_json:
-                xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
-                if xml_url:
-                    video_xml_url = xml_url.replace('{ID}', video_id)
-
-        if not video_xml_url:
-            video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
-
-        video_xml = self._download_xml(video_xml_url, video_id)
+        video_xml = self._download_xml(
+            'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
 
         formats = []
         processed_urls = []
@@ -109,10 +89,9 @@ class RuutuIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'display_id': display_id,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
+            'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
+            'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
             'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
             'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
             'formats': formats,
index f3c80708c86ab2fc29fbd029b245bbe894af2dfb..a602af6928d2a9d054fc8670342a6ddf7d9ef4da 100644 (file)
@@ -20,7 +20,6 @@ from ..utils import (
 class SafariBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
     _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
-    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
     _NETRC_MACHINE = 'safari'
 
     _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
@@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor):
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
-            raise ExtractorError(
-                self._ACCOUNT_CREDENTIALS_HINT,
-                expected=True)
+            self.raise_login_required('safaribooksonline.com account is required')
 
         headers = std_headers
         if 'Referer' not in headers:
index 220d39078fab15e0e4191a1d1a33b162f342ffb8..05f93904c6ff856e8eacd5a8eaaa178d89e975fa 100644 (file)
@@ -12,8 +12,8 @@ from ..utils import (
 
 
 class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
-
+    _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
+    EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
     _TESTS = [{
         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
         'only_matching': True,
@@ -33,7 +33,7 @@ class ScreenwaveMediaIE(InfoExtractor):
             'http://player.screenwavemedia.com/player.js',
             video_id, 'Downloading playerconfig webpage')
 
-        videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')
+        videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
 
         sources = self._parse_json(
             js_to_json(
@@ -56,6 +56,7 @@ class ScreenwaveMediaIE(InfoExtractor):
 
         # Fallback to hardcoded sources if JS changes again
         if not sources:
+            self.report_warning('Falling back to a hardcoded list of streams')
             sources = [{
                 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
                 'type': 'mp4',
index a07677686a4ecc2923b310c3aeeeaab610bb0868..c5636e8e92fdf772bb102675294c2e100315b5ca 100644 (file)
@@ -14,17 +14,28 @@ from ..utils import (
 
 
 class SharedIE(InfoExtractor):
-    _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
+    IE_DESC = 'shared.sx and vivo.sx'
+    _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://shared.sx/0060718775',
         'md5': '106fefed92a8a2adb8c98e6a0652f49b',
         'info_dict': {
             'id': '0060718775',
             'ext': 'mp4',
             'title': 'Bmp4',
+            'filesize': 1720110,
         },
-    }
+    }, {
+        'url': 'http://vivo.sx/d7ddda0e78',
+        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+        'info_dict': {
+            'id': 'd7ddda0e78',
+            'ext': 'mp4',
+            'title': 'Chicken',
+            'filesize': 528031,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 93a7cfe15cc764bc61b912dd2e3283d950790565..35a81ee87fda041cb8208793e3f4fe34d3445cfd 100644 (file)
@@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor):
 
             (username, password) = self._get_login_info()
             if username is None:
-                raise ExtractorError(
-                    'Erotic broadcasts allowed only for registered users, '
-                    'use --username and --password options to provide account credentials.',
-                    expected=True)
+                self.raise_login_required('Erotic broadcasts allowed only for registered users')
 
             login_form = {
                 'login-hint53': '1',
index 6ce86cbcda9aeb2c8ebe0a8d2ce3063e67d9123c..ed5dcc0d39a0bf014d8650f3df4110749ae4d649 100644 (file)
@@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (All)',
         },
-        'playlist_mincount': 112,
+        'playlist_mincount': 111,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
         'info_dict': {
@@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (Reposts)',
         },
-        'playlist_mincount': 9,
+        'playlist_mincount': 7,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Likes)',
         },
-        'playlist_mincount': 333,
+        'playlist_mincount': 321,
     }, {
         'url': 'https://soundcloud.com/grynpyret/spotlight',
         'info_dict': {
index 5fa6faf18b738aa32e384972bf65ad56188ad9b4..9e8fb35b2ebfc1343944db37438d8a7bdd4e70ea 100644 (file)
@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text
 
 
 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
+    _TESTS = [{
+        # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
         'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
@@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):
             'upload_date': '20070507',
             'age_limit': 18,
         }
-    }
+    }, {
+        # download URL pattern: */mp4_<format_id>_<video_id>.mp4
+        'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
+        'md5': '09b3c20833308b736ae8902db2f8d7e6',
+        'info_dict': {
+            'id': '1921551',
+            'ext': 'mp4',
+            'title': 'Titcums Compiloation I',
+            'description': 'cum on tits',
+            'uploader': 'dannyh78999',
+            'uploader_id': '3056053',
+            'upload_date': '20150822',
+            'age_limit': 18,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')
 
-        req = compat_urllib_request.Request(url)
+        req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
             r'by:\s*<a [^>]*>(.+?)</a>',
             webpage, 'uploader', fatal=False)
         uploader_id = self._html_search_regex(
-            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
+            r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
             webpage, 'uploader id', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
             r'</a> on (.+?) at \d+:\d+',
@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
             r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
             webpage, 'comment count', fatal=False))
 
-        video_urls = list(map(
-            compat_urllib_parse_unquote,
-            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
+        videos = re.findall(
+            r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
+        heights = [int(video[0]) for video in videos]
+        video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
             password = self._search_regex(
                 r'flashvars\.video_title = "([^"]+)',
@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
                 video_urls))
 
         formats = []
-        for video_url in video_urls:
+        for height, video_url in zip(heights, video_urls):
             path = compat_urllib_parse_urlparse(video_url).path
-            format = path.split('/')[4].split('_')[:2]
-            resolution, bitrate_str = format
-            format = "-".join(format)
-            height = int(resolution.rstrip('Pp'))
-            tbr = int(bitrate_str.rstrip('Kk'))
-            formats.append({
+            _, quality = path.split('/')[4].split('_')[:2]
+            f = {
                 'url': video_url,
-                'resolution': resolution,
-                'format': format,
-                'tbr': tbr,
                 'height': height,
-                'format_id': format,
-            })
+            }
+            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
+            if tbr:
+                f.update({
+                    'tbr': int(tbr),
+                    'format_id': '%dp' % height,
+                })
+            else:
+                f['format_id'] = quality
+            formats.append(f)
         self._sort_formats(formats)
 
         age_limit = self._rta_search(webpage)
index adaec337579e0bdca194b0b6cf44cefd918edd31..25edc310008ef0da7b407c97db86ce5e49c78a50 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 import time
 import hmac
 import binascii
@@ -29,7 +28,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 
 
 class ThePlatformBaseIE(InfoExtractor):
-    def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloading SMIL data'):
+    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
         meta = self._download_xml(smil_url, video_id, note=note)
         try:
             error_msg = next(
@@ -55,12 +54,13 @@ class ThePlatformBaseIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        return formats
+        subtitles = self._parse_smil_subtitles(meta, default_ns)
+
+        return formats, subtitles
 
     def get_metadata(self, path, video_id):
         info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
-        info_json = self._download_webpage(info_url, video_id)
-        info = json.loads(info_json)
+        info = self._download_json(info_url, video_id)
 
         subtitles = {}
         captions = info.get('captions')
@@ -210,12 +210,14 @@ class ThePlatformIE(ThePlatformBaseIE):
         if sig:
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
 
-        formats = self._extract_theplatform_smil_formats(smil_url, video_id)
+        formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
 
         ret = self.get_metadata(path, video_id)
+        combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
         ret.update({
             'id': video_id,
             'formats': formats,
+            'subtitles': combined_subtitles,
         })
 
         return ret
@@ -253,6 +255,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
         entry = feed['entries'][0]
 
         formats = []
+        subtitles = {}
         first_video_id = None
         duration = None
         for item in entry['media$content']:
@@ -261,7 +264,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
             if first_video_id is None:
                 first_video_id = cur_video_id
                 duration = float_or_none(item.get('plfile$duration'))
-            formats.extend(self._extract_theplatform_smil_formats(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id))
+            cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
+            formats.extend(cur_formats)
+            subtitles = self._merge_subtitles(subtitles, cur_subtitles)
 
         self._sort_formats(formats)
 
@@ -275,9 +280,11 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
         categories = [item['media$name'] for item in entry.get('media$categories', [])]
 
         ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
+        subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
         ret.update({
             'id': video_id,
             'formats': formats,
+            'subtitles': subtitles,
             'thumbnails': thumbnails,
             'duration': duration,
             'timestamp': timestamp,
index 2c4b21807ce1dc276e8625463ac38fb63c8ff211..4f86b3ee927541c8f31936103c3319ce48b97e72 100644 (file)
@@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
-            raise ExtractorError(
-                'This video requires login, use --username and --password '
-                'options to provide account credentials.', expected=True)
+            self.raise_login_required('This video requires login')
 
         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage)
index 4a0eaf65f78be0dbac2b089aa064eec043b15e41..365d8b4bfe19a6d89965a68957ea4901d7722037 100644 (file)
@@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor):
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
-            raise ExtractorError(
-                'Udemy account is required, use --username and --password options to provide account credentials.',
-                expected=True)
+            self.raise_login_required('Udemy account is required')
 
         login_popup = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login popup')
index f4c0f5702e59bea80046438606dd9a28271d8a30..4098e4629d671850425025d08bab441a3745f7dd 100644 (file)
@@ -1,18 +1,38 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 import hashlib
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     int_or_none,
     float_or_none,
 )
 
 
-class YandexMusicBaseIE(InfoExtractor):
+class YandexMusicTrackIE(InfoExtractor):
+    IE_NAME = 'yandexmusic:track'
+    IE_DESC = 'Яндекс.Музыка - Трек'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508/track/4878838',
+        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+        'info_dict': {
+            'id': '4878838',
+            'ext': 'mp3',
+            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+            'filesize': 4628061,
+            'duration': 193.04,
+        }
+    }
+
     def _get_track_url(self, storage_dir, track_id):
         data = self._download_json(
             'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
@@ -35,24 +55,6 @@ class YandexMusicBaseIE(InfoExtractor):
             'duration': float_or_none(track.get('durationMs'), 1000),
         }
 
-
-class YandexMusicTrackIE(YandexMusicBaseIE):
-    IE_NAME = 'yandexmusic:track'
-    IE_DESC = 'Яндекс.Музыка - Трек'
-    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://music.yandex.ru/album/540508/track/4878838',
-        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
-        'info_dict': {
-            'id': '4878838',
-            'ext': 'mp3',
-            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
-            'filesize': 4628061,
-            'duration': 193.04,
-        }
-    }
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         album_id, track_id = mobj.group('album_id'), mobj.group('id')
@@ -64,7 +66,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
         return self._get_track_info(track)
 
 
-class YandexMusicAlbumIE(YandexMusicBaseIE):
+class YandexMusicPlaylistBaseIE(InfoExtractor):
+    def _build_playlist(self, tracks):
+        return [
+            self.url_result(
+                'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
+            for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
+
+
+class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:album'
     IE_DESC = 'Яндекс.Музыка - Альбом'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
@@ -85,7 +95,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
             'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
             album_id, 'Downloading album JSON')
 
-        entries = [self._get_track_info(track) for track in album['volumes'][0]]
+        entries = self._build_playlist(album['volumes'][0])
 
         title = '%s - %s' % (album['artists'][0]['name'], album['title'])
         year = album.get('year')
@@ -95,12 +105,12 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
         return self.playlist_result(entries, compat_str(album['id']), title)
 
 
-class YandexMusicPlaylistIE(YandexMusicBaseIE):
+class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:playlist'
     IE_DESC = 'Яндекс.Музыка - Плейлист'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
         'info_dict': {
             'id': '1245',
@@ -108,20 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
             'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
         },
         'playlist_count': 6,
-    }
+    }, {
+        # playlist exceeding the limit of 150 tracks shipped with webpage (see
+        # https://github.com/rg3/youtube-dl/issues/6666)
+        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
+        'info_dict': {
+            'id': '1036',
+            'title': 'Музыка 90-х',
+        },
+        'playlist_count': 310,
+    }]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        playlist = self._parse_json(
+        mu = self._parse_json(
             self._search_regex(
                 r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
-            playlist_id)['pageData']['playlist']
-
-        entries = [self._get_track_info(track) for track in playlist['tracks']]
+            playlist_id)
+
+        playlist = mu['pageData']['playlist']
+        tracks, track_ids = playlist['tracks'], playlist['trackIds']
+
+        # tracks dictionary shipped with webpage is limited to 150 tracks,
+        # missing tracks should be retrieved manually.
+        if len(tracks) < len(track_ids):
+            present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
+            missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
+            request = compat_urllib_request.Request(
+                'https://music.yandex.ru/handlers/track-entries.jsx',
+                compat_urllib_parse.urlencode({
+                    'entries': ','.join(missing_track_ids),
+                    'lang': mu.get('settings', {}).get('lang', 'en'),
+                    'external-domain': 'music.yandex.ru',
+                    'overembed': 'false',
+                    'sign': mu.get('authData', {}).get('user', {}).get('sign'),
+                    'strict': 'true',
+                }).encode('utf-8'))
+            request.add_header('Referer', url)
+            request.add_header('X-Requested-With', 'XMLHttpRequest')
+
+            missing_tracks = self._download_json(
+                request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
+            if missing_tracks:
+                tracks.extend(missing_tracks)
 
         return self.playlist_result(
-            entries, compat_str(playlist_id),
+            self._build_playlist(tracks),
+            compat_str(playlist_id),
             playlist['title'], playlist.get('description'))
index 78caeb8b36e0be8cf4e97365d9e28251723059b7..2e81d92238307e8914769d6fc48d03befd6af2bf 100644 (file)
@@ -49,6 +49,17 @@ class YoukuIE(InfoExtractor):
         },
         'playlist_count': 13,
         'skip': 'Available in China only',
+    }, {
+        'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
+        'note': 'Video protected with password',
+        'info_dict': {
+            'id': 'XNjA1NzA2Njgw',
+            'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
+        },
+        'playlist_count': 19,
+        'params': {
+            'videopassword': '100600',
+        },
     }]
 
     def construct_video_urls(self, data1, data2):
@@ -185,9 +196,15 @@ class YoukuIE(InfoExtractor):
             raw_data = self._download_json(req, video_id, note=note)
             return raw_data['data'][0]
 
+        video_password = self._downloader.params.get('videopassword', None)
+
         # request basic data
+        basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
+        if video_password:
+            basic_data_url += '?password=%s' % video_password
+
         data1 = retrieve_data(
-            'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
+            basic_data_url,
             'Downloading JSON metadata 1')
         data2 = retrieve_data(
             'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
index 8e2da46e3ad68d902a2757dcc9b4e4613c7adb45..030ec70ca0b89c0c910e051c84e26d8d408f458f 100644 (file)
@@ -660,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1243,7 +1243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
-            url_map = {}
+            formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
                 if 'itag' not in url_data or 'url' not in url_data:
@@ -1289,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
-                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
+                                    r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                     player_url,
                                     'html5 player', fatal=False)
                                 player_desc = 'html5 player %s' % player_version
@@ -1303,8 +1303,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     url += '&signature=' + signature
                 if 'ratebypass' not in url:
                     url += '&ratebypass=yes'
-                url_map[format_id] = url
-            formats = _map_to_format_list(url_map)
+
+                # Some itags are not included in DASH manifest thus corresponding formats will
+                # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
+                # Trying to extract metadata from url_encoded_fmt_stream_map entry.
+                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
+                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+                dct = {
+                    'format_id': format_id,
+                    'url': url,
+                    'player_url': player_url,
+                    'filesize': int_or_none(url_data.get('clen', [None])[0]),
+                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+                    'width': width,
+                    'height': height,
+                    'fps': int_or_none(url_data.get('fps', [None])[0]),
+                    'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
+                }
+                type_ = url_data.get('type', [None])[0]
+                if type_:
+                    type_split = type_.split(';')
+                    kind_ext = type_split[0].split('/')
+                    if len(kind_ext) == 2:
+                        kind, ext = kind_ext
+                        dct['ext'] = ext
+                        if kind in ('audio', 'video'):
+                            codecs = None
+                            for mobj in re.finditer(
+                                    r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
+                                if mobj.group('key') == 'codecs':
+                                    codecs = mobj.group('val')
+                                    break
+                            if codecs:
+                                codecs = codecs.split(',')
+                                if len(codecs) == 2:
+                                    acodec, vcodec = codecs[0], codecs[1]
+                                else:
+                                    acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
+                                dct.update({
+                                    'acodec': acodec,
+                                    'vcodec': vcodec,
+                                })
+                if format_id in self._formats:
+                    dct.update(self._formats[format_id])
+                formats.append(dct)
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
             url_map = self._extract_from_m3u8(manifest_url, video_id)
index 9016e34983d3fed5e0fab72e9a8626124cdee859..8c4ff12bdd1763c12908ef8dc875f0916794237b 100644 (file)
@@ -320,7 +320,7 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '--video-password',
         dest='videopassword', metavar='PASSWORD',
-        help='Video password (vimeo, smotri)')
+        help='Video password (vimeo, smotri, youku)')
 
     video_format = optparse.OptionGroup(parser, 'Video Format Options')
     video_format.add_option(
index 4191d040bb1da468e248d57b78df1afdacf64cd0..150ef917347e43c972d541bcb3f394b8db6d8ac4 100644 (file)
@@ -4,6 +4,7 @@ import os
 
 from ..utils import (
     PostProcessingError,
+    cli_configuration_args,
     encodeFilename,
 )
 
@@ -61,11 +62,9 @@ class PostProcessor(object):
             self._downloader.report_warning(errnote)
 
     def _configuration_args(self, default=[]):
-        pp_args = self._downloader.params.get('postprocessor_args')
-        if pp_args is None:
-            return default
-        assert isinstance(pp_args, list)
-        return pp_args
+        """Return the configured 'postprocessor_args' list, or default when the option is unset."""
+        # Options are read from the downloader's params, exactly as the replaced code did.
+        return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
 
 
 class AudioConversionError(PostProcessingError):
index e265c75742b6d22647965e6a0261f24bb4a73a00..79381b3803717081b15c79b26cf9bf4173f18ef6 100644 (file)
@@ -141,7 +141,7 @@ def write_json_file(obj, fn):
 if sys.version_info >= (2, 7):
     def find_xpath_attr(node, xpath, key, val=None):
         """ Find the xpath xpath[@key=val] """
-        assert re.match(r'^[a-zA-Z-]+$', key)
+        assert re.match(r'^[a-zA-Z_-]+$', key)
         if val:
             assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
@@ -176,12 +176,12 @@ def xpath_with_ns(path, ns_map):
     return '/'.join(replaced)
 
 
-def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
     if sys.version_info < (2, 7):  # Crazy 2.6
         xpath = xpath.encode('ascii')
 
     n = node.find(xpath)
-    if n is None or n.text is None:
+    if n is None:
         if default is not NO_DEFAULT:
             return default
         elif fatal:
@@ -189,9 +189,37 @@ def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
             raise ExtractorError('Could not find XML element %s' % name)
         else:
             return None
+    return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+    """Return the text of the element at xpath; fall back to default, None, or ExtractorError if fatal."""
+    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+    if n is None or n == default:
+        # Nothing to read text from: xpath_element returned None or the default value.
+        return n
+    if n.text is None:
+        if default is not NO_DEFAULT:
+            return default
+        if not fatal:
+            return None
+        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
     return n.text
 
 
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+    """Return attribute key of the element found by find_xpath_attr; else default, None, or raise if fatal."""
+    element = find_xpath_attr(node, xpath, key)
+    if element is not None:
+        return element.attrib[key]
+    if default is not NO_DEFAULT:
+        return default
+    if not fatal:
+        return None
+    label = '%s[@%s]' % (xpath, key) if name is None else name
+    raise ExtractorError('Could not find XML attribute %s' % label)
+
+
 def get_element_by_id(id, html):
     """Return the content of the tag with the specified ID in the passed HTML document"""
     return get_element_by_attribute("id", id, html)
@@ -587,6 +615,11 @@ class ContentTooShortError(Exception):
 
 
 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
+    # expected HTTP responses to meet HTTP/1.0 or later (see also
+    # https://github.com/rg3/youtube-dl/issues/6727)
+    if sys.version_info < (3, 0):
+        kwargs['strict'] = True
     hc = http_class(*args, **kwargs)
     source_address = ydl_handler._params.get('source_address')
     if source_address is not None:
@@ -1918,6 +1951,44 @@ def dfxp2srt(dfxp_data):
     return ''.join(out)
 
 
+def cli_option(params, command_option, param):
+    """Return [command_option, value] for params[param], or [] when it is unset."""
+    param = params.get(param)
+    return [command_option, param] if param is not None else []
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+    """Render the boolean params[param] as a command-line option.
+
+    Returns [command_option + separator + value] when separator is given,
+    otherwise [command_option, value]; value is true_value or false_value.
+    An unset (None) parameter yields [], matching cli_option.
+    """
+    param = params.get(param)
+    if param is None:
+        # Unset option: emit nothing instead of tripping the assert below.
+        return []
+    assert isinstance(param, bool)
+    if separator:
+        return [command_option + separator + (true_value if param else false_value)]
+    return [command_option, true_value if param else false_value]
+
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+    """Return [command_option] when params[param] equals expected_value, else []."""
+    param = params.get(param)
+    return [command_option] if param == expected_value else []
+
+
+def cli_configuration_args(params, param, default=[]):
+    """Return the list stored in params[param], or default when it is unset."""
+    ex_args = params.get(param)
+    if ex_args is None:
+        return default
+    assert isinstance(ex_args, list)
+    return ex_args
+
+
 class ISO639Utils(object):
     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
     _lang_map = {
index c090c6df7b1bfee2e3bee5631918c81f9fa26197..6bc689b7516ef4979edb3ff63bf34ab11d9c0ace 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.08.16.1'
+__version__ = '2015.09.03'