Merge pull request #8718 from remitamine/m3u8-fixup
author remitamine <remitamine@gmail.com>
Sat, 5 Mar 2016 17:37:28 +0000 (18:37 +0100)
committer remitamine <remitamine@gmail.com>
Sat, 5 Mar 2016 17:37:28 +0000 (18:37 +0100)
Add fixup for media files produced by HlsNative downloader (fixes #4776)

41 files changed:
.gitignore
Makefile
README.md
test/helper.py
test/test_http.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/common.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/audimedia.py
youtube_dl/extractor/bleacherreport.py
youtube_dl/extractor/common.py
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/engadget.py
youtube_dl/extractor/fivemin.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/indavideo.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/kusi.py [new file with mode: 0644]
youtube_dl/extractor/leeco.py [new file with mode: 0644]
youtube_dl/extractor/letv.py [deleted file]
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/revision3.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/yandexmusic.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/utils.py

index 0422adf4456ec35166f5d2bce6b832c67601c2dc..26dbde73d412673ee9c53ee06a476a803a92edc7 100644 (file)
@@ -1,5 +1,6 @@
 *.pyc
 *.pyo
+*.class
 *~
 *.DS_Store
 wine-py2exe/
@@ -32,4 +33,4 @@ test/testdata
 .tox
 youtube-dl.zsh
 .idea
-.idea/*
\ No newline at end of file
+.idea/*
index cb449b7e61c4c27d44177cc775c26fa79ca97a3a..e98806791327feaa67cd780e5395ce0cc960bbd8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 clean:
        rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
        find . -name "*.pyc" -delete
+       find . -name "*.class" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
@@ -44,7 +45,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-       nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
+       $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 
 tar: youtube-dl.tar.gz
 
index 44b65da024a71d2ce5229cf752b1cf5d2a48479f..d66804a7a19f667f0d8e3b19b707eec46bf2bb8d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -458,6 +458,7 @@ The basic usage is not to set any template arguments when downloading a single f
  - `alt_title`: A secondary title of the video
  - `display_id`: An alternative identifier for the video
  - `uploader`: Full name of the video uploader
+ - `license`: License name the video is licensed under
  - `creator`: The main artist who created the video
  - `release_date`: The date (YYYYMMDD) when the video was released
  - `timestamp`: UNIX timestamp of the moment the video became available
index bdd7acca4d91c490f29c21aeac7cc9ba01c86952..f2d87821290095c1f9526f50db5d80ab31969d56 100644 (file)
@@ -11,8 +11,11 @@ import sys
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
+    compat_os_name,
     compat_str,
+)
+from youtube_dl.utils import (
     preferredencoding,
     write_string,
 )
@@ -42,7 +45,7 @@ def report_warning(message):
     Print the message to stderr, it will be prefixed with 'WARNING:'
     If stderr is a tty file the 'WARNING:' will be colored
     '''
-    if sys.stderr.isatty() and os.name != 'nt':
+    if sys.stderr.isatty() and compat_os_name != 'nt':
         _msg_header = '\033[0;33mWARNING:\033[0m'
     else:
         _msg_header = 'WARNING:'
index f2e305b6fed3ce2f0574a7c20e89ffb977934f28..fc59b1aed6ddc2db10598a1a4b954a128e3d3133 100644 (file)
@@ -52,7 +52,12 @@ class TestHTTP(unittest.TestCase):
             ('localhost', 0), HTTPTestRequestHandler)
         self.httpd.socket = ssl.wrap_socket(
             self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = self.httpd.socket.getsockname()[1]
+        if os.name == 'java':
+            # In Jython SSLSocket is not a subclass of socket.socket
+            sock = self.httpd.socket.sock
+        else:
+            sock = self.httpd.socket
+        self.port = sock.getsockname()[1]
         self.server_thread = threading.Thread(target=self.httpd.serve_forever)
         self.server_thread.daemon = True
         self.server_thread.start()
index 97587ad2f56215c5d57fd06c45b55a7c8ebf9d8e..2bcf8ecf03b588e3ca250cb298dff4bcee551f32 100644 (file)
@@ -61,6 +61,7 @@ from youtube_dl.utils import (
     lowercase_escape,
     url_basename,
     urlencode_postdata,
+    update_url_query,
     version_tuple,
     xpath_with_ns,
     xpath_element,
@@ -76,6 +77,8 @@ from youtube_dl.utils import (
 )
 from youtube_dl.compat import (
     compat_etree_fromstring,
+    compat_urlparse,
+    compat_parse_qs,
 )
 
 
@@ -454,6 +457,40 @@ class TestUtil(unittest.TestCase):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
         self.assertTrue(isinstance(data, bytes))
 
+    def test_update_url_query(self):
+        def query_dict(url):
+            return compat_parse_qs(compat_urlparse.urlparse(url).query)
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+            query_dict('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?manifest=f4m', {'manifest': []})),
+            query_dict('http://example.com/path'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+            query_dict('http://example.com/path?system=LINUX'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'width': 1080, 'height': 720})),
+            query_dict('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'bitrate': 5020.43})),
+            query_dict('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'test': '第二行тест'})),
+            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
     def test_dict_get(self):
         FALSE_VALUES = {
             'none': None,
index 97cf31eb2cfc4c5f59bb16a0a7b90ec04f0806a8..94e4ea43200efe8fefae1e4db1c5149902f0d007 100755 (executable)
@@ -24,9 +24,6 @@ import time
 import tokenize
 import traceback
 
-if os.name == 'nt':
-    import ctypes
-
 from .compat import (
     compat_basestring,
     compat_cookiejar,
@@ -34,6 +31,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
@@ -96,6 +94,9 @@ from .postprocessor import (
 )
 from .version import __version__
 
+if compat_os_name == 'nt':
+    import ctypes
+
 
 class YoutubeDL(object):
     """YoutubeDL class.
@@ -451,7 +452,7 @@ class YoutubeDL(object):
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
             return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
             # c_wchar_p() might not be necessary if `message` is
             # already of type unicode()
             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@@ -522,7 +523,7 @@ class YoutubeDL(object):
         else:
             if self.params.get('no_warnings'):
                 return
-            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                 _msg_header = '\033[0;33mWARNING:\033[0m'
             else:
                 _msg_header = 'WARNING:'
@@ -534,7 +535,7 @@ class YoutubeDL(object):
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
         '''
-        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
@@ -567,7 +568,7 @@ class YoutubeDL(object):
                 elif template_dict.get('height'):
                     template_dict['resolution'] = '%sp' % template_dict['height']
                 elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
@@ -1632,7 +1633,7 @@ class YoutubeDL(object):
                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                 return
 
-            if success:
+            if success and filename != '-':
                 # Fixup content
                 fixup_policy = self.params.get('fixup')
                 if fixup_policy is None:
index f5f06424146f88fa045a26db569cbe7fa42da586..79b3898409bb4cd2f703a15b26f7225408c279d6 100644 (file)
@@ -355,6 +355,7 @@ def _real_main(argv=None):
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
         'extract_flat': opts.extract_flat,
+        'mark_watched': opts.mark_watched,
         'merge_output_format': opts.merge_output_format,
         'postprocessors': postprocessors,
         'fixup': opts.fixup,
index b497da6964944b5d47e0805f9e872357b6c11c74..2771fb5faa371ccf01d1d4f9449b87c7a380c175 100644 (file)
@@ -326,6 +326,9 @@ def compat_ord(c):
         return ord(c)
 
 
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
@@ -346,7 +349,7 @@ else:
     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
     # for different platforms with correct environment variables decoding.
 
-    if os.name == 'posix':
+    if compat_os_name == 'posix':
         def compat_expanduser(path):
             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
             do nothing."""
@@ -370,7 +373,7 @@ else:
                 userhome = pwent.pw_dir
             userhome = userhome.rstrip('/')
             return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
+    elif compat_os_name == 'nt' or compat_os_name == 'ce':
         def compat_expanduser(path):
             """Expand ~ and ~user constructs.
 
@@ -556,6 +559,7 @@ __all__ = [
     'compat_itertools_count',
     'compat_kwargs',
     'compat_ord',
+    'compat_os_name',
     'compat_parse_qs',
     'compat_print',
     'compat_shlex_split',
index 2d51540518f7ee40c7d002d632007d5aa9542697..f39db58f6c13f623a00b37ef0565dad70f18a305 100644 (file)
@@ -5,6 +5,7 @@ import re
 import sys
 import time
 
+from ..compat import compat_os_name
 from ..utils import (
     encodeFilename,
     error_to_compat_str,
@@ -219,7 +220,7 @@ class FileDownloader(object):
         if self.params.get('progress_with_newline', False):
             self.to_screen(fullmsg)
         else:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 prev_len = getattr(self, '_report_progress_prev_line_length',
                                    0)
                 if prev_len > len(fullmsg):
index 5817140c0c407c8f0c9468dd7df3efd3e5f29500..899bf8114f3e7711ef145b373cc9b1d59fec9089 100644 (file)
@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
 from .animeondemand import AnimeOnDemandIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
-from .aol import AolIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
@@ -340,6 +343,7 @@ from .konserthusetplay import KonserthusetPlayIE
 from .kontrtube import KontrTubeIE
 from .krasview import KrasViewIE
 from .ku6 import Ku6IE
+from .kusi import KUSIIE
 from .kuwo import (
     KuwoIE,
     KuwoAlbumIE,
@@ -352,10 +356,9 @@ from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
 from .lecture2go import Lecture2GoIE
 from .lemonde import LemondeIE
-from .letv import (
-    LetvIE,
-    LetvTvIE,
-    LetvPlaylistIE,
+from .leeco import (
+    LeIE,
+    LePlaylistIE,
     LetvCloudIE,
 )
 from .libsyn import LibsynIE
@@ -506,6 +509,7 @@ from .npr import NprIE
 from .nrk import (
     NRKIE,
     NRKPlaylistIE,
+    NRKSkoleIE,
     NRKTVIE,
 )
 from .ntvde import NTVDeIE
index b51eafc45928f8e6ff4ce571763593f71b715583..b761b2cc4c5d3d4b70766ed56ff5c3529dd39e6b 100644 (file)
@@ -1,24 +1,11 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'''(?x)
-        (?:
-            aol-video:|
-            http://on\.aol\.com/
-            (?:
-                video/.*-|
-                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
-            )
-        )
-        (?P<id>[0-9]+)
-        (?:$|\?)
-    '''
+    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
 
     _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
         },
         'add_ie': ['FiveMin'],
-    }, {
-        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
-        'info_dict': {
-            'id': '152147',
-            'title': 'Brace Yourself - Today\'s Weirdest News',
-        },
-        'playlist_mincount': 10,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        playlist_id = mobj.group('playlist_id')
-        if not playlist_id or self._downloader.params.get('noplaylist'):
-            return self.url_result('5min:%s' % video_id)
+        video_id = self._match_id(url)
+        return self.url_result('5min:%s' % video_id)
 
-        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._html_search_regex(
-            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
-        playlist_html = self._search_regex(
-            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
-            'playlist HTML')
-        entries = [{
-            '_type': 'url',
-            'url': 'aol-video:%s' % m.group('id'),
-            'ie_key': 'Aol',
-        } for m in re.finditer(
-            r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
-            playlist_html)]
+class AolFeaturesIE(InfoExtractor):
+    IE_NAME = 'features.aol.com'
+    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
 
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'display_id': mobj.group('playlist_display_id'),
-            'title': title,
-            'entries': entries,
-        }
+    _TESTS = [{
+        'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
+        'md5': '7db483bb0c09c85e241f84a34238cc75',
+        'info_dict': {
+            'id': '519507715',
+            'ext': 'mp4',
+            'title': 'What To Watch - February 17, 2016',
+        },
+        'add_ie': ['FiveMin'],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return self.url_result(self._search_regex(
+            r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
+            webpage, '5min embed url'), 'FiveMin')
index 3b2effa15fe15a5527644349d785b452540c7568..aa6925623140f08090515fda2f42a7debd5545ac 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class AudiMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
     _TEST = {
-        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
         'md5': '79a8b71c46d49042609795ab59779b66',
         'info_dict': {
             'id': '1565',
@@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
+        raw_payload = self._search_regex([
+            r'class="amtv-embed"[^>]+id="([^"]+)"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+        ], webpage, 'raw payload')
         _, stage_mode, video_id, lang = raw_payload.split('-')
 
         # TODO: handle s and e stage_mode (live streams and ended live streams)
@@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
                 video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                 if not video_version_url:
                     continue
-                formats.append({
+                f = {
                     'url': video_version_url,
                     'width': int_or_none(video_version.get('width')),
                     'height': int_or_none(video_version.get('height')),
                     'abr': int_or_none(video_version.get('audio_bitrate')),
                     'vbr': int_or_none(video_version.get('video_bitrate')),
-                })
+                }
+                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+                if bitrate:
+                    f.update({
+                        'format_id': 'http-%s' % bitrate,
+                    })
+                formats.append(f)
             self._sort_formats(formats)
 
             return {
index 38bda3af5a189cc7a2c8d65937a7d710edd0211f..7a8e1f60b82923b643918e43924fa64a5250cb83 100644 (file)
@@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
-        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+        'md5': '6a5cd403418c7b01719248ca97fb0692',
         'info_dict': {
             'id': '2586817',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
             'timestamp': 1446839961,
             'uploader': 'Sean Fay',
@@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
         'md5': '8c2c12e3af7805152675446c905d159b',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
index 3f16b1b9ef16c43c55ca529b8c5eb78348134f4b..07bd2cbe2f9f6c4213e31e4c6e90d2df7b611f58 100644 (file)
@@ -15,13 +15,14 @@ import math
 from ..compat import (
     compat_cookiejar,
     compat_cookies,
+    compat_etree_fromstring,
     compat_getpass,
     compat_http_client,
+    compat_os_name,
+    compat_str,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urlparse,
-    compat_str,
-    compat_etree_fromstring,
 )
 from ..utils import (
     NO_DEFAULT,
@@ -157,12 +158,14 @@ class InfoExtractor(object):
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
+    license:        License name the video is licensed under.
     creator:        The main artist who created the video.
     release_date:   The date (YYYYMMDD) when the video was released.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
+    uploader_url:   Full URL to a personal webpage of the video uploader.
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
                     {language: subformats}. "subformats" is a list sorted from
@@ -425,7 +428,7 @@ class InfoExtractor(object):
             self.to_screen('Saving request to ' + filename)
             # Working around MAX_PATH limitation on Windows (see
             # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 absfilepath = os.path.abspath(filename)
                 if len(absfilepath) > 259:
                     filename = '\\\\?\\' + absfilepath
@@ -594,7 +597,7 @@ class InfoExtractor(object):
                 if mobj:
                     break
 
-        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
             _name = '\033[0;34m%s\033[0m' % name
         else:
             _name = name
@@ -1620,6 +1623,15 @@ class InfoExtractor(object):
     def _get_automatic_captions(self, *args, **kwargs):
         raise NotImplementedError('This method must be implemented by subclasses')
 
+    def mark_watched(self, *args, **kwargs):
+        if (self._downloader.params.get('mark_watched', False) and
+                (self._get_login_info()[0] is not None or
+                    self._downloader.params.get('cookiefile') is not None)):
+            self._mark_watched(*args, **kwargs)
+
+    def _mark_watched(self, *args, **kwargs):
+        raise NotImplementedError('This method must be implemented by subclasses')
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 373b3b4b4735d8544128c48a10037eed3c570e5d..bdc768c783b9b3213badc5cf4b354f6159142f9f 100644 (file)
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
             'display_id': 'iseven',
             'ext': 'flv',
             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
             'uploader_id': '431925',
@@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://www.douyutv.com/85982',
         'info_dict': {
@@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Romm not found',
+    }, {
+        'url': 'http://www.douyutv.com/17732',
+        'info_dict': {
+            'id': '17732',
+            'display_id': '17732',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
index 00a69e6312aede6069e062c6abff29137939daa9..8c725a4e631860584781b116e72b02dd05813fc2 100644 (file)
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
     IE_DESC = 'El País'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
         'md5': '98406f301f19562170ec071b83433d55',
         'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
             'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
             'upload_date': '20140206',
         }
-    }
+    }, {
+        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+        'md5': '3bd5b09509f3519d7d9e763179b013de',
+        'info_dict': {
+            'id': '1456340311_668921',
+            'ext': 'mp4',
+            'title': 'Cómo hacer el mejor café con cafetera italiana',
+            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+            'upload_date': '20160303',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
-            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
         video_suffix = self._search_regex(
-            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
         video_url = prefix + video_suffix
         thumbnail_suffix = self._search_regex(
-            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
-            fatal=False)
+            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+            webpage, 'thumbnail URL', fatal=False)
         thumbnail = (
             None if thumbnail_suffix is None
             else prefix + thumbnail_suffix)
         title = self._html_search_regex(
-            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
             webpage, 'title')
-        date_str = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
             r'<p class="date-header date-int updated"\s+title="([^"]+)">',
-            webpage, 'upload date', fatal=False)
-        upload_date = (None if date_str is None else unified_strdate(date_str))
+            webpage, 'upload date', default=None) or self._html_search_meta(
+            'datePublished', webpage, 'timestamp'))
 
         return {
             'id': video_id,
index e4180701d7d5fe7f538d029e8ffb27235b6135df..e5e57d48518d3dd3999dad650d0c32406079ce33 100644 (file)
@@ -1,21 +1,13 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    url_basename,
-)
 
 
 class EngadgetIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www.engadget.com/
-        (?:video(?:/5min)?/(?P<id>\d+)|
-            [\d/]+/.*?)
-        '''
+    _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'url': 'http://www.engadget.com/video/518153925/',
         'md5': 'c6820d4828a5064447a4d9fc73f312c9',
         'info_dict': {
             'id': '518153925',
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        if video_id is not None:
-            return self.url_result('5min:%s' % video_id)
-        else:
-            title = url_basename(url)
-            webpage = self._download_webpage(url, title)
-            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
-            return {
-                '_type': 'playlist',
-                'title': title,
-                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
-            }
+        return self.url_result('5min:%s' % video_id)
index 2955965d908c15f21b3f8880993530779f97ec15..67d50a386ce812018047f711205b8619d75c1bf8 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
@@ -16,12 +18,7 @@ from ..utils import (
 
 class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
-    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
-            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
-            5min:)
-        (?P<id>\d+)
-        '''
+    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
 
     _TESTS = [
         {
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
                 'title': 'How to Make a Next-Level Fruit Salad',
                 'duration': 184,
             },
+            'skip': 'no longer available',
         },
     ]
     _ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sid = mobj.group('sid')
+
+        if mobj.group('query'):
+            qs = compat_parse_qs(mobj.group('query'))
+            if not qs.get('playList'):
+                raise ExtractorError('Invalid URL', expected=True)
+            video_id = qs['playList'][0]
+            if qs.get('sid'):
+                sid = qs['sid'][0]
+
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
-        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
-        query = compat_urllib_parse.urlencode({
-            'func': 'GetResults',
-            'playlist': video_id,
-            'sid': sid,
-            'isPlayerSeed': 'true',
-            'url': embed_url,
-        })
+        if not sid:
+            embed_page = self._download_webpage(embed_url, video_id,
+                                                'Downloading embed page')
+            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
         response = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+            compat_urllib_parse.urlencode({
+                'func': 'GetResults',
+                'playlist': video_id,
+                'sid': sid,
+                'isPlayerSeed': 'true',
+                'url': embed_url,
+            }),
             video_id)
         if not response['success']:
             raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
         for rendition in info['Renditions']:
-            if rendition['RenditionType'] == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
-            elif rendition['RenditionType'] == 'aac':
+            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
                 continue
             else:
                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
index 318ac013d44b9ca8ce9de5c77d67b2cd3c9bb1e1..1dc50318ce81feb2604cfef943ddfb90e0a7641b 100644 (file)
@@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
                 # 'upload_date': '20141204',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
index 12fb5e8e1dcb1e5dfba8057a1496edcbe0f61f82..9622f198aa6aaf99094a9b85c5a914d4f0c07d46 100644 (file)
@@ -73,7 +73,7 @@ class IndavideoEmbedIE(InfoExtractor):
             'url': self._proto_relative_url(thumbnail)
         } for thumbnail in video.get('thumbnails', [])]
 
-        tags = [tag['title'] for tag in video.get('tags', [])]
+        tags = [tag['title'] for tag in video.get('tags') or []]
 
         return {
             'id': video.get('id') or video_id,
index eef7daa299813219c5211aefe2051a1160238319..137db873cc09f7e57b258bcf65b8331d8b36b8c0 100644 (file)
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
-            r'data-src="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
         config_url = 'http://www.jeuxvideo.com' + config_url
 
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py
new file mode 100644 (file)
index 0000000..931f34c
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    timeconvert,
+    update_url_query,
+    xpath_text,
+)
+
+
+class KUSIIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+    _TESTS = [{
+        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
+        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+    }, {
+        'url': 'http://kusi.com/video?clipId=12203019',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # Same as previous one
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        clip_id = mobj.group('clipId')
+        video_id = clip_id or mobj.group('path')
+
+        webpage = self._download_webpage(url, video_id)
+
+        if clip_id is None:
+            video_id = clip_id = self._html_search_regex(
+                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
+
+        affiliate_id = self._search_regex(
+            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
+
+        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
+        xml_url = update_url_query('http://www.kusi.com/build.asp', {
+            'buildtype': 'buildfeaturexmlrequest',
+            'featureType': 'Clip',
+            'featureid': clip_id,
+            'affiliateno': affiliate_id,
+            'clientgroupid': '1',
+            'rnd': int(round(random.random() * 1000000)),
+        })
+
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
+        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
+        description = xpath_text(doc, 'ABSTRACT')
+        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
+        createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+
+        quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
+        formats = []
+        for quality in quality_options:
+            formats.append({
+                'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+                'height': int_or_none(quality.attrib.get('height')),
+                'width': int_or_none(quality.attrib.get('width')),
+                'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'timestamp': createtion_time,
+        }
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
new file mode 100644 (file)
index 0000000..df47e88
--- /dev/null
@@ -0,0 +1,361 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import datetime
+import hashlib
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_ord,
+    compat_str,
+    compat_urllib_parse,
+)
+from ..utils import (
+    determine_ext,
+    encode_data_uri,
+    ExtractorError,
+    int_or_none,
+    orderedSet,
+    parse_iso8601,
+    sanitized_Request,
+    str_or_none,
+    url_basename,
+)
+
+
+class LeIE(InfoExtractor):
+    IE_DESC = '乐视网'
+    _VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
+
+    _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
+
+    _TESTS = [{
+        'url': 'http://www.le.com/ptv/vplay/22005890.html',
+        'md5': 'edadcfe5406976f42f9f266057ee5e40',
+        'info_dict': {
+            'id': '22005890',
+            'ext': 'mp4',
+            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
+    }, {
+        'url': 'http://www.le.com/ptv/vplay/1415246.html',
+        'info_dict': {
+            'id': '1415246',
+            'ext': 'mp4',
+            'title': '美人天下01',
+            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
+    }, {
+        'note': 'This video is available only in Mainland China, thus a proxy is needed',
+        'url': 'http://www.le.com/ptv/vplay/1118082.html',
+        'md5': '2424c74948a62e5f31988438979c5ad1',
+        'info_dict': {
+            'id': '1118082',
+            'ext': 'mp4',
+            'title': '与龙共舞 完整版',
+            'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
+        'skip': 'Only available in China',
+    }]
+
+    @staticmethod
+    def urshift(val, n):
+        return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+    # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
+    def ror(self, param1, param2):
+        _loc3_ = 0
+        while _loc3_ < param2:
+            param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
+            _loc3_ += 1
+        return param1
+
+    def calc_time_key(self, param1):
+        _loc2_ = 773625421
+        _loc3_ = self.ror(param1, _loc2_ % 13)
+        _loc3_ = _loc3_ ^ _loc2_
+        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
+        return _loc3_
+
+    # see M3U8Encryption class in KLetvPlayer.swf
+    @staticmethod
+    def decrypt_m3u8(encrypted_data):
+        if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
+            return encrypted_data
+        encrypted_data = encrypted_data[5:]
+
+        _loc4_ = bytearray(2 * len(encrypted_data))
+        for idx, val in enumerate(encrypted_data):
+            b = compat_ord(val)
+            _loc4_[2 * idx] = b // 16
+            _loc4_[2 * idx + 1] = b % 16
+        idx = len(_loc4_) - 11
+        _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+        _loc7_ = bytearray(len(encrypted_data))
+        for i in range(len(encrypted_data)):
+            _loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
+
+        return bytes(_loc7_)
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        page = self._download_webpage(url, media_id)
+        params = {
+            'id': media_id,
+            'platid': 1,
+            'splatid': 101,
+            'format': 1,
+            'tkey': self.calc_time_key(int(time.time())),
+            'domain': 'www.le.com'
+        }
+        play_json_req = sanitized_Request(
+            'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
+        )
+        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+        if cn_verification_proxy:
+            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
+        play_json = self._download_json(
+            play_json_req,
+            media_id, 'Downloading playJson data')
+
+        # Check for errors
+        playstatus = play_json['playstatus']
+        if playstatus['status'] == 0:
+            flag = playstatus['flag']
+            if flag == 1:
+                msg = 'Country %s auth error' % playstatus['country']
+            else:
+                msg = 'Generic error. flag = %d' % flag
+            raise ExtractorError(msg, expected=True)
+
+        playurl = play_json['playurl']
+
+        formats = ['350', '1000', '1300', '720p', '1080p']
+        dispatch = playurl['dispatch']
+
+        urls = []
+        for format_id in formats:
+            if format_id in dispatch:
+                media_url = playurl['domain'][0] + dispatch[format_id][0]
+                media_url += '&' + compat_urllib_parse.urlencode({
+                    'm3v': 1,
+                    'format': 1,
+                    'expect': 3,
+                    'rateid': format_id,
+                })
+
+                nodes_data = self._download_json(
+                    media_url, media_id,
+                    'Download JSON metadata for format %s' % format_id)
+
+                req = self._request_webpage(
+                    nodes_data['nodelist'][0]['location'], media_id,
+                    note='Downloading m3u8 information for format %s' % format_id)
+
+                m3u8_data = self.decrypt_m3u8(req.read())
+
+                url_info_dict = {
+                    'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
+                    'ext': determine_ext(dispatch[format_id][1]),
+                    'format_id': format_id,
+                    'protocol': 'm3u8',
+                }
+
+                if format_id[-1:] == 'p':
+                    url_info_dict['height'] = int_or_none(format_id[:-1])
+
+                urls.append(url_info_dict)
+
+        publish_time = parse_iso8601(self._html_search_regex(
+            r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
+            delimiter=' ', timezone=datetime.timedelta(hours=8))
+        description = self._html_search_meta('description', page, fatal=False)
+
+        return {
+            'id': media_id,
+            'formats': urls,
+            'title': playurl['title'],
+            'thumbnail': playurl['pic'],
+            'description': description,
+            'timestamp': publish_time,
+        }
+
+
+class LePlaylistIE(InfoExtractor):
+    _VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
+
+    _TESTS = [{
+        'url': 'http://www.le.com/tv/46177.html',
+        'info_dict': {
+            'id': '46177',
+            'title': '美人天下',
+            'description': 'md5:395666ff41b44080396e59570dbac01c'
+        },
+        'playlist_count': 35
+    }, {
+        'url': 'http://tv.le.com/izt/wuzetian/index.html',
+        'info_dict': {
+            'id': 'wuzetian',
+            'title': '武媚娘传奇',
+            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+        },
+        # This playlist contains some extra videos other than the drama itself
+        'playlist_mincount': 96
+    }, {
+        'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
+        # This series is moved to http://www.le.com/tv/10005297.html
+        'only_matching': True,
+    }, {
+        'url': 'http://www.le.com/comic/92063.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        page = self._download_webpage(url, playlist_id)
+
+        # Currently old domain names are still used in playlists
+        media_ids = orderedSet(re.findall(
+            r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
+        entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
+                   for media_id in media_ids]
+
+        title = self._html_search_meta('keywords', page,
+                                       fatal=False).split(',')[0]
+        description = self._html_search_meta('description', page, fatal=False)
+
+        return self.playlist_result(entries, playlist_id, playlist_title=title,
+                                    playlist_description=description)
+
+
+class LetvCloudIE(InfoExtractor):
+    # Most of *.letv.com is changed to *.le.com on 2016/01/02
+    # but yuntv.letv.com is kept, so also keep the extractor name
+    IE_DESC = '乐视云'
+    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
+
+    _TESTS = [{
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
+        'md5': '26450599afd64c513bc77030ad15db44',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_467623dedf',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_467623dedf',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+        'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_ec93197892',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_ec93197892',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+        'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_187060b6fd',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_187060b6fd',
+        },
+    }]
+
+    @staticmethod
+    def sign_data(obj):
+        if obj['cf'] == 'flash':
+            salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
+            items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
+        elif obj['cf'] == 'html5':
+            salt = 'fbeh5player12c43eccf2bec3300344'
+            items = ['cf', 'ran', 'uu', 'bver', 'vu']
+        input_data = ''.join([item + obj[item] for item in items]) + salt
+        obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
+
+    def _get_formats(self, cf, uu, vu, media_id):
+        def get_play_json(cf, timestamp):
+            data = {
+                'cf': cf,
+                'ver': '2.2',
+                'bver': 'firefox44.0',
+                'format': 'json',
+                'uu': uu,
+                'vu': vu,
+                'ran': compat_str(timestamp),
+            }
+            self.sign_data(data)
+            return self._download_json(
+                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
+                media_id, 'Downloading playJson data for type %s' % cf)
+
+        play_json = get_play_json(cf, time.time())
+        # The server time may be different from local time
+        if play_json.get('code') == 10071:
+            play_json = get_play_json(cf, play_json['timestamp'])
+
+        if not play_json.get('data'):
+            if play_json.get('message'):
+                raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
+            elif play_json.get('code'):
+                raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
+            else:
+                raise ExtractorError('Letv cloud returned an unknwon error')
+
+        def b64decode(s):
+            return base64.b64decode(s.encode('utf-8')).decode('utf-8')
+
+        formats = []
+        for media in play_json['data']['video_info']['media'].values():
+            play_url = media['play_url']
+            url = b64decode(play_url['main_url'])
+            decoded_url = b64decode(url_basename(url))
+            formats.append({
+                'url': url,
+                'ext': determine_ext(decoded_url),
+                'format_id': str_or_none(play_url.get('vtype')),
+                'format_note': str_or_none(play_url.get('definition')),
+                'width': int_or_none(play_url.get('vwidth')),
+                'height': int_or_none(play_url.get('vheight')),
+            })
+
+        return formats
+
+    def _real_extract(self, url):
+        uu_mobj = re.search('uu=([\w]+)', url)
+        vu_mobj = re.search('vu=([\w]+)', url)
+
+        if not uu_mobj or not vu_mobj:
+            raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+        uu = uu_mobj.group(1)
+        vu = vu_mobj.group(1)
+        media_id = uu + '_' + vu
+
+        formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'title': 'Video %s' % media_id,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
deleted file mode 100644 (file)
index 9fd494c..0000000
+++ /dev/null
@@ -1,355 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import datetime
-import re
-import time
-import base64
-import hashlib
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_ord,
-    compat_str,
-)
-from ..utils import (
-    determine_ext,
-    ExtractorError,
-    parse_iso8601,
-    sanitized_Request,
-    int_or_none,
-    str_or_none,
-    encode_data_uri,
-    url_basename,
-)
-
-
-class LetvIE(InfoExtractor):
-    IE_DESC = '乐视网'
-    _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
-
-    _TESTS = [{
-        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
-        'md5': 'edadcfe5406976f42f9f266057ee5e40',
-        'info_dict': {
-            'id': '22005890',
-            'ext': 'mp4',
-            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
-            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
-        },
-        'params': {
-            'hls_prefer_native': True,
-        },
-    }, {
-        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
-        'info_dict': {
-            'id': '1415246',
-            'ext': 'mp4',
-            'title': '美人天下01',
-            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
-        },
-        'params': {
-            'hls_prefer_native': True,
-        },
-    }, {
-        'note': 'This video is available only in Mainland China, thus a proxy is needed',
-        'url': 'http://www.letv.com/ptv/vplay/1118082.html',
-        'md5': '2424c74948a62e5f31988438979c5ad1',
-        'info_dict': {
-            'id': '1118082',
-            'ext': 'mp4',
-            'title': '与龙共舞 完整版',
-            'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
-        },
-        'params': {
-            'hls_prefer_native': True,
-        },
-        'skip': 'Only available in China',
-    }]
-
-    @staticmethod
-    def urshift(val, n):
-        return val >> n if val >= 0 else (val + 0x100000000) >> n
-
-    # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
-    def ror(self, param1, param2):
-        _loc3_ = 0
-        while _loc3_ < param2:
-            param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
-            _loc3_ += 1
-        return param1
-
-    def calc_time_key(self, param1):
-        _loc2_ = 773625421
-        _loc3_ = self.ror(param1, _loc2_ % 13)
-        _loc3_ = _loc3_ ^ _loc2_
-        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
-        return _loc3_
-
-    # see M3U8Encryption class in KLetvPlayer.swf
-    @staticmethod
-    def decrypt_m3u8(encrypted_data):
-        if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
-            return encrypted_data
-        encrypted_data = encrypted_data[5:]
-
-        _loc4_ = bytearray(2 * len(encrypted_data))
-        for idx, val in enumerate(encrypted_data):
-            b = compat_ord(val)
-            _loc4_[2 * idx] = b // 16
-            _loc4_[2 * idx + 1] = b % 16
-        idx = len(_loc4_) - 11
-        _loc4_ = _loc4_[idx:] + _loc4_[:idx]
-        _loc7_ = bytearray(len(encrypted_data))
-        for i in range(len(encrypted_data)):
-            _loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
-
-        return bytes(_loc7_)
-
-    def _real_extract(self, url):
-        media_id = self._match_id(url)
-        page = self._download_webpage(url, media_id)
-        params = {
-            'id': media_id,
-            'platid': 1,
-            'splatid': 101,
-            'format': 1,
-            'tkey': self.calc_time_key(int(time.time())),
-            'domain': 'www.letv.com'
-        }
-        play_json_req = sanitized_Request(
-            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
-        )
-        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
-        if cn_verification_proxy:
-            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
-
-        play_json = self._download_json(
-            play_json_req,
-            media_id, 'Downloading playJson data')
-
-        # Check for errors
-        playstatus = play_json['playstatus']
-        if playstatus['status'] == 0:
-            flag = playstatus['flag']
-            if flag == 1:
-                msg = 'Country %s auth error' % playstatus['country']
-            else:
-                msg = 'Generic error. flag = %d' % flag
-            raise ExtractorError(msg, expected=True)
-
-        playurl = play_json['playurl']
-
-        formats = ['350', '1000', '1300', '720p', '1080p']
-        dispatch = playurl['dispatch']
-
-        urls = []
-        for format_id in formats:
-            if format_id in dispatch:
-                media_url = playurl['domain'][0] + dispatch[format_id][0]
-                media_url += '&' + compat_urllib_parse.urlencode({
-                    'm3v': 1,
-                    'format': 1,
-                    'expect': 3,
-                    'rateid': format_id,
-                })
-
-                nodes_data = self._download_json(
-                    media_url, media_id,
-                    'Download JSON metadata for format %s' % format_id)
-
-                req = self._request_webpage(
-                    nodes_data['nodelist'][0]['location'], media_id,
-                    note='Downloading m3u8 information for format %s' % format_id)
-
-                m3u8_data = self.decrypt_m3u8(req.read())
-
-                url_info_dict = {
-                    'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
-                    'ext': determine_ext(dispatch[format_id][1]),
-                    'format_id': format_id,
-                    'protocol': 'm3u8',
-                }
-
-                if format_id[-1:] == 'p':
-                    url_info_dict['height'] = int_or_none(format_id[:-1])
-
-                urls.append(url_info_dict)
-
-        publish_time = parse_iso8601(self._html_search_regex(
-            r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
-            delimiter=' ', timezone=datetime.timedelta(hours=8))
-        description = self._html_search_meta('description', page, fatal=False)
-
-        return {
-            'id': media_id,
-            'formats': urls,
-            'title': playurl['title'],
-            'thumbnail': playurl['pic'],
-            'description': description,
-            'timestamp': publish_time,
-        }
-
-
-class LetvTvIE(InfoExtractor):
-    _VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
-    _TESTS = [{
-        'url': 'http://www.letv.com/tv/46177.html',
-        'info_dict': {
-            'id': '46177',
-            'title': '美人天下',
-            'description': 'md5:395666ff41b44080396e59570dbac01c'
-        },
-        'playlist_count': 35
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        page = self._download_webpage(url, playlist_id)
-
-        media_urls = list(set(re.findall(
-            r'http://www.letv.com/ptv/vplay/\d+.html', page)))
-        entries = [self.url_result(media_url, ie='Letv')
-                   for media_url in media_urls]
-
-        title = self._html_search_meta('keywords', page,
-                                       fatal=False).split(',')[0]
-        description = self._html_search_meta('description', page, fatal=False)
-
-        return self.playlist_result(entries, playlist_id, playlist_title=title,
-                                    playlist_description=description)
-
-
-class LetvPlaylistIE(LetvTvIE):
-    _VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
-    _TESTS = [{
-        'url': 'http://tv.letv.com/izt/wuzetian/index.html',
-        'info_dict': {
-            'id': 'wuzetian',
-            'title': '武媚娘传奇',
-            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
-        },
-        # This playlist contains some extra videos other than the drama itself
-        'playlist_mincount': 96
-    }, {
-        'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
-        'info_dict': {
-            'id': 'lswjzzjc',
-            # The title should be "劲舞青春", but I can't find a simple way to
-            # determine the playlist title
-            'title': '乐视午间自制剧场',
-            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
-        },
-        'playlist_mincount': 7
-    }]
-
-
-class LetvCloudIE(InfoExtractor):
-    IE_DESC = '乐视云'
-    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
-
-    _TESTS = [{
-        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
-        'md5': '26450599afd64c513bc77030ad15db44',
-        'info_dict': {
-            'id': 'p7jnfw5hw9_467623dedf',
-            'ext': 'mp4',
-            'title': 'Video p7jnfw5hw9_467623dedf',
-        },
-    }, {
-        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
-        'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
-        'info_dict': {
-            'id': 'p7jnfw5hw9_ec93197892',
-            'ext': 'mp4',
-            'title': 'Video p7jnfw5hw9_ec93197892',
-        },
-    }, {
-        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
-        'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
-        'info_dict': {
-            'id': 'p7jnfw5hw9_187060b6fd',
-            'ext': 'mp4',
-            'title': 'Video p7jnfw5hw9_187060b6fd',
-        },
-    }]
-
-    @staticmethod
-    def sign_data(obj):
-        if obj['cf'] == 'flash':
-            salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
-            items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
-        elif obj['cf'] == 'html5':
-            salt = 'fbeh5player12c43eccf2bec3300344'
-            items = ['cf', 'ran', 'uu', 'bver', 'vu']
-        input_data = ''.join([item + obj[item] for item in items]) + salt
-        obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
-
-    def _get_formats(self, cf, uu, vu, media_id):
-        def get_play_json(cf, timestamp):
-            data = {
-                'cf': cf,
-                'ver': '2.2',
-                'bver': 'firefox44.0',
-                'format': 'json',
-                'uu': uu,
-                'vu': vu,
-                'ran': compat_str(timestamp),
-            }
-            self.sign_data(data)
-            return self._download_json(
-                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
-                media_id, 'Downloading playJson data for type %s' % cf)
-
-        play_json = get_play_json(cf, time.time())
-        # The server time may be different from local time
-        if play_json.get('code') == 10071:
-            play_json = get_play_json(cf, play_json['timestamp'])
-
-        if not play_json.get('data'):
-            if play_json.get('message'):
-                raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
-            elif play_json.get('code'):
-                raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
-            else:
-                raise ExtractorError('Letv cloud returned an unknwon error')
-
-        def b64decode(s):
-            return base64.b64decode(s.encode('utf-8')).decode('utf-8')
-
-        formats = []
-        for media in play_json['data']['video_info']['media'].values():
-            play_url = media['play_url']
-            url = b64decode(play_url['main_url'])
-            decoded_url = b64decode(url_basename(url))
-            formats.append({
-                'url': url,
-                'ext': determine_ext(decoded_url),
-                'format_id': int_or_none(play_url.get('vtype')),
-                'format_note': str_or_none(play_url.get('definition')),
-                'width': int_or_none(play_url.get('vwidth')),
-                'height': int_or_none(play_url.get('vheight')),
-            })
-
-        return formats
-
-    def _real_extract(self, url):
-        uu_mobj = re.search('uu=([\w]+)', url)
-        vu_mobj = re.search('vu=([\w]+)', url)
-
-        if not uu_mobj or not vu_mobj:
-            raise ExtractorError('Invalid URL: %s' % url, expected=True)
-
-        uu = uu_mobj.group(1)
-        vu = vu_mobj.group(1)
-        media_id = uu + '_' + vu
-
-        formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
-        self._sort_formats(formats)
-
-        return {
-            'id': media_id,
-            'title': 'Video %s' % media_id,
-            'formats': formats,
-        }
index f8cbca7b36afab1890b71806d6761bbe67d7d924..a8fd639cc9276c2290f9c89dbded1bb9a1cbe7dd 100644 (file)
@@ -20,18 +20,18 @@ class LifeNewsIE(InfoExtractor):
     _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://lifenews.ru/news/126342',
-        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
+        # single video embedded via video/source
+        'url': 'http://lifenews.ru/news/98736',
+        'md5': '77c95eaefaca216e32a76a343ad89d23',
         'info_dict': {
-            'id': '126342',
+            'id': '98736',
             'ext': 'mp4',
-            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
-            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
-            'thumbnail': 're:http://.*\.jpg',
-            'upload_date': '20140130',
+            'title': 'Мужчина нашел дома архив оборонного завода',
+            'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
+            'upload_date': '20120805',
         }
     }, {
-        # video in <iframe>
+        # single video embedded via iframe
         'url': 'http://lifenews.ru/news/152125',
         'md5': '77d19a6f0886cd76bdbf44b4d971a273',
         'info_dict': {
@@ -42,15 +42,33 @@ class LifeNewsIE(InfoExtractor):
             'upload_date': '20150402',
         }
     }, {
+        # two videos embedded via iframe
         'url': 'http://lifenews.ru/news/153461',
-        'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
         'info_dict': {
             'id': '153461',
-            'ext': 'mp4',
             'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
             'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
             'upload_date': '20150505',
-        }
+        },
+        'playlist': [{
+            'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+            'info_dict': {
+                'id': '153461-video1',
+                'ext': 'mp4',
+                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
+                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'upload_date': '20150505',
+            },
+        }, {
+            'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
+            'info_dict': {
+                'id': '153461-video2',
+                'ext': 'mp4',
+                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
+                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'upload_date': '20150505',
+            },
+        }],
     }, {
         'url': 'http://lifenews.ru/video/13035',
         'only_matching': True,
@@ -65,10 +83,14 @@ class LifeNewsIE(InfoExtractor):
             'http://lifenews.ru/%s/%s' % (section, video_id),
             video_id, 'Downloading page')
 
-        videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
-        iframe_link = self._html_search_regex(
-            '<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
-        if not videos and not iframe_link:
+        video_urls = re.findall(
+            r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
+
+        iframe_links = re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
+            webpage)
+
+        if not video_urls and not iframe_links:
             raise ExtractorError('No media links available for %s' % video_id)
 
         title = remove_end(
@@ -95,31 +117,44 @@ class LifeNewsIE(InfoExtractor):
             'upload_date': upload_date,
         }
 
-        def make_entry(video_id, media, video_number=None):
+        def make_entry(video_id, video_url, index=None):
             cur_info = dict(common_info)
             cur_info.update({
-                'id': video_id,
-                'url': media[1],
-                'thumbnail': media[0],
-                'title': title if video_number is None else '%s-video%s' % (title, video_number),
+                'id': video_id if not index else '%s-video%s' % (video_id, index),
+                'url': video_url,
+                'title': title if not index else '%s (Видео %s)' % (title, index),
             })
             return cur_info
 
-        if iframe_link:
-            iframe_link = self._proto_relative_url(iframe_link, 'http:')
-            cur_info = dict(common_info)
-            cur_info.update({
-                '_type': 'url_transparent',
-                'id': video_id,
-                'title': title,
-                'url': iframe_link,
-            })
+        def make_video_entry(video_id, video_url, index=None):
+            video_url = compat_urlparse.urljoin(url, video_url)
+            return make_entry(video_id, video_url, index)
+
+        def make_iframe_entry(video_id, video_url, index=None):
+            video_url = self._proto_relative_url(video_url, 'http:')
+            cur_info = make_entry(video_id, video_url, index)
+            cur_info['_type'] = 'url_transparent'
             return cur_info
 
-        if len(videos) == 1:
-            return make_entry(video_id, videos[0])
-        else:
-            return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
+        if len(video_urls) == 1 and not iframe_links:
+            return make_video_entry(video_id, video_urls[0])
+
+        if len(iframe_links) == 1 and not video_urls:
+            return make_iframe_entry(video_id, iframe_links[0])
+
+        entries = []
+
+        if video_urls:
+            for num, video_url in enumerate(video_urls, 1):
+                entries.append(make_video_entry(video_id, video_url, num))
+
+        if iframe_links:
+            for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
+                entries.append(make_iframe_entry(video_id, iframe_link, num))
+
+        playlist = common_info.copy()
+        playlist.update(self.playlist_result(entries, video_id, title, description))
+        return playlist
 
 
 class LifeEmbedIE(InfoExtractor):
index 38fb3d9e4166f5f4a188ab2436c27240a8b04283..9884362261aa4deab85f8e25718feaaa7a07afce 100644 (file)
@@ -64,7 +64,7 @@ class LivestreamIE(InfoExtractor):
     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         base_ele = find_xpath_attr(
             smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
-        base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/'
+        base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
 
         formats = []
         video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
index a126f5054fbee3c178cf19915caec9fdb03df297..3b21fbd4d9e1442e3aa8016d74c1b9a97278d90f 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -87,7 +90,7 @@ class NRKIE(InfoExtractor):
 
 
 class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
 
     _TESTS = [{
         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
@@ -126,6 +129,37 @@ class NRKPlaylistIE(InfoExtractor):
             entries, playlist_id, playlist_title, playlist_description)
 
 
+class NRKSkoleIE(InfoExtractor):
+    IE_DESC = 'NRK Skole'
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532',
+        'md5': '04cd85877cc1913bce73c5d28a47e00f',
+        'info_dict': {
+            'id': '6021',
+            'ext': 'flv',
+            'title': 'Genetikk og eneggede tvillinger',
+            'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+            'duration': 399,
+        },
+    }, {
+        'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = compat_urllib_parse_unquote(self._match_id(url))
+
+        webpage = self._download_webpage(url, video_id)
+
+        nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
+        return self.url_result('nrk:%s' % nrk_id)
+
+
 class NRKTVIE(InfoExtractor):
     IE_DESC = 'NRK TV and NRK Radio'
     _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
index b1b8800b97c9eb8caad2c03f999f1bc8f304c4da..99979ebe1a9fe82099076b46b576ef38a58bca8c 100644 (file)
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
         'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
         'md5': 'd94a72d85d0a829766de4deb8daaf7df',
         'info_dict': {
-            'id': '73034',
+            'id': '71089',
             'display_id': 'technobuffalo/5-google-predictions-for-2016',
             'ext': 'webm',
             'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
             'uploader_id': 'technobuffalo',
         }
     }, {
+        # Show
         'url': 'http://testtube.com/brainstuff',
         'info_dict': {
             'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
     }, {
         'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
         'info_dict': {
-            'id': '60163',
+            'id': '58227',
             'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
             'duration': 275,
             'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
             'uploader': 'DNews',
             'uploader_id': 'dnews',
         },
+    }, {
+        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+        'info_dict': {
+            'id': '71618',
+            'ext': 'mp4',
+            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
+            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
+            'uploader': 'Editors\' Picks',
+            'uploader_id': 'tt-editors-picks',
+            'timestamp': 1453309200,
+            'upload_date': '20160120',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # Tag
+        'url': 'http://testtube.com/tech-news',
+        'info_dict': {
+            'id': '21018',
+            'title': 'tech news',
+        },
+        'playlist_mincount': 9,
     }]
     _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
     _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
 
     def _real_extract(self, url):
         domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[0]
         page_info = self._download_json(
             self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
 
-        if page_info['data']['type'] == 'episode':
-            episode_data = page_info['data']
-            video_id = compat_str(episode_data['video']['data']['id'])
+        page_data = page_info['data']
+        page_type = page_data['type']
+        if page_type in ('episode', 'embed'):
+            show_data = page_data['show']['data']
+            page_id = compat_str(page_data['id'])
+            video_id = compat_str(page_data['video']['data']['id'])
+
+            preference = qualities(['mini', 'small', 'medium', 'large'])
+            thumbnails = [{
+                'url': image_url,
+                'id': image_id,
+                'preference': preference(image_id)
+            } for image_id, image_url in page_data.get('images', {}).items()]
+
+            info = {
+                'id': page_id,
+                'display_id': display_id,
+                'title': unescapeHTML(page_data['name']),
+                'description': unescapeHTML(page_data.get('summary')),
+                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+                'author': page_data.get('author'),
+                'uploader': show_data.get('name'),
+                'uploader_id': show_data.get('slug'),
+                'thumbnails': thumbnails,
+                'extractor_key': site,
+            }
+
+            if page_type == 'embed':
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': page_data['video']['data']['embed'],
+                })
+                return info
+
             video_data = self._download_json(
                 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
                 video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
                         })
             self._sort_formats(formats)
 
-            preference = qualities(['mini', 'small', 'medium', 'large'])
-            thumbnails = [{
-                'url': image_url,
-                'id': image_id,
-                'preference': preference(image_id)
-            } for image_id, image_url in video_data.get('images', {}).items()]
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
+            info.update({
                 'title': unescapeHTML(video_data['title']),
                 'description': unescapeHTML(video_data.get('summary')),
-                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
-                'author': episode_data.get('author'),
                 'uploader': video_data.get('show', {}).get('name'),
                 'uploader_id': video_data.get('show', {}).get('slug'),
                 'duration': int_or_none(video_data.get('duration')),
-                'thumbnails': thumbnails,
                 'formats': formats,
-            }
+            })
+            return info
         else:
-            show_data = page_info['show']['data']
+            list_data = page_info[page_type]['data']
             episodes_data = page_info['episodes']['data']
             num_episodes = page_info['meta']['totalEpisodes']
             processed_episodes = 0
             entries = []
             page_num = 1
             while True:
-                entries.extend([self.url_result(
-                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+                entries.extend([{
+                    '_type': 'url',
+                    'url': 'http://%s%s' % (domain, episode['path']),
+                    'id': compat_str(episode['id']),
+                    'ie_key': 'Revision3',
+                    'extractor_key': site,
+                } for episode in episodes_data])
                 processed_episodes += len(episodes_data)
                 if processed_episodes == num_episodes:
                     break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
                     display_id)['episodes']['data']
 
             return self.playlist_result(
-                entries, compat_str(show_data['id']),
-                show_data.get('name'), show_data.get('summary'))
+                entries, compat_str(list_data['id']),
+                list_data.get('name'), list_data.get('summary'))
index 93d8715716d00be7e15c53afbe804a3703b38e68..9a57b49df66422a7a7828f2a12ff1385016951ef 100644 (file)
@@ -22,6 +22,7 @@ from ..utils import (
     unsmuggle_url,
     xpath_with_ns,
     mimetype2ext,
+    find_xpath_attr,
 )
 
 default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -31,15 +32,11 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 class ThePlatformBaseIE(InfoExtractor):
     def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
         meta = self._download_xml(smil_url, video_id, note=note)
-        try:
-            error_msg = next(
-                n.attrib['abstract']
-                for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
-        except StopIteration:
-            pass
-        else:
-            raise ExtractorError(error_msg, expected=True)
+        error_element = find_xpath_attr(
+            meta, _x('.//smil:ref'), 'src',
+            'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
+        if error_element is not None:
+            raise ExtractorError(error_element.attrib['abstract'], expected=True)
 
         formats = self._parse_smil_formats(
             meta, smil_url, video_id, namespace=default_ns,
index 69882da6337bc6f20d0578f6d612e799429db3e8..8639293e35a79875c40b3933b5531381c2b3d077 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     encode_dict,
     ExtractorError,
     int_or_none,
+    orderedSet,
     parse_duration,
     parse_iso8601,
     sanitized_Request,
@@ -281,17 +282,36 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
         entries = []
         offset = 0
         limit = self._PAGE_LIMIT
+        broken_paging_detected = False
+        counter_override = None
         for counter in itertools.count(1):
             response = self._download_json(
                 self._PLAYLIST_URL % (channel_id, offset, limit),
-                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
+                channel_id,
+                'Downloading %s videos JSON page %s'
+                % (self._PLAYLIST_TYPE, counter_override or counter))
             page_entries = self._extract_playlist_page(response)
             if not page_entries:
                 break
+            total = int_or_none(response.get('_total'))
+            # Since the beginning of March 2016 twitch's paging mechanism
+            # is completely broken on the twitch side. It simply ignores
+            # a limit and returns the whole offset number of videos.
+            # Working around by just requesting all videos at once.
+            if not broken_paging_detected and total and len(page_entries) > limit:
+                self.report_warning(
+                    'Twitch paging is broken on twitch side, requesting all videos at once',
+                    channel_id)
+                broken_paging_detected = True
+                offset = total
+                counter_override = '(all at once)'
+                continue
             entries.extend(page_entries)
+            if broken_paging_detected or total and len(page_entries) >= total:
+                break
             offset += limit
         return self.playlist_result(
-            [self.url_result(entry) for entry in set(entries)],
+            [self.url_result(entry) for entry in orderedSet(entries)],
             channel_id, channel_name)
 
     def _extract_playlist_page(self, response):
index 5d2b5ec3515277980f2da45a91a0c73fae17923d..e70b2ab3c8d564cd907e8763fd8136e1b3827ac5 100644 (file)
@@ -10,7 +10,6 @@ from ..utils import (
     remove_end,
     int_or_none,
     ExtractorError,
-    sanitized_Request,
 )
 
 
@@ -22,7 +21,7 @@ class TwitterBaseIE(InfoExtractor):
 
 class TwitterCardIE(TwitterBaseIE):
     IE_NAME = 'twitter:card'
-    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -30,7 +29,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 30.033,
             }
@@ -41,7 +40,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '623160978427936768',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg',
                 'duration': 80.155,
             },
@@ -72,63 +71,102 @@ class TwitterCardIE(TwitterBaseIE):
                 'title': 'Vine by ArsenalTerje',
             },
             'add_ie': ['Vine'],
-        }
+        }, {
+            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+            'md5': '3846d0a07109b5ab622425449b59049d',
+            'info_dict': {
+                'id': '705235433198714880',
+                'ext': 'mp4',
+                'title': 'Twitter web player',
+                'thumbnail': 're:^https?://.*\.jpg',
+            },
+        },
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        # Different formats served for different User-Agents
-        USER_AGENTS = [
-            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4
-            'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm
-        ]
-
         config = None
         formats = []
-        for user_agent in USER_AGENTS:
-            request = sanitized_Request(url)
-            request.add_header('User-Agent', user_agent)
-            webpage = self._download_webpage(request, video_id)
-
-            iframe_url = self._html_search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
-                webpage, 'video iframe', default=None)
-            if iframe_url:
-                return self.url_result(iframe_url)
-
-            config = self._parse_json(self._html_search_regex(
-                r'data-player-config="([^"]+)"', webpage, 'data player config'),
-                video_id)
-            if 'playlist' not in config:
-                if 'vmapUrl' in config:
-                    formats.append({
-                        'url': self._get_vmap_video_url(config['vmapUrl'], video_id),
-                    })
-                    break   # same video regardless of UA
-                continue
-
-            video_url = config['playlist'][0]['source']
+        duration = None
 
-            f = {
-                'url': video_url,
-            }
+        webpage = self._download_webpage(url, video_id)
+
+        iframe_url = self._html_search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+            webpage, 'video iframe', default=None)
+        if iframe_url:
+            return self.url_result(iframe_url)
 
+        config = self._parse_json(self._html_search_regex(
+            r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
+            video_id)
+
+        def _search_dimensions_in_video_url(a_format, video_url):
             m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
             if m:
-                f.update({
+                a_format.update({
                     'width': int(m.group('width')),
                     'height': int(m.group('height')),
                 })
+
+        playlist = config.get('playlist')
+        if playlist:
+            video_url = playlist[0]['source']
+
+            f = {
+                'url': video_url,
+            }
+
+            _search_dimensions_in_video_url(f, video_url)
+
             formats.append(f)
+
+        vmap_url = config.get('vmapUrl') or config.get('vmap_url')
+        if vmap_url:
+            formats.append({
+                'url': self._get_vmap_video_url(vmap_url, video_id),
+            })
+
+        media_info = None
+
+        for entity in config.get('status', {}).get('entities', []):
+            if 'mediaInfo' in entity:
+                media_info = entity['mediaInfo']
+
+        if media_info:
+            for media_variant in media_info['variants']:
+                media_url = media_variant['url']
+                if media_url.endswith('.m3u8'):
+                    formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
+                elif media_url.endswith('.mpd'):
+                    formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
+                else:
+                    vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
+                    a_format = {
+                        'url': media_url,
+                        'format_id': 'http-%d' % vbr if vbr else 'http',
+                        'vbr': vbr,
+                    }
+                    # Reported bitRate may be zero
+                    if not a_format['vbr']:
+                        del a_format['vbr']
+
+                    _search_dimensions_in_video_url(a_format, media_url)
+
+                    formats.append(a_format)
+
+            duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
+
         self._sort_formats(formats)
 
-        thumbnail = config.get('posterImageUrl')
-        duration = float_or_none(config.get('duration'))
+        title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        thumbnail = config.get('posterImageUrl') or config.get('image_src')
+        duration = float_or_none(config.get('duration')) or duration
 
         return {
             'id': video_id,
-            'title': 'TwitterCard',
+            'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
@@ -142,7 +180,6 @@ class TwitterIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
-        # MD5 checksums are different in different places
         'info_dict': {
             'id': '643211948184596480',
             'ext': 'mp4',
@@ -153,6 +190,9 @@ class TwitterIE(InfoExtractor):
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
         },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }, {
         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@@ -177,6 +217,36 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'starwars',
             'uploader': 'Star Wars',
         },
+    }, {
+        'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+        'info_dict': {
+            'id': '705235433198714880',
+            'ext': 'mp4',
+            'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
+            'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
+            'uploader_id': 'BTNBrentYarina',
+            'uploader': 'Brent Yarina',
+        },
+        'params': {
+            # The same video as https://twitter.com/i/videos/tweet/705235433198714880
+            # Test case of TwitterCardIE
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
+        'md5': '',
+        'info_dict': {
+            'id': '700207533655363584',
+            'ext': 'mp4',
+            'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
+            'description': 'jay on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'jay',
+            'uploader_id': 'jaydingeer',
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }]
 
     def _real_extract(self, url):
@@ -234,6 +304,15 @@ class TwitterIE(InfoExtractor):
             })
             return info
 
+        if 'class="PlayableMedia' in webpage:
+            info.update({
+                '_type': 'url_transparent',
+                'ie_key': 'TwitterCard',
+                'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
+            })
+
+            return info
+
         raise ExtractorError('There\'s no video in this tweet.')
 
 
index 14e945d494cd2f6e5f3b3e6a03ff6ebb076826dd..e148b1ef513321376efe1795056503ea2a8bcad8 100644 (file)
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
         'aftenbladet.no/tv': 'satv',
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
+        'ap.vgtv.no/webtv': 'aptv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
                     (?P<host>
                         %s
                     )
-                    /
+                    /?
                     (?:
                         \#!/(?:video|live)/|
                         embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
             'md5': 'fd828cd29774a729bf4d4425fe192972',
             'info_dict': {
                 'id': '21039',
-                'ext': 'mov',
+                'ext': 'mp4',
                 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
                 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
                 'duration': 66,
                 'timestamp': 1417002452,
                 'upload_date': '20141126',
                 'view_count': int,
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
         },
+        {
+            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
         if len(video_id) == 5:
             if appname == 'bttv':
                 info = self._extract_video_info('btno', video_id)
-            elif appname == 'aptv':
-                info = self._extract_video_info('ap', video_id)
 
         streams = data['streamUrls']
         stream_type = data.get('streamType')
index 3049dffb6c98ed95251262bd3013add99f4b41db..9f282a1da68ac9889f9cfe667a1a8bc7b8b3a71f 100644 (file)
@@ -93,6 +93,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                 'description': 'md5:2d3305bad981a06ff79f027f19865021',
                 'upload_date': '20121220',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user7108434',
                 'uploader_id': 'user7108434',
                 'uploader': 'Filippo Valsorda',
                 'duration': 10,
@@ -105,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'info_dict': {
                 'id': '68093876',
                 'ext': 'mp4',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
                 'uploader_id': 'openstreetmapus',
                 'uploader': 'OpenStreetMap US',
                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
@@ -121,6 +123,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                 'uploader': 'The BLN & Business of Software',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
                 'uploader_id': 'theblnbusinessofsoftware',
                 'duration': 3610,
                 'description': None,
@@ -135,6 +138,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl password protected test video',
                 'upload_date': '20130614',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user18948128',
                 'uploader_id': 'user18948128',
                 'uploader': 'Jaime Marquínez Ferrándiz',
                 'duration': 10,
@@ -154,6 +158,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'Key & Peele: Terrorist Interrogation',
                 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/atencio',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
                 'upload_date': '20130927',
@@ -169,6 +174,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'title': 'The New Vimeo Player (You Know, For Videos)',
                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
                 'upload_date': '20131015',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/staff',
                 'uploader_id': 'staff',
                 'uploader': 'Vimeo Staff',
                 'duration': 62,
@@ -183,6 +189,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'Pier Solar OUYA Official Trailer',
                 'uploader': 'Tulio Gonçalves',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user28849593',
                 'uploader_id': 'user28849593',
             },
         },
@@ -195,6 +202,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
                 'uploader': 'The DMCI',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/dmci',
                 'uploader_id': 'dmci',
                 'upload_date': '20111220',
                 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
@@ -370,9 +378,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # Extract title
         video_title = config['video']['title']
 
-        # Extract uploader and uploader_id
-        video_uploader = config['video']['owner']['name']
-        video_uploader_id = config['video']['owner']['url'].split('/')[-1] if config['video']['owner']['url'] else None
+        # Extract uploader, uploader_url and uploader_id
+        video_uploader = config['video'].get('owner', {}).get('name')
+        video_uploader_url = config['video'].get('owner', {}).get('url')
+        video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
 
         # Extract video thumbnail
         video_thumbnail = config['video'].get('thumbnail')
@@ -473,6 +482,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         return {
             'id': video_id,
             'uploader': video_uploader,
+            'uploader_url': video_uploader_url,
             'uploader_id': video_uploader_id,
             'upload_date': video_upload_date,
             'title': video_title,
index fdb16d91c25ae4a3d3c8314a76050d09cf86ef3a..41061dd3125a6f298a5aa915c62b50aabdb68a47 100644 (file)
@@ -35,7 +35,8 @@ class WistiaIE(InfoExtractor):
 
         formats = []
         thumbnails = []
-        for atype, a in data['assets'].items():
+        for a in data['assets']:
+            atype = a.get('type')
             if atype == 'still':
                 thumbnails.append({
                     'url': a['url'],
index d3cc1a29fa473fee2f58e91323774633be00fc4b..e699e663f60818b090bb6bf0ccdf24802c3c14c4 100644 (file)
@@ -10,13 +10,27 @@ from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
+    ExtractorError,
     int_or_none,
     float_or_none,
     sanitized_Request,
 )
 
 
-class YandexMusicTrackIE(InfoExtractor):
+class YandexMusicBaseIE(InfoExtractor):
+    @staticmethod
+    def _handle_error(response):
+        error = response.get('error')
+        if error:
+            raise ExtractorError(error, expected=True)
+
+    def _download_json(self, *args, **kwargs):
+        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+        self._handle_error(response)
+        return response
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
     IE_NAME = 'yandexmusic:track'
     IE_DESC = 'Яндекс.Музыка - Трек'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
@@ -73,7 +87,7 @@ class YandexMusicTrackIE(InfoExtractor):
         return self._get_track_info(track)
 
 
-class YandexMusicPlaylistBaseIE(InfoExtractor):
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
     def _build_playlist(self, tracks):
         return [
             self.url_result(
index b29baafc441c220b4128c9363f341f7159b8df93..1124fe6c280cb0e23bee3a41ea323165ec714dce 100644 (file)
@@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):
         links = []
 
         sources = self._search_regex(
-            r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
+            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
         if sources:
             for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
                 links.append(link)
@@ -101,8 +101,9 @@ class YouPornIE(InfoExtractor):
             }
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
             if mobj:
                 height = int(mobj.group('height'))
                 bitrate = int(mobj.group('bitrate'))
index ec90c211160f9f761ea25816683b9c09245fd817..27e67feb43efb9374c9638a2f5616770c83ed241 100644 (file)
@@ -6,6 +6,7 @@ from __future__ import unicode_literals
 import itertools
 import json
 import os.path
+import random
 import re
 import time
 import traceback
@@ -382,7 +383,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'upload_date': '20121002',
+                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -401,12 +404,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20120506',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                 'alt_title': 'I Love It (feat. Charli XCX)',
-                'description': 'md5:782e8651347686cba06e58f71ab51773',
+                'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                          'iconic ep', 'iconic', 'love', 'it'],
                 'uploader': 'Icona Pop',
                 'uploader_id': 'IconaPop',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
+                'license': 'Standard YouTube License',
                 'creator': 'Icona Pop',
             }
         },
@@ -422,6 +427,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:64249768eec3bc4276236606ea996373',
                 'uploader': 'justintimberlakeVEVO',
                 'uploader_id': 'justintimberlakeVEVO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
+                'license': 'Standard YouTube License',
                 'creator': 'Justin Timberlake',
                 'age_limit': 18,
             }
@@ -437,6 +444,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                 'uploader': 'SET India',
                 'uploader_id': 'setindia',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             }
         },
@@ -449,7 +458,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'upload_date': '20121002',
+                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -468,8 +479,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'm4a',
                 'upload_date': '20121002',
                 'uploader_id': '8KVIDEO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                 'description': '',
                 'uploader': '8KVIDEO',
+                'license': 'Standard YouTube License',
                 'title': 'UHDTV TEST 8K VIDEO.mp4'
             },
             'params': {
@@ -488,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
+                'license': 'Standard YouTube License',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
@@ -506,6 +520,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'upload_date': '20140818',
+                'license': 'Standard YouTube License',
                 'creator': 'Taylor Swift',
             },
             'params': {
@@ -522,6 +537,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20100909',
                 'uploader': 'The Amazing Atheist',
                 'uploader_id': 'TheAmazingAtheist',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+                'license': 'Standard YouTube License',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
@@ -536,7 +553,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'uploader': 'The Witcher',
                 'uploader_id': 'WitcherGame',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -550,7 +569,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                 'uploader': 'LloydVEVO',
                 'uploader_id': 'LloydVEVO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                 'upload_date': '20110629',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -562,9 +583,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20100430',
                 'uploader_id': 'deadmau5',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                 'creator': 'deadmau5',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'uploader': 'deadmau5',
+                'license': 'Standard YouTube License',
                 'title': 'Deadmau5 - Some Chords (HD)',
                 'alt_title': 'Some Chords',
             },
@@ -580,6 +603,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20150827',
                 'uploader_id': 'olympic',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
+                'license': 'Standard YouTube License',
                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                 'uploader': 'Olympics',
                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
@@ -597,8 +622,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'stretched_ratio': 16 / 9.,
                 'upload_date': '20110310',
                 'uploader_id': 'AllenMeow',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                 'uploader': '孫艾倫',
+                'license': 'Standard YouTube License',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
         },
@@ -629,7 +656,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
                 'upload_date': '20150625',
                 'uploader_id': 'dorappi2000',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                 'uploader': 'dorappi2000',
+                'license': 'Standard YouTube License',
                 'formats': 'mincount:33',
             },
         },
@@ -644,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Airtek',
                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+                'license': 'Standard YouTube License',
                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
             },
             'params': {
@@ -668,6 +698,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -678,6 +710,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -688,6 +722,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -698,6 +734,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }],
             'params': {
@@ -731,7 +769,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'upload_date': '20151119',
                 'uploader_id': 'IronSoulElf',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
+                'license': 'Standard YouTube License',
                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
             },
             'params': {
@@ -759,6 +799,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # Video licensed under Creative Commons
+            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+            'info_dict': {
+                'id': 'M4gD1WSo5mA',
+                'ext': 'mp4',
+                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+                'upload_date': '20150127',
+                'uploader_id': 'BerkmanCenter',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+                'uploader': 'BerkmanCenter',
+                'license': 'Creative Commons Attribution license (reuse allowed)',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Channel-like uploader_url
+            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+            'info_dict': {
+                'id': 'eQcmzGIKrzg',
+                'ext': 'mp4',
+                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+                'upload_date': '20151119',
+                'uploader': 'Bernie 2016',
+                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+                'license': 'Creative Commons Attribution license (reuse allowed)',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
             'only_matching': True,
@@ -1046,6 +1122,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._downloader.report_warning(err_msg)
             return {}
 
+    def _mark_watched(self, video_id, video_info):
+        playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+        if not playback_url:
+            return
+        parsed_playback_url = compat_urlparse.urlparse(playback_url)
+        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+        # cpn generation algorithm is reverse engineered from base.js.
+        # In fact it works even with dummy cpn.
+        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+
+        qs.update({
+            'ver': ['2'],
+            'cpn': [cpn],
+        })
+        playback_url = compat_urlparse.urlunparse(
+            parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+
+        self._download_webpage(
+            playback_url, video_id, 'Marking watched',
+            'Unable to mark watched', fatal=False)
+
     @classmethod
     def extract_id(cls, url):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
@@ -1272,9 +1371,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # uploader_id
         video_uploader_id = None
-        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
+        video_uploader_url = None
+        mobj = re.search(
+            r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+            video_webpage)
         if mobj is not None:
-            video_uploader_id = mobj.group(1)
+            video_uploader_id = mobj.group('uploader_id')
+            video_uploader_url = mobj.group('uploader_url')
         else:
             self._downloader.report_warning('unable to extract uploader nickname')
 
@@ -1302,6 +1405,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
         upload_date = unified_strdate(upload_date)
 
+        video_license = self._html_search_regex(
+            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+            video_webpage, 'license', default=None)
+
         m_music = re.search(
             r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
             video_webpage)
@@ -1375,6 +1482,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
+            formats_spec = {}
+            fmt_list = video_info.get('fmt_list', [''])[0]
+            if fmt_list:
+                for fmt in fmt_list.split(','):
+                    spec = fmt.split('/')
+                    if len(spec) > 1:
+                        width_height = spec[1].split('x')
+                        if len(width_height) == 2:
+                            formats_spec[spec[0]] = {
+                                'resolution': spec[1],
+                                'width': int_or_none(width_height[0]),
+                                'height': int_or_none(width_height[1]),
+                            }
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
@@ -1443,6 +1563,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 }
                 if format_id in self._formats:
                     dct.update(self._formats[format_id])
+                if format_id in formats_spec:
+                    dct.update(formats_spec[format_id])
 
                 # Some itags are not included in DASH manifest thus corresponding formats will
                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
@@ -1555,11 +1677,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         self._sort_formats(formats)
 
+        self.mark_watched(video_id, video_info)
+
         return {
             'id': video_id,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
+            'uploader_url': video_uploader_url,
             'upload_date': upload_date,
+            'license': video_license,
             'creator': video_creator,
             'title': video_title,
             'alt_title': video_alt_title,
index 3afa8bb6faac3610dfc3695fe27d6006ff9339b5..9dd7a80346c2704c91cbaccb17096ac24a35f363 100644 (file)
@@ -170,6 +170,14 @@ def parseOpts(overrideArguments=None):
         action='store_const', dest='extract_flat', const='in_playlist',
         default=False,
         help='Do not extract the videos of a playlist, only list them.')
+    general.add_option(
+        '--mark-watched',
+        action='store_true', dest='mark_watched', default=False,
+        help='Mark videos watched (YouTube only)')
+    general.add_option(
+        '--no-mark-watched',
+        action='store_false', dest='mark_watched', default=False,
+        help='Do not mark videos watched (YouTube only)')
     general.add_option(
         '--no-color', '--no-colors',
         action='store_true', dest='no_color',
index 480d48d057400fafb0acdfc8492fca31b1d2f674..e39ca60aa08326b6f05814ff800bb09c75755e48 100644 (file)
@@ -6,6 +6,7 @@ import sys
 import errno
 
 from .common import PostProcessor
+from ..compat import compat_os_name
 from ..utils import (
     check_executable,
     hyphenate_date,
@@ -73,7 +74,7 @@ class XAttrMetadataPP(PostProcessor):
                     raise XAttrMetadataError(e.errno, e.strerror)
 
         except ImportError:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 # Write xattrs to NTFS Alternate Data Streams:
                 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
                 def write_xattr(path, key, value):
@@ -168,7 +169,7 @@ class XAttrMetadataPP(PostProcessor):
                     'Unable to write extended attributes due to too long values.')
             else:
                 msg = 'This filesystem doesn\'t support extended attributes. '
-                if os.name == 'nt':
+                if compat_os_name == 'nt':
                     msg += 'You need to use NTFS.'
                 else:
                     msg += '(You may have to enable them in your /etc/fstab)'
index 606977c5874ac613f0224815e8c91fc5ef62400a..22a39a0ab7af191095242e6b2a3f580e59e17c37 100644 (file)
@@ -160,8 +160,6 @@ if sys.version_info >= (2, 7):
     def find_xpath_attr(node, xpath, key, val=None):
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z_-]+$', key)
-        if val:
-            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
         return node.find(expr)
 else:
@@ -467,6 +465,10 @@ def encodeFilename(s, for_subprocess=False):
     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
         return s
 
+    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+    if sys.platform.startswith('java'):
+        return s
+
     return s.encode(get_subprocess_encoding(), 'ignore')
 
 
@@ -1217,13 +1219,23 @@ if sys.platform == 'win32':
             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 
 else:
-    import fcntl
+    # Some platforms, such as Jython, is missing fcntl
+    try:
+        import fcntl
 
-    def _lock_file(f, exclusive):
-        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        def _lock_file(f, exclusive):
+            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 
-    def _unlock_file(f):
-        fcntl.flock(f, fcntl.LOCK_UN)
+        def _unlock_file(f):
+            fcntl.flock(f, fcntl.LOCK_UN)
+    except ImportError:
+        UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+        def _lock_file(f, exclusive):
+            raise IOError(UNSUPPORTED_MSG)
+
+        def _unlock_file(f):
+            raise IOError(UNSUPPORTED_MSG)
 
 
 class locked_file(object):
@@ -1387,6 +1399,12 @@ def fix_xml_ampersands(xml_str):
 
 def setproctitle(title):
     assert isinstance(title, compat_str)
+
+    # ctypes in Jython is not complete
+    # http://bugs.jython.org/issue2148
+    if sys.platform.startswith('java'):
+        return
+
     try:
         libc = ctypes.cdll.LoadLibrary('libc.so.6')
     except OSError:
@@ -1721,6 +1739,15 @@ def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
 
 
+def update_url_query(url, query):
+    parsed_url = compat_urlparse.urlparse(url)
+    qs = compat_parse_qs(parsed_url.query)
+    qs.update(query)
+    qs = encode_dict(qs)
+    return compat_urlparse.urlunparse(parsed_url._replace(
+        query=compat_urllib_parse.urlencode(qs, True)))
+
+
 def encode_dict(d, encoding='utf-8'):
     def encode(v):
         return v.encode(encoding) if isinstance(v, compat_basestring) else v