Merge pull request #7436 from davidbz/add_proxy_to_update_procedure
authorSergey M <dstftw@gmail.com>
Sun, 15 Nov 2015 11:13:22 +0000 (11:13 +0000)
committerSergey M <dstftw@gmail.com>
Sun, 15 Nov 2015 11:13:22 +0000 (11:13 +0000)
Add proxy support for update_self

28 files changed:
AUTHORS
docs/supportedsites.md
youtube_dl/extractor/__init__.py
youtube_dl/extractor/aljazeera.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/dumpert.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/gorillavid.py [deleted file]
youtube_dl/extractor/instagram.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/nowness.py
youtube_dl/extractor/nowtv.py
youtube_dl/extractor/nowvideo.py
youtube_dl/extractor/periscope.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/space.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wsj.py
youtube_dl/extractor/xfileshare.py [new file with mode: 0644]
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index cc552bcb2ceb5b00acc81f09ac93b899084f27c8..f465d20edcb4fc5bdb23a69b488333b83a608785 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -144,3 +144,5 @@ Lee Jenkins
 Anssi Hannula
 Lukáš Lalinský
 Qijiang Fan
+Rémy Léone
+Marco Ferragina
index a9820c1f52b7f0fce45273f13f6e3c92b9e27a9d..5016ba4bc1e6b14a2b772f127ddee819e544de98 100644 (file)
  - **GodTube**
  - **GoldenMoustache**
  - **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
  - **Goshgay**
  - **Groupon**
  - **Hark**
  - **WSJ**: Wall Street Journal
  - **XBef**
  - **XboxClips**
+ - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
  - **XHamster**
  - **XHamsterEmbed**
  - **XMinus**
index 0a90da73cdb2a5dec9eaa91acd8513ed222ec6a9..59c82f65d316ac5dea7cfea23e39fc4f366ebd9d 100644 (file)
@@ -60,7 +60,10 @@ from .bloomberg import BloombergIE
 from .bpb import BpbIE
 from .br import BRIE
 from .breakcom import BreakIE
-from .brightcove import BrightcoveIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
@@ -221,7 +224,6 @@ from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
-from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .groupon import GrouponIE
 from .hark import HarkIE
@@ -418,7 +420,10 @@ from .nowness import (
     NownessPlaylistIE,
     NownessSeriesIE,
 )
-from .nowtv import NowTVIE
+from .nowtv import (
+    NowTVIE,
+    NowTVListIE,
+)
 from .nowvideo import NowVideoIE
 from .npo import (
     NPOIE,
@@ -456,10 +461,7 @@ from .orf import (
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
-from .periscope import (
-    PeriscopeIE,
-    QuickscopeIE,
-)
+from .periscope import PeriscopeIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
@@ -786,6 +788,7 @@ from .wrzuta import WrzutaIE
 from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
 from .xhamster import (
     XHamsterIE,
     XHamsterEmbedIE,
index 184a14a4fa99632e825245170b94abab3cb68684..5b2c0dc9ac10aa826d5757f2fc75738c376219a3 100644 (file)
@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
             'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
             'uploader': 'Al Jazeera English',
         },
-        'add_ie': ['Brightcove'],
+        'add_ie': ['BrightcoveLegacy'],
         'skip': 'Not accessible from Travis CI server',
     }
 
@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
                 'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
                 '&%40videoPlayer={0}'.format(brightcove_id)
             ),
-            'ie_key': 'Brightcove',
+            'ie_key': 'BrightcoveLegacy',
         }
index a55a6dbc9dc8e89dda6213650f46e719978cf8ca..33b296eafc0776d50edbc964170b1026b223bb71 100644 (file)
@@ -27,7 +27,7 @@ class BBCCoUkIE(InfoExtractor):
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
-        # http://www.bbc.co.uk/programmes/b06bp7lf)
+        # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
     ]
@@ -334,7 +334,7 @@ class BBCCoUkIE(InfoExtractor):
                 return self._download_media_selector_url(
                     mediaselector_url % programme_id, programme_id)
             except BBCCoUkIE.MediaSelectionError as e:
-                if e.id in ('notukerror', 'geolocation'):
+                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                     last_exception = e
                     continue
                 self._raise_extractor_error(e)
@@ -345,7 +345,7 @@ class BBCCoUkIE(InfoExtractor):
             media_selection = self._download_xml(
                 url, programme_id, 'Downloading media selection XML')
         except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
                 media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
             else:
                 raise
index 1686cdde14fcc7383f91bf52a6723d4831d6311d..14ee05f213843de58445a7f054299ead92c1c963 100644 (file)
@@ -20,12 +20,17 @@ from ..utils import (
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
+    float_or_none,
+    js_to_json,
+    int_or_none,
+    parse_iso8601,
     unescapeHTML,
     unsmuggle_url,
 )
 
 
-class BrightcoveIE(InfoExtractor):
+class BrightcoveLegacyIE(InfoExtractor):
+    IE_NAME = 'brightcove:legacy'
     _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
 
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
         if 'url' not in info and not info.get('formats'):
             raise ExtractorError('Unable to extract video url for %s' % info['id'])
         return info
+
+
+class BrightcoveNewIE(InfoExtractor):
+    IE_NAME = 'brightcove:new'
+    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
+    _TESTS = [{
+        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+        'md5': 'c8100925723840d4b0d243f7025703be',
+        'info_dict': {
+            'id': '4463358922001',
+            'ext': 'mp4',
+            'title': 'Meet the man behind Popcorn Time',
+            'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+            'duration': 165.768,
+            'timestamp': 1441391203,
+            'upload_date': '20150904',
+            'uploader_id': '929656772001',
+            'formats': 'mincount:22',
+        },
+    }, {
+        # with rtmp streams
+        'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+        'info_dict': {
+            'id': '4279049078001',
+            'ext': 'mp4',
+            'title': 'Titansgrave: Chapter 0',
+            'description': 'Titansgrave: Chapter 0',
+            'duration': 1242.058,
+            'timestamp': 1433556729,
+            'upload_date': '20150606',
+            'uploader_id': '4036320279001',
+            'formats': 'mincount:41',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        # Reference:
+        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
+        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
+
+        entries = []
+
+        # Look for iframe embeds [1]
+        for _, url in re.findall(
+                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url)
+
+        # Look for embed_in_page embeds [2]
+        for video_id, account_id, player_id, embed in re.findall(
+                # According to examples from [3] it's unclear whether video id
+                # may be optional and what to do when it is
+                r'''(?sx)
+                    <video[^>]+
+                        data-video-id=["\'](\d+)["\'][^>]*>.*?
+                    </video>.*?
+                    <script[^>]+
+                        src=["\'](?:https?:)?//players\.brightcove\.net/
+                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
+                ''', webpage):
+            entries.append(
+                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                % (account_id, player_id, embed, video_id))
+
+        return entries
+
+    def _real_extract(self, url):
+        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+
+        webpage = self._download_webpage(
+            'http://players.brightcove.net/%s/%s_%s/index.min.js'
+            % (account_id, player_id, embed), video_id)
+
+        policy_key = None
+
+        catalog = self._search_regex(
+            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+        if catalog:
+            catalog = self._parse_json(
+                js_to_json(catalog), video_id, fatal=False)
+            if catalog:
+                policy_key = catalog.get('policyKey')
+
+        if not policy_key:
+            policy_key = self._search_regex(
+                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                webpage, 'policy key', group='pk')
+
+        req = compat_urllib_request.Request(
+            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
+            % (account_id, video_id),
+            headers={'Accept': 'application/json;pk=%s' % policy_key})
+        json_data = self._download_json(req, video_id)
+
+        title = json_data['name']
+
+        formats = []
+        for source in json_data.get('sources', []):
+            source_type = source.get('type')
+            src = source.get('src')
+            if source_type == 'application/x-mpegURL':
+                if not src:
+                    continue
+                m3u8_formats = self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
+            else:
+                streaming_src = source.get('streaming_src')
+                stream_name, app_name = source.get('stream_name'), source.get('app_name')
+                if not src and not streaming_src and (not stream_name or not app_name):
+                    continue
+                tbr = float_or_none(source.get('avg_bitrate'), 1000)
+                height = int_or_none(source.get('height'))
+                f = {
+                    'tbr': tbr,
+                    'width': int_or_none(source.get('width')),
+                    'height': height,
+                    'filesize': int_or_none(source.get('size')),
+                    'container': source.get('container'),
+                    'vcodec': source.get('codec'),
+                    'ext': source.get('container').lower(),
+                }
+
+                def build_format_id(kind):
+                    format_id = kind
+                    if tbr:
+                        format_id += '-%dk' % int(tbr)
+                    if height:
+                        format_id += '-%dp' % height
+                    return format_id
+
+                if src or streaming_src:
+                    f.update({
+                        'url': src or streaming_src,
+                        'format_id': build_format_id('http' if src else 'http-streaming'),
+                        'preference': 2 if src else 1,
+                    })
+                else:
+                    f.update({
+                        'url': app_name,
+                        'play_path': stream_name,
+                        'format_id': build_format_id('rtmp'),
+                    })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        description = json_data.get('description')
+        thumbnail = json_data.get('thumbnail')
+        timestamp = parse_iso8601(json_data.get('published_at'))
+        duration = float_or_none(json_data.get('duration'), 1000)
+        tags = json_data.get('tags', [])
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': account_id,
+            'formats': formats,
+            'tags': tags,
+        }
index 75fffb1563ae9f95bf862ad156111b6962a8429e..43f05d278060c47215122dc3b5a86d6480b5d891 100644 (file)
@@ -1,6 +1,8 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import smuggle_url
 
 
 class CBSIE(InfoExtractor):
@@ -46,13 +48,19 @@ class CBSIE(InfoExtractor):
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
+        request = compat_urllib_request.Request(url)
+        # Android UA is served with higher quality (720p) streams (see
+        # https://github.com/rg3/youtube-dl/issues/7490)
+        request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
+        webpage = self._download_webpage(request, display_id)
         real_id = self._search_regex(
             [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
             webpage, 'real video ID')
         return {
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',
-            'url': 'theplatform:%s' % real_id,
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
+                {'force_smil_url': True}),
             'display_id': display_id,
         }
index 52e61d85b3a20bc939771cee2b94188d32f16d17..f9a64a0a2ec77ec75daeb3024fadcbbc80e6e9e8 100644 (file)
@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
                 'format_id': format_id,
             }
             if uri.startswith('rtmp'):
+                play_path = re.sub(
+                    r'{slistFilePath}', '',
+                    uri.split('<break>')[-1].split('{break}')[-1])
                 fmt.update({
                     'app': 'ondemand?auth=cbs',
-                    'play_path': 'mp4:' + uri.split('<break>')[-1],
+                    'play_path': 'mp4:' + play_path,
                     'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
                     'page_url': 'http://www.cbsnews.com',
                     'ext': 'flv',
index 1f00386feae15d00a4421b3166335c15f3b01aa9..f5a31058d5cd8b1f914e9968aeca98751fb90136 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import re
 
 from .common import InfoExtractor
 from ..compat import compat_urllib_request
@@ -9,7 +10,7 @@ from ..utils import qualities
 
 
 class DumpertIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -26,9 +27,11 @@ class DumpertIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        protocol = mobj.group('protocol')
 
-        url = 'https://www.dumpert.nl/mediabase/' + video_id
+        url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'nsfw=1; cpc=10')
         webpage = self._download_webpage(req, video_id)
index f5f13689c0a8310d0e241f0cd24617d90799c701..7f21d7410c4515e23c4ebde5895d56de3b8a8f79 100644 (file)
@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
 
         links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
 
-        bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
-        bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
-        bitrates.sort()
+        m3u8_url = self._search_regex(
+            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
+            webpage, 'm3u8 url', default=None, group='url')
 
         formats = []
+
+        m3u8_formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+        if m3u8_formats:
+            formats.extend(m3u8_formats)
+
+        bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
+        bitrates.sort()
+
         for bitrate in bitrates:
             for link in links:
                 formats.append({
index d0b486d2a03a5e1dcd0ae0f0703246907d1550a6..51516a38a14646f29bdc160b60e0298ad1234739 100644 (file)
@@ -30,7 +30,10 @@ from ..utils import (
     url_basename,
     xpath_text,
 )
-from .brightcove import BrightcoveIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
 from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
@@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):
         # it also tests brightcove videos that need to set the 'Referer' in the
         # http requests
         {
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
             'info_dict': {
                 'id': '2765128793001',
@@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):
                 'uploader': 'thestar.com',
                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
             },
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
         },
         {
             'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):
         },
         {
             # https://github.com/rg3/youtube-dl/issues/3541
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
             'info_dict': {
                 'id': '3866516442001',
@@ -1031,6 +1034,17 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'cinemasnob',
             },
+        },
+        # BrightcoveInPageEmbed embed
+        {
+            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+            'info_dict': {
+                'id': '4238694884001',
+                'ext': 'flv',
+                'title': 'Tabletop: Dread, Last Thoughts',
+                'description': 'Tabletop: Dread, Last Thoughts',
+                'duration': 51690,
+            },
         }
     ]
 
@@ -1290,14 +1304,14 @@ class GenericIE(InfoExtractor):
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
 
-        # Look for BrightCove:
-        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+        # Look for Brightcove Legacy Studio embeds
+        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
             self.to_screen('Brightcove video detected.')
             entries = [{
                 '_type': 'url',
                 'url': smuggle_url(bc_url, {'Referer': url}),
-                'ie_key': 'Brightcove'
+                'ie_key': 'BrightcoveLegacy'
             } for bc_url in bc_urls]
 
             return {
@@ -1307,6 +1321,11 @@ class GenericIE(InfoExtractor):
                 'entries': entries,
             }
 
+        # Look for Brightcove New Studio embeds
+        bc_urls = BrightcoveNewIE._extract_urls(webpage)
+        if bc_urls:
+            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+
         # Look for embedded rtl.nl player
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
deleted file mode 100644 (file)
index d23e3ea..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import (
-    ExtractorError,
-    encode_dict,
-    int_or_none,
-)
-
-
-class GorillaVidIE(InfoExtractor):
-    IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
-    _VALID_URL = r'''(?x)
-        https?://(?P<host>(?:www\.)?
-            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
-        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
-    '''
-
-    _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
-
-    _TESTS = [{
-        'url': 'http://gorillavid.in/06y9juieqpmi',
-        'md5': '5ae4a3580620380619678ee4875893ba',
-        'info_dict': {
-            'id': '06y9juieqpmi',
-            'ext': 'flv',
-            'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
-            'thumbnail': 're:http://.*\.jpg',
-        },
-    }, {
-        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
-        'only_matching': True,
-    }, {
-        'url': 'http://daclips.in/3rso4kdn6f9m',
-        'md5': '1ad8fd39bb976eeb66004d3a4895f106',
-        'info_dict': {
-            'id': '3rso4kdn6f9m',
-            'ext': 'mp4',
-            'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
-            'thumbnail': 're:http://.*\.jpg',
-        }
-    }, {
-        # video with countdown timeout
-        'url': 'http://fastvideo.in/1qmdn1lmsmbw',
-        'md5': '8b87ec3f6564a3108a0e8e66594842ba',
-        'info_dict': {
-            'id': '1qmdn1lmsmbw',
-            'ext': 'mp4',
-            'title': 'Man of Steel - Trailer',
-            'thumbnail': 're:http://.*\.jpg',
-        },
-    }, {
-        'url': 'http://realvid.net/ctn2y6p2eviw',
-        'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
-        'info_dict': {
-            'id': 'ctn2y6p2eviw',
-            'ext': 'flv',
-            'title': 'rdx 1955',
-            'thumbnail': 're:http://.*\.jpg',
-        },
-    }, {
-        'url': 'http://movpod.in/0wguyyxi1yca',
-        'only_matching': True,
-    }, {
-        'url': 'http://filehoot.com/3ivfabn7573c.html',
-        'info_dict': {
-            'id': '3ivfabn7573c',
-            'ext': 'mp4',
-            'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
-            'thumbnail': 're:http://.*\.jpg',
-        }
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        url = 'http://%s/%s' % (mobj.group('host'), video_id)
-        webpage = self._download_webpage(url, video_id)
-
-        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        fields = self._hidden_inputs(webpage)
-
-        if fields['op'] == 'download1':
-            countdown = int_or_none(self._search_regex(
-                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
-                webpage, 'countdown', default=None))
-            if countdown:
-                self._sleep(countdown, video_id)
-
-            post = compat_urllib_parse.urlencode(encode_dict(fields))
-
-            req = compat_urllib_request.Request(url, post)
-            req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
-            webpage = self._download_webpage(req, video_id, 'Downloading video page')
-
-        title = self._search_regex(
-            [r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
-            webpage, 'title', default=None) or self._og_search_title(webpage)
-        video_url = self._search_regex(
-            r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
-        thumbnail = self._search_regex(
-            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
-
-        formats = [{
-            'format_id': 'sd',
-            'url': video_url,
-            'quality': 1,
-        }]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
index 3d78f78c46d1ad004339bc33ebcb09d1286e5092..fce179000786cfeacad39d9d65786aa9fea915b1 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https://instagram\.com/p/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
         'info_dict': {
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
         }
-    }
+    }, {
+        'url': 'https://instagram.com/p/-Cmh1cukG2/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 9a207b2cd425ba2f1b173d9bd52730e511a52fa0..3d7e7e003e1b2d4dce2d04bdde1d1ae5c252b82d 100644 (file)
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
@@ -83,6 +83,10 @@ class LyndaBaseIE(InfoExtractor):
             raise ExtractorError('Unable to log in')
 
     def _logout(self):
+        username, _ = self._get_login_info()
+        if username is None:
+            return
+
         self._download_webpage(
             'http://www.lynda.com/ajax/logout.aspx', None,
             'Logging out', 'Unable to log out', fatal=False)
index 04d779890af1960d65b070d0b2f80e429db21d07..6b15fc2e530a84e99ed194fee0024f8ec6c4c2b8 100644 (file)
@@ -4,10 +4,14 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
+    NO_DEFAULT,
+    encode_dict,
+    urlencode_postdata,
 )
 
 
@@ -38,19 +42,40 @@ class NovaMovIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        page = self._download_webpage(
-            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
+        url = 'http://%s/video/%s' % (self._HOST, video_id)
 
-        if re.search(self._FILE_DELETED_REGEX, page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading video page')
 
-        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
-        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
+        def extract_filekey(default=NO_DEFAULT):
+            return self._search_regex(
+                self._FILEKEY_REGEX, webpage, 'filekey', default=default)
+
+        filekey = extract_filekey(default=None)
+
+        if not filekey:
+            fields = self._hidden_inputs(webpage)
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
+                'post url', default=url, group='url')
+            if not post_url.startswith('http'):
+                post_url = compat_urlparse.urljoin(url, post_url)
+            request = compat_urllib_request.Request(
+                post_url, urlencode_postdata(encode_dict(fields)))
+            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            request.add_header('Referer', post_url)
+            webpage = self._download_webpage(
+                request, video_id, 'Downloading continue to the video page')
+
+        filekey = extract_filekey()
+
+        title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
 
         api_response = self._download_webpage(
             'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
index b97f62fdb839f4cdb395c3bf0fa152ec8eace0ca..0fba55833813c26a7203ab70d3c8606e385ebba3 100644 (file)
@@ -1,7 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import (
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
                             'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
                             note='Downloading player JavaScript',
                             errnote='Unable to download player JavaScript')
-                        bc_url = BrightcoveIE._extract_brightcove_url(player_code)
+                        bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
                         if bc_url is None:
                             raise ExtractorError('Could not find player definition')
-                        return self.url_result(bc_url, 'Brightcove')
+                        return self.url_result(bc_url, 'BrightcoveLegacy')
                     elif source == 'vimeo':
                         return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                     elif source == 'youtube':
index b0bdffc4ea168cf2138340f6ab16f0b4f6644dd4..67e34b294520faa6d8320c719471dcfe89c71fa9 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -13,8 +15,63 @@ from ..utils import (
 )
 
 
-class NowTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
+class NowTVBaseIE(InfoExtractor):
+    """Shared base that turns a movie object from the nowtv API into an info dict."""
+    _VIDEO_FIELDS = (
+        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+        'broadcastStartDate', 'seoUrl', 'duration', 'files',
+        'format.defaultImage169Format', 'format.defaultImage169Logo')
+
+    def _extract_video(self, info, display_id=None):
+        """Build the info dict for one movie object returned by the API.
+
+        Raises ExtractorError when the movie has no files to download.
+        """
+        video_id = compat_str(info['id'])
+
+        files = info.get('files')
+        if not files:
+            if info.get('geoblocked', False):
+                raise ExtractorError(
+                    'Video %s is not available from your location due to geo restriction' % video_id,
+                    expected=True)
+            if not info.get('free', True):
+                raise ExtractorError(
+                    'Video %s is not available for free' % video_id, expected=True)
+            # No known reason: fail explicitly instead of crashing on files['items'].
+            raise ExtractorError('Video %s has no files' % video_id)
+
+        formats = []
+        for item in files['items']:
+            if determine_ext(item['path']) != 'f4v':
+                continue
+            app, play_path = remove_start(item['path'], '/').split('/', 1)
+            formats.append({
+                'url': 'rtmpe://fms.rtl.de',
+                'app': app,
+                'play_path': 'mp4:%s' % play_path,
+                'ext': 'flv',
+                'page_url': 'http://rtlnow.rtl.de',
+                'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
+                'tbr': int_or_none(item.get('bitrate')),
+            })
+        self._sort_formats(formats)
+
+        f = info.get('format') or {}
+        return {
+            'id': video_id,
+            'display_id': display_id or info.get('seoUrl'),
+            'title': info['title'],
+            'description': info.get('articleLong') or info.get('articleShort'),
+            'thumbnail': f.get('defaultImage169Format') or f.get('defaultImage169Logo'),
+            'timestamp': parse_iso8601(info.get('broadcastStartDate'), ' '),
+            'duration': parse_duration(info.get('duration')),
+            'formats': formats,
+        }
+
+
+class NowTVIE(NowTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
 
     _TESTS = [{
         # rtl
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
             'id': '203519',
             'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
             'ext': 'flv',
-            'title': 'Die neuen Bauern und eine Hochzeit',
+            'title': 'Inka Bause stellt die neuen Bauern vor',
             'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
             'thumbnail': 're:^https?://.*\.jpg$',
             'timestamp': 1432580700,
@@ -136,58 +193,65 @@ class NowTVIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        display_id_split = display_id.split('/')
-        if len(display_id) > 2:
-            display_id = '/'.join((display_id_split[0], display_id_split[-1]))
+        mobj = re.match(self._VALID_URL, url)
+        display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
 
         info = self._download_json(
-            'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
-            display_id)
+            'https://api.nowtv.de/v3/movies/%s?fields=%s'
+            % (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
 
-        video_id = compat_str(info['id'])
+        return self._extract_video(info, display_id)
 
-        files = info['files']
-        if not files:
-            if info.get('geoblocked', False):
-                raise ExtractorError(
-                    'Video %s is not available from your location due to geo restriction' % video_id,
-                    expected=True)
-            if not info.get('free', True):
-                raise ExtractorError(
-                    'Video %s is not available for free' % video_id, expected=True)
 
-        formats = []
-        for item in files['items']:
-            if determine_ext(item['path']) != 'f4v':
-                continue
-            app, play_path = remove_start(item['path'], '/').split('/', 1)
-            formats.append({
-                'url': 'rtmpe://fms.rtl.de',
-                'app': app,
-                'play_path': 'mp4:%s' % play_path,
-                'ext': 'flv',
-                'page_url': 'http://rtlnow.rtl.de',
-                'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
-                'tbr': int_or_none(item.get('bitrate')),
-            })
-        self._sort_formats(formats)
+class NowTVListIE(NowTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
 
-        title = info['title']
-        description = info.get('articleLong') or info.get('articleShort')
-        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
-        duration = parse_duration(info.get('duration'))
+    _SHOW_FIELDS = ('title', )
+    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
 
-        f = info.get('format', {})
-        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+    _TESTS = [{
+        'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
+        'info_dict': {
+            'id': '17006',
+            'title': 'stern TV - Aktuell',
+        },
+        'playlist_count': 1,
+    }, {
+        'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
+        'info_dict': {
+            'id': '20716',
+            'title': 'Das Supertalent - FREE Staffel 8',
+        },
+        'playlist_count': 14,
+    }]
 
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'formats': formats,
-        }
+    def _real_extract(self, url):
+        """Return a playlist of all movies in the season tab named by the URL."""
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('show_id')
+        season_id = mobj.group('id')
+
+        fields = list(self._SHOW_FIELDS)
+        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+        fields.extend(
+            'formatTabs.formatTabPages.container.movies.%s' % field
+            for field in self._VIDEO_FIELDS)
+
+        list_info = self._download_json(
+            'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
+            % (','.join(fields), show_id),
+            season_id)
+
+        # Default of None: a bare next() would leak StopIteration when no tab matches.
+        season = next((
+            season for season in list_info['formatTabs']['items']
+            if season.get('seoheadline') == season_id), None)
+        if season is None:
+            raise ExtractorError('Unable to find season %s' % season_id)
+        title = '%s - %s' % (list_info['title'], season['headline'])
+
+        entries = []
+        for container in season['formatTabPages']['items']:
+            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+                entries.append(self._extract_video(info))
+        return self.playlist_result(entries, compat_str(season.get('id') or season_id), title)
index 17baa96796fafbf5d70b220d2f796cdc707d216b..57ee3d3662b291c6acc18fecc220671df0c706eb 100644 (file)
@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
     IE_NAME = 'nowvideo'
     IE_DESC = 'NowVideo'
 
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
 
-    _HOST = 'www.nowvideo.ch'
+    _HOST = 'www.nowvideo.to'
 
     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
     _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
index 887c8020d87f7413876b97e180ba10d1f813175c..63cc764bb8eceed80893606e640466b355279a0d 100644 (file)
@@ -2,16 +2,12 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
 from ..utils import parse_iso8601
 
 
 class PeriscopeIE(InfoExtractor):
     IE_DESC = 'Periscope'
-    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
     # Alive example URLs can be found here http://onperiscope.com/
     _TESTS = [{
         'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -29,6 +25,9 @@ class PeriscopeIE(InfoExtractor):
     }, {
         'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
         'only_matching': True,
+    }, {
+        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+        'only_matching': True,
     }]
 
     def _call_api(self, method, value):
@@ -81,24 +80,3 @@ class PeriscopeIE(InfoExtractor):
             'thumbnails': thumbnails,
             'formats': formats,
         }
-
-
-class QuickscopeIE(InfoExtractor):
-    IE_DESC = 'Quick Scope'
-    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://watchonperiscope.com/broadcast/56180087',
-        'only_matching': True,
-    }
-
-    def _real_extract(self, url):
-        broadcast_id = self._match_id(url)
-        request = compat_urllib_request.Request(
-            'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({
-                'broadcast_id': broadcast_id,
-                'entry_ticket': '',
-                'from_push': 'false',
-                'uses_sessions': 'true',
-            }).encode('utf-8'))
-        return self.url_result(
-            self._download_json(request, broadcast_id)['share_url'], 'Periscope')
index a16b73ff4025fb78a9358282c6f01c523327f8e2..e417bf66147a7d6d8dc9d604fd7fed09acb873ca 100644 (file)
@@ -57,16 +57,21 @@ class RuutuIE(InfoExtractor):
                     extract_formats(child)
                 elif child.tag.endswith('File'):
                     video_url = child.text
-                    if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
+                    if (not video_url or video_url in processed_urls or
+                            any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                         return
                     processed_urls.append(video_url)
                     ext = determine_ext(video_url)
                     if ext == 'm3u8':
-                        formats.extend(self._extract_m3u8_formats(
-                            video_url, video_id, 'mp4', m3u8_id='hls'))
+                        m3u8_formats = self._extract_m3u8_formats(
+                            video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                        if m3u8_formats:
+                            formats.extend(m3u8_formats)
                     elif ext == 'f4m':
-                        formats.extend(self._extract_f4m_formats(
-                            video_url, video_id, f4m_id='hds'))
+                        f4m_formats = self._extract_f4m_formats(
+                            video_url, video_id, f4m_id='hds', fatal=False)
+                        if f4m_formats:
+                            formats.extend(f4m_formats)
                     else:
                         proto = compat_urllib_parse_urlparse(video_url).scheme
                         if not child.tag.startswith('HTTP') and proto != 'rtmp':
index a602af6928d2a9d054fc8670342a6ddf7d9ef4da..e9e33d0a3979a33c3a913d38a564c2d5f754cef5 100644 (file)
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 
 from ..compat import (
     compat_urllib_parse,
@@ -112,11 +112,11 @@ class SafariIE(SafariBaseIE):
             '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
             part)
 
-        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
         if not bc_url:
             raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
 
-        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
 
 
 class SafariCourseIE(SafariBaseIE):
index c2d0d36a6935c40553419621678ce8987c4f2dbd..ebb5d6ec0ffe6f0b9056bdbe58ab36abea019c3d 100644 (file)
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from ..utils import RegexNotFoundError, ExtractorError
 
 
 class SpaceIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
     _TEST = {
-        'add_ie': ['Brightcove'],
+        'add_ie': ['BrightcoveLegacy'],
         'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
         'info_dict': {
             'id': '2780937028001',
@@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):
             brightcove_url = self._og_search_video_url(webpage)
         except RegexNotFoundError:
             # Other videos works fine with the info from the object
-            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+            brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
         if brightcove_url is None:
             raise ExtractorError(
                 'The webpage does not contain a video', expected=True)
-        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
+        return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())
index 13263614cc06b099d929ee71564899ac3620f76a..d6d038a8d7a80db41ef75f7d13f13fe2ce0411c8 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from .discovery import DiscoveryIE
 from ..compat import compat_urlparse
 
@@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):
 
         return {
             '_type': 'url',
-            'url': BrightcoveIE._extract_brightcove_url(iframe),
-            'ie': BrightcoveIE.ie_key(),
+            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
+            'ie': BrightcoveLegacyIE.ie_key(),
         }
index 9d3e46b946843ae0da6b9de525c4aa4b8b3f4cbb..05504734046a1ff9cb2221e21510d7d3d9968dae 100644 (file)
@@ -9,6 +9,8 @@ from ..utils import (
     float_or_none,
     xpath_text,
     remove_end,
+    int_or_none,
+    ExtractorError,
 )
 
 
@@ -18,7 +20,7 @@ class TwitterCardIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
-            'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
+            'md5': '4fa26a35f9d1bf4b646590ba8e84be19',
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
@@ -50,6 +52,20 @@ class TwitterCardIE(InfoExtractor):
                 'uploader': 'OMG! Ubuntu!',
                 'uploader_id': 'omgubuntu',
             },
+            'add_ie': ['Youtube'],
+        },
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+            'md5': 'ab2745d0b0ce53319a534fccaa986439',
+            'info_dict': {
+                'id': 'iBb2x00UVlv',
+                'ext': 'mp4',
+                'upload_date': '20151113',
+                'uploader_id': '1189339351084113920',
+                'uploader': '@ArsenalTerje',
+                'title': 'Vine by @ArsenalTerje',
+            },
+            'add_ie': ['Vine'],
         }
     ]
 
@@ -69,11 +85,11 @@ class TwitterCardIE(InfoExtractor):
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(request, video_id)
 
-            youtube_url = self._html_search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
-                webpage, 'youtube iframe', default=None)
-            if youtube_url:
-                return self.url_result(youtube_url, 'Youtube')
+            iframe_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+                webpage, 'video iframe', default=None)
+            if iframe_url:
+                return self.url_result(iframe_url)
 
             config = self._parse_json(self._html_search_regex(
                 r'data-player-config="([^"]+)"', webpage, 'data player config'),
@@ -120,9 +136,9 @@ class TwitterIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
     _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
-        'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
+        'md5': 'db6612ec5d03355953c3ca9250c97e5e',
         'info_dict': {
             'id': '643211948184596480',
             'ext': 'mp4',
@@ -133,7 +149,30 @@ class TwitterIE(InfoExtractor):
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
         },
-    }
+    }, {
+        'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+        'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+        'info_dict': {
+            'id': '657991469417025536',
+            'ext': 'mp4',
+            'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+            'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+            'thumbnail': 're:^https?://.*\.png',
+            'uploader': 'Gifs',
+            'uploader_id': 'giphz',
+        },
+    }, {
+        'url': 'https://twitter.com/starwars/status/665052190608723968',
+        'md5': '39b7199856dee6cd4432e72c74bc69d4',
+        'info_dict': {
+            'id': '665052190608723968',
+            'ext': 'mp4',
+            'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+            'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
+            'uploader_id': 'starwars',
+            'uploader': 'Star Wars',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -144,23 +183,46 @@ class TwitterIE(InfoExtractor):
 
         username = remove_end(self._og_search_title(webpage), ' on Twitter')
 
-        title = self._og_search_description(webpage).strip('').replace('\n', ' ')
+        title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
 
         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
-        mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
-        title, short_url = mobj.groups()
-
-        card_id = self._search_regex(
-            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
-        card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+        title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'TwitterCard',
+        info = {
             'uploader_id': user_id,
             'uploader': username,
-            'url': card_url,
             'webpage_url': url,
-            'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
+            'description': '%s on Twitter: "%s"' % (username, description),
             'title': username + ' - ' + title,
         }
+
+        card_id = self._search_regex(
+            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
+        if card_id:
+            card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+            info.update({
+                '_type': 'url_transparent',
+                'ie_key': 'TwitterCard',
+                'url': card_url,
+            })
+            return info
+
+        mobj = re.search(r'''(?x)
+            <video[^>]+class="animated-gif"[^>]+
+                (?:data-height="(?P<height>\d+)")?[^>]+
+                (?:data-width="(?P<width>\d+)")?[^>]+
+                (?:poster="(?P<poster>[^"]+)")?[^>]*>\s*
+                <source[^>]+video-src="(?P<url>[^"]+)"
+        ''', webpage)
+
+        if mobj:
+            info.update({
+                'id': twid,
+                'url': mobj.group('url'),
+                'height': int_or_none(mobj.group('height')),
+                'width': int_or_none(mobj.group('width')),
+                'thumbnail': mobj.group('poster'),
+            })
+            return info
+
+        raise ExtractorError('There\'s not video in this tweet.')
index ca716c8f530326907986ceb17a427c0b28c1102e..b72341a2bef0d97de284ed987265aeedc0b62011 100644 (file)
@@ -49,8 +49,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         }))
         login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        login_request.add_header('Cookie', 'vuid=%s' % vuid)
         login_request.add_header('Referer', self._LOGIN_URL)
+        self._set_vimeo_cookie('vuid', vuid)
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
     def _extract_xsrft_and_vuid(self, webpage):
@@ -62,6 +62,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             webpage, 'vuid', group='vuid')
         return xsrft, vuid
 
+    def _set_vimeo_cookie(self, name, value):  # forwards to _set_cookie with the domain fixed to 'vimeo.com'
+        self._set_cookie('vimeo.com', name, value)
+
 
 class VimeoIE(VimeoBaseInfoExtractor):
     """Information extractor for vimeo.com."""
@@ -217,8 +220,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
             url = url.replace('http://', 'https://')
         password_request = compat_urllib_request.Request(url + '/password', data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
         password_request.add_header('Referer', url)
+        self._set_vimeo_cookie('vuid', vuid)
         return self._download_webpage(
             password_request, video_id,
             'Verifying the password', 'Wrong password')
@@ -384,47 +387,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
             like_count = None
             comment_count = None
 
-        # Vimeo specific: extract request signature and timestamp
-        sig = config['request']['signature']
-        timestamp = config['request']['timestamp']
-
-        # Vimeo specific: extract video codec and quality information
-        # First consider quality, then codecs, then take everything
-        codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
-        files = {'hd': [], 'sd': [], 'other': []}
-        config_files = config["video"].get("files") or config["request"].get("files")
-        for codec_name, codec_extension in codecs:
-            for quality in config_files.get(codec_name, []):
-                format_id = '-'.join((codec_name, quality)).lower()
-                key = quality if quality in files else 'other'
-                video_url = None
-                if isinstance(config_files[codec_name], dict):
-                    file_info = config_files[codec_name][quality]
-                    video_url = file_info.get('url')
-                else:
-                    file_info = {}
-                if video_url is None:
-                    video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
-                        % (video_id, sig, timestamp, quality, codec_name.upper())
-
-                files[key].append({
-                    'ext': codec_extension,
-                    'url': video_url,
-                    'format_id': format_id,
-                    'width': int_or_none(file_info.get('width')),
-                    'height': int_or_none(file_info.get('height')),
-                    'tbr': int_or_none(file_info.get('bitrate')),
-                })
         formats = []
-        m3u8_url = config_files.get('hls', {}).get('all')
+        config_files = config['video'].get('files') or config['request'].get('files', {})
+        for f in config_files.get('progressive', []):
+            video_url = f.get('url')
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': 'http-%s' % f.get('quality'),
+                'width': int_or_none(f.get('width')),
+                'height': int_or_none(f.get('height')),
+                'fps': int_or_none(f.get('fps')),
+                'tbr': int_or_none(f.get('bitrate')),
+            })
+        m3u8_url = config_files.get('hls', {}).get('url')
         if m3u8_url:
             m3u8_formats = self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
             if m3u8_formats:
                 formats.extend(m3u8_formats)
-        for key in ('other', 'sd', 'hd'):
-            formats += files[key]
-        self._sort_formats(formats)
+        # Bitrates are completely broken. A single m3u8 may contain entries in kbps and bps
+        # at the same time without actual units specified. This leads to wrong sorting.
+        self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
 
         subtitles = {}
         text_tracks = config['request'].get('text_tracks')
@@ -494,8 +479,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
         password_url = compat_urlparse.urljoin(page_url, password_path)
         password_request = compat_urllib_request.Request(password_url, post)
         password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
-        password_request.add_header('Cookie', 'vuid=%s' % vuid)
-        self._set_cookie('vimeo.com', 'xsrft', token)
+        self._set_vimeo_cookie('vuid', vuid)
+        self._set_vimeo_cookie('xsrft', token)
 
         return self._download_webpage(
             password_request, list_id,
index 2ddf29a694ec6365e9089bc18536320489b4d2c3..5a897371d1d69a95e08f7b4da4d457b3236e09cc 100644 (file)
@@ -84,6 +84,5 @@ class WSJIE(InfoExtractor):
             'duration': duration,
             'upload_date': upload_date,
             'title': title,
-            'formats': formats,
             'categories': categories,
         }
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py
new file mode 100644 (file)
index 0000000..952515c
--- /dev/null
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    encode_dict,
+    int_or_none,
+)
+
+
+class XFileShareIE(InfoExtractor):
+    IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
+    _VALID_URL = r'''(?x)
+        https?://(?P<host>(?:www\.)?
+            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me))/
+        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
+    '''
+
+    _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
+
+    _TESTS = [{
+        'url': 'http://gorillavid.in/06y9juieqpmi',
+        'md5': '5ae4a3580620380619678ee4875893ba',
+        'info_dict': {
+            'id': '06y9juieqpmi',
+            'ext': 'flv',
+            'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://daclips.in/3rso4kdn6f9m',
+        'md5': '1ad8fd39bb976eeb66004d3a4895f106',
+        'info_dict': {
+            'id': '3rso4kdn6f9m',
+            'ext': 'mp4',
+            'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
+            'thumbnail': 're:http://.*\.jpg',
+        }
+    }, {
+        # video with countdown timeout
+        'url': 'http://fastvideo.in/1qmdn1lmsmbw',
+        'md5': '8b87ec3f6564a3108a0e8e66594842ba',
+        'info_dict': {
+            'id': '1qmdn1lmsmbw',
+            'ext': 'mp4',
+            'title': 'Man of Steel - Trailer',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://realvid.net/ctn2y6p2eviw',
+        'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
+        'info_dict': {
+            'id': 'ctn2y6p2eviw',
+            'ext': 'flv',
+            'title': 'rdx 1955',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://movpod.in/0wguyyxi1yca',
+        'only_matching': True,
+    }, {
+        'url': 'http://filehoot.com/3ivfabn7573c.html',
+        'info_dict': {
+            'id': '3ivfabn7573c',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
+            'thumbnail': 're:http://.*\.jpg',
+        }
+    }, {
+        'url': 'http://vidto.me/ku5glz52nqe1.html',
+        'info_dict': {
+            'id': 'ku5glz52nqe1',
+            'ext': 'mp4',
+            'title': 'test'
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Load the video page, submitting the 'download1' form if present, and scrape the file URL."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://%s/%s' % (mobj.group('host'), video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        fields = self._hidden_inputs(webpage)
+        # .get(): a page without an 'op' hidden input must not raise KeyError.
+        if fields.get('op') == 'download1':
+            countdown = int_or_none(self._search_regex(
+                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+                webpage, 'countdown', default=None))
+            if countdown:
+                self._sleep(countdown, video_id)
+
+            # Request data must be bytes on Python 3; urlencode output is pure ASCII.
+            post = compat_urllib_parse.urlencode(encode_dict(fields)).encode('ascii')
+            req = compat_urllib_request.Request(url, post)
+            req.add_header('Content-type', 'application/x-www-form-urlencoded')
+            webpage = self._download_webpage(req, video_id, 'Downloading video page')
+
+        title = (self._search_regex(
+            [r'style="z-index: [0-9]+;">([^<]+)</span>',
+             r'<td nowrap>([^<]+)</td>',
+             r'>Watch (.+) ',
+             r'<h2 class="video-page-head">([^<]+)</h2>'],
+            webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
+        # A-Z, not A-z: the latter range would also admit [ \ ] ^ and `.
+        video_url = self._search_regex(
+            [r'file\s*:\s*["\'](http[^"\']+)["\'],',
+             r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-Z.\/\-_]+)'],
+            webpage, 'file url')
+        thumbnail = self._search_regex(
+            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'quality': 1,
+        }]
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index b3d2540050364c2a944c247e1e5b7bd34463985f..6585d60d58b4624798791fabb5c4cdb5e1a2e4ca 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.11.10'
+__version__ = '2015.11.13'