Merge branch 'fktv' of https://github.com/remitamine/youtube-dl into remitamine-fktv

author Yen Chi Hsuan <yan12125@gmail.com>
Fri, 25 Sep 2015 09:02:10 +0000 (17:02 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Fri, 25 Sep 2015 09:02:10 +0000 (17:02 +0800)
diff --git a/AUTHORS b/AUTHORS

index d1693224ec850681691377a599f8769bef1448a7..901c1b263d663b11fab5d93c02cb9956abebca48 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -140,3 +140,6 @@ Behrouz Abbasi
  ngld
  nyuszika7h
  Shaun Walbridge
+Lee Jenkins
+Anssi Hannula
+Lukáš Lalinský
diff --git a/README.md b/README.md

index 24bfe38a242f3e798a225710187cde8a56d4c157..2ed75179182c93cfe7df8c584c7c17163cdb10ed 100644 (file)
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms
  - [VIDEO SELECTION](#video-selection)
  - [FAQ](#faq)
  - [DEVELOPER INSTRUCTIONS](#developer-instructions)
+- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
  - [BUGS](#bugs)
  - [COPYRIGHT](#copyright)
  
@@ -261,7 +262,7 @@ For example:
  machine youtube login myaccount@gmail.com password my_youtube_password
  machine twitch login my_twitch_account_name password my_twitch_password
  ```
-To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
+To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or place it in [configuration file](#configuration).
  
  On Windows you may also need to setup `%HOME%` environment variable manually.
  
@@ -277,8 +278,8 @@ The `-o` option allows users to indicate a template for the output file names. T
   - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
   - `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
   - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
- - `playlist`: The name or the id of the playlist that contains the video.
- - `playlist_index`: The index of the video in the playlist, a five-digit number.
+ - `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
+ - `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
   - `format_id`: The sequence will be replaced by the format code specified by `--format`.
  
  The current default template is `%(title)s-%(id)s.%(ext)s`.
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 04b9959ac61b0f77da6cf46b07661414b81083c1..ab153af6ba2c23a91502b64600dade16c2b0f42b 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -122,7 +122,6 @@
   - **defense.gouv.fr**
   - **DHM**: Filmarchiv - Deutsches Historisches Museum
   - **Discovery**
- - **divxstage**: DivxStage
   - **Dotsub**
   - **DouyuTV**: 斗鱼
   - **dramafever**
@@ -195,7 +194,7 @@
   - **GodTube**
   - **GoldenMoustache**
   - **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
   - **Goshgay**
   - **Groupon**
   - **Hark**
@@ -286,7 +285,7 @@
   - **Minhateca**
   - **MinistryGrid**
   - **miomio.tv**
- - **mitele.es**
+ - **MiTele**: mitele.es
   - **mixcloud**
   - **MLB**
   - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
@@ -309,7 +308,6 @@
   - **mtvservices:embedded**
   - **MuenchenTV**: münchen.tv
   - **MusicPlayOn**
- - **MusicVault**
   - **muzu.tv**
   - **Mwave**
   - **MySpace**
@@ -318,7 +316,6 @@
   - **Myvi**
   - **myvideo**
   - **MyVidster**
- - **N-JOY**
   - **n-tv.de**
   - **NationalGeographic**
   - **Naver**
@@ -327,7 +324,9 @@
   - **NBCNews**
   - **NBCSports**
   - **NBCSportsVPlayer**
- - **ndr**: NDR.de - Mediathek
+ - **ndr**: NDR.de - Norddeutscher Rundfunk
+ - **ndr:embed**
+ - **ndr:embed:base**
   - **NDTV**
   - **NerdCubedFeed**
   - **Nerdist**
@@ -350,12 +349,16 @@
   - **nhl.com:videocenter**: NHL videocenter category
   - **niconico**: ニコニコ動画
   - **NiconicoPlaylist**
+ - **njoy**: N-JOY
+ - **njoy:embed**
   - **Noco**
   - **Normalboots**
   - **NosVideo**
   - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
   - **novamov**: NovaMov
- - **Nowness**
+ - **nowness**
+ - **nowness:playlist**
+ - **nowness:series**
   - **NowTV**
   - **nowvideo**: NowVideo
   - **npo**: npo.nl and ntr.nl
@@ -376,7 +379,6 @@
   - **OnionStudios**
   - **Ooyala**
   - **OoyalaExternal**
- - **OpenFilm**
   - **orf:fm4**: radio FM4
   - **orf:iptv**: iptv.ORF.at
   - **orf:oe1**: Radio Österreich 1
@@ -531,7 +533,7 @@
   - **techtv.mit.edu**
   - **ted**
   - **TeleBruxelles**
- - **telecinco.es**
+ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
   - **Telegraaf**
   - **TeleMB**
   - **TeleTask**
@@ -633,6 +635,7 @@
   - **vine:user**
   - **vk**: VK
   - **vk:uservideos**: VK - User's Videos
+ - **vlive**
   - **Vodlocker**
   - **VoiceRepublic**
   - **Vporn**
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index e32bef279842d7a5c97d6e6e52b01cee267e69f9..1ff42d94b95d9098cf270b883176bb2a06e54297 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -80,6 +80,11 @@ try:
  except ImportError:
      import BaseHTTPServer as compat_http_server
  
+try:
+    compat_str = unicode  # Python 2
+except NameError:
+    compat_str = str
+
  try:
      from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
      from urllib.parse import unquote as compat_urllib_parse_unquote
@@ -100,7 +105,7 @@ except ImportError:  # Python 2
              # Is it a string-like object?
              string.split
              return b''
-        if isinstance(string, unicode):
+        if isinstance(string, compat_str):
              string = string.encode('utf-8')
          bits = string.split(b'%')
          if len(bits) == 1:
@@ -150,11 +155,6 @@ except ImportError:  # Python 2
          string = string.replace('+', ' ')
          return compat_urllib_parse_unquote(string, encoding, errors)
  
-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
  try:
      compat_basestring = basestring  # Python 2
  except NameError:
@@ -234,7 +234,7 @@ else:
      # Working around shlex issue with unicode strings on some python 2
      # versions (see http://bugs.python.org/issue1548891)
      def compat_shlex_split(s, comments=False, posix=True):
-        if isinstance(s, unicode):
+        if isinstance(s, compat_str):
              s = s.encode('utf-8')
          return shlex.split(s, comments, posix)
  
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py

index 71aafdc73e17a29bd6784f5200b193126fb35fe4..a62d2047bbefc27bb06208bbaf19a523d411377f 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -28,10 +28,19 @@ class HlsFD(FileDownloader):
              return False
          ffpp.check_version()
  
-        args = [
-            encodeArgument(opt)
-            for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
-        args.append(encodeFilename(tmpfilename, True))
+        args = [ffpp.executable, '-y']
+
+        if info_dict['http_headers']:
+            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+            args += [
+                '-headers',
+                ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())]
+
+        args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
  
          self._debug_cmd(args)
  
@@ -92,6 +101,7 @@ class NativeHlsFD(FragmentFD):
                  return False
              down, frag_sanitized = sanitize_open(frag_filename, 'rb')
              ctx['dest_stream'].write(down.read())
+            down.close()
              frags_filenames.append(frag_sanitized)
  
          self._finish_frag_download(ctx)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index f8d4c846201a96254a59a2bcd1466bd1820a1da0..a73a1317eb795becaca148cd3a50a06cda4a542e 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -138,7 +138,6 @@ from .dump import DumpIE
  from .dumpert import DumpertIE
  from .defense import DefenseGouvFrIE
  from .discovery import DiscoveryIE
-from .divxstage import DivxStageIE
  from .dropbox import DropboxIE
  from .eagleplatform import EaglePlatformIE
  from .ebaumsworld import EbaumsWorldIE
@@ -226,7 +225,6 @@ from .historicfilms import HistoricFilmsIE
  from .history import HistoryIE
  from .hitbox import HitboxIE, HitboxLiveIE
  from .hornbunny import HornBunnyIE
-from .hostingbulk import HostingBulkIE
  from .hotnewhiphop import HotNewHipHopIE
  from .howcast import HowcastIE
  from .howstuffworks import HowStuffWorksIE
@@ -364,6 +362,9 @@ from .nbc import (
  from .ndr import (
      NDRIE,
      NJoyIE,
+    NDREmbedBaseIE,
+    NDREmbedIE,
+    NJoyEmbedIE,
  )
  from .ndtv import NDTVIE
  from .netzkino import NetzkinoIE
@@ -399,7 +400,11 @@ from .normalboots import NormalbootsIE
  from .nosvideo import NosVideoIE
  from .nova import NovaIE
  from .novamov import NovaMovIE
-from .nowness import NownessIE
+from .nowness import (
+    NownessIE,
+    NownessPlaylistIE,
+    NownessSeriesIE,
+)
  from .nowtv import NowTVIE
  from .nowvideo import NowVideoIE
  from .npo import (
@@ -429,7 +434,6 @@ from .ooyala import (
      OoyalaIE,
      OoyalaExternalIE,
  )
-from .openfilm import OpenFilmIE
  from .orf import (
      ORFTVthekIE,
      ORFOE1IE,
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index 76de244774369dd53c510961ecf6b6a7641c7027..2a00da3ee0b7f865cb2b14e2c4920361c35022f8 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
  from ..utils import (
      find_xpath_attr,
      unified_strdate,
@@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor):
      def _extract_from_webpage(self, webpage, video_id, lang):
          json_url = self._html_search_regex(
              [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
-            webpage, 'json vp url')
+            webpage, 'json vp url', default=None)
+        if not json_url:
+            iframe_url = self._html_search_regex(
+                r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
+                webpage, 'iframe url', group='url')
+            json_url = compat_parse_qs(
+                compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
          return self._extract_from_json_url(json_url, video_id, lang)
  
      def _extract_from_json_url(self, json_url, video_id, lang):
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py

index abc5a44a1b97567dc3153896d572dc5e86716542..42526357afcadb77ccd48fb5a0e9d38769b11887 100644 (file)
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -21,6 +21,7 @@ class BBCCoUkIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
  
      _MEDIASELECTOR_URLS = [
+        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
      ]
  
@@ -189,6 +190,12 @@ class BBCCoUkIE(InfoExtractor):
              # Skip DASH until supported
              elif transfer_format == 'dash':
                  pass
+            elif transfer_format == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+                    m3u8_id=supplier, fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
              # Direct link
              else:
                  formats.append({
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py

index 14f215c5c27e3001414e512e7a1bf06f725cd8b0..1dfa7c12e19dd3151b9fdbda9c76ad4f58fa3192 100644 (file)
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -12,9 +12,9 @@ from ..utils import (
  
  
  class ClubicIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
          'md5': '1592b694ba586036efac1776b0b43cd3',
          'info_dict': {
@@ -24,7 +24,10 @@ class ClubicIE(InfoExtractor):
              'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
              'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
          }
-    }
+    }, {
+        'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 5eeeda08d3b2cda957106d2ee512eeee3ed55e02..1e7db8a9baf2fb48b8cab0697ca0ada46f624aa7 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -516,6 +516,12 @@ class InfoExtractor(object):
              '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
              expected=True)
  
+    @staticmethod
+    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
+        raise ExtractorError(
+            '%s. You might want to use --proxy to workaround.' % msg,
+            expected=True)
+
      # Methods for following #608
      @staticmethod
      def url_result(url, ie=None, video_id=None, video_title=None):
@@ -731,8 +737,9 @@ class InfoExtractor(object):
  
      @staticmethod
      def _hidden_inputs(html):
+        html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
          hidden_inputs = {}
-        for input in re.findall(r'<input([^>]+)>', html):
+        for input in re.findall(r'(?i)<input([^>]+)>', html):
              if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
                  continue
              name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
@@ -746,7 +753,7 @@ class InfoExtractor(object):
  
      def _form_hidden_inputs(self, form_id, html):
          form = self._search_regex(
-            r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+            r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
              html, '%s form' % form_id, group='form')
          return self._hidden_inputs(form)
  
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py

index 3db4db4e4db816ae532060bc2386cd91a9c71a92..d6949ca28f9605fc69c0ee2adb032217903da393 100644 (file)
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -2,7 +2,6 @@
  from __future__ import unicode_literals
  
  import re
-import json
  
  from .common import InfoExtractor
  from ..compat import (
@@ -24,16 +23,28 @@ class CondeNastIE(InfoExtractor):
      # The keys are the supported sites and the values are the name to be shown
      # to the user and in the extractor description.
      _SITES = {
-        'wired': 'WIRED',
+        'allure': 'Allure',
+        'architecturaldigest': 'Architectural Digest',
+        'arstechnica': 'Ars Technica',
+        'bonappetit': 'Bon Appétit',
+        'brides': 'Brides',
+        'cnevids': 'Condé Nast',
+        'cntraveler': 'Condé Nast Traveler',
+        'details': 'Details',
+        'epicurious': 'Epicurious',
+        'glamour': 'Glamour',
+        'golfdigest': 'Golf Digest',
          'gq': 'GQ',
+        'newyorker': 'The New Yorker',
+        'self': 'SELF',
+        'teenvogue': 'Teen Vogue',
+        'vanityfair': 'Vanity Fair',
          'vogue': 'Vogue',
-        'glamour': 'Glamour',
+        'wired': 'WIRED',
          'wmagazine': 'W Magazine',
-        'vanityfair': 'Vanity Fair',
-        'cnevids': 'Condé Nast',
      }
  
-    _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
      IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
  
      EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
@@ -86,8 +97,8 @@ class CondeNastIE(InfoExtractor):
          info_url = base_info_url + data
          info_page = self._download_webpage(info_url, video_id,
                                             'Downloading video info')
-        video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
-        video_info = json.loads(video_info)
+        video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
+        video_info = self._parse_json(video_info, video_id)
  
          formats = [{
              'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py

deleted file mode 100644 (file)

index b88379e..0000000
--- a/youtube_dl/extractor/divxstage.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class DivxStageIE(NovaMovIE):
-    IE_NAME = 'divxstage'
-    IE_DESC = 'DivxStage'
-
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
-
-    _HOST = 'www.divxstage.eu'
-
-    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
-    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
-    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
-
-    _TEST = {
-        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
-        'md5': '63969f6eb26533a1968c4d325be63e72',
-        'info_dict': {
-            'id': '57f238e2e5e01',
-            'ext': 'flv',
-            'title': 'youtubedl test video',
-            'description': 'This is a test video for youtubedl.',
-        }
-    }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index ec748ed9f4c7b81f4e49270a9857ba429a2cd108..8881a8a237bfe809f5fe924afcc70109166f66af 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
  
  import os
  import re
+import sys
  
  from .common import InfoExtractor
  from .youtube import YoutubeIE
@@ -230,6 +231,22 @@ class GenericIE(InfoExtractor):
                  'skip_download': False,
              }
          },
+        {
+            # redirect in Refresh HTTP header
+            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+            'info_dict': {
+                'id': 'pO8h3EaFRdo',
+                'ext': 'mp4',
+                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+                'upload_date': '20150917',
+                'uploader_id': 'brtvofficial',
+                'uploader': 'Boiler Room',
+            },
+            'params': {
+                'skip_download': False,
+            },
+        },
          {
              'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@ -1808,6 +1825,9 @@ class GenericIE(InfoExtractor):
                  # Look also in Refresh HTTP header
                  refresh_header = head_response.headers.get('Refresh')
                  if refresh_header:
+                    # In python 2 response HTTP headers are bytestrings
+                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+                        refresh_header = refresh_header.decode('iso-8859-1')
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
                  new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py

deleted file mode 100644 (file)

index a3154cf..0000000
--- a/youtube_dl/extractor/hostingbulk.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    urlencode_postdata,
-)
-
-
-class HostingBulkIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?hostingbulk\.com/
-        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
-    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
-    _TEST = {
-        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
-        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
-        'info_dict': {
-            'id': 'n0ulw1hv20fm',
-            'ext': 'mp4',
-            'title': 'md5:5afeba33f48ec87219c269e054afd622',
-            'filesize': 6816081,
-            'thumbnail': 're:^http://.*\.jpg$',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
-
-        # Custom request with cookie to set language to English, so our file
-        # deleted regex would work.
-        request = compat_urllib_request.Request(
-            url, headers={'Cookie': 'lang=english'})
-        webpage = self._download_webpage(request, video_id)
-
-        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id,
-                                 expected=True)
-
-        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
-        filesize = int_or_none(
-            self._search_regex(
-                r'<small>\((\d+)\sbytes?\)</small>',
-                webpage,
-                'filesize',
-                fatal=False
-            )
-        )
-        thumbnail = self._search_regex(
-            r'<img src="([^"]+)".+?class="pic"',
-            webpage, 'thumbnail', fatal=False)
-
-        fields = self._hidden_inputs(webpage)
-
-        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
-        request.add_header('Content-type', 'application/x-www-form-urlencoded')
-        response = self._request_webpage(request, video_id,
-                                         'Submiting download request')
-        video_url = response.geturl()
-
-        formats = [{
-            'format_id': 'sd',
-            'filesize': filesize,
-            'url': video_url,
-        }]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py

index 70e4c0d4173816e990749759cf2d36fe902904ee..a39f422e985bf4c97ac63b28418317d7288c85ca 100644 (file)
--- a/youtube_dl/extractor/iconosquare.py
+++ b/youtube_dl/extractor/iconosquare.py
@@ -1,7 +1,11 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    get_element_by_id,
+    remove_end,
+)
  
  
  class IconosquareIE(InfoExtractor):
@@ -12,7 +16,7 @@ class IconosquareIE(InfoExtractor):
          'info_dict': {
              'id': '522207370455279102_24101272',
              'ext': 'mp4',
-            'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
+            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
              'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
              'timestamp': 1376471991,
              'upload_date': '20130814',
@@ -29,8 +33,7 @@ class IconosquareIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          media = self._parse_json(
-            self._search_regex(
-                r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
+            get_element_by_id('mediaJson', webpage),
              video_id)
  
          formats = [{
@@ -41,9 +44,7 @@ class IconosquareIE(InfoExtractor):
          } for format_id, f in media['videos'].items()]
          self._sort_formats(formats)
  
-        title = self._html_search_regex(
-            r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
-            webpage, 'title')
+        title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
  
          timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
          description = media.get('caption', {}).get('text')
@@ -61,6 +62,14 @@ class IconosquareIE(InfoExtractor):
              'height': int_or_none(t.get('height'))
          } for thumbnail_id, t in media.get('images', {}).items()]
  
+        comments = [{
+            'id': comment.get('id'),
+            'text': comment['text'],
+            'timestamp': int_or_none(comment.get('created_time')),
+            'author': comment.get('from', {}).get('full_name'),
+            'author_id': comment.get('from', {}).get('username'),
+        } for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
+
          return {
              'id': video_id,
              'title': title,
@@ -72,4 +81,5 @@ class IconosquareIE(InfoExtractor):
              'comment_count': comment_count,
              'like_count': like_count,
              'formats': formats,
+            'comments': comments,
          }
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py

index 393e67e354c4adb9864bdebfe05c38503be67b0b..ce1ab3820a13a7688d003b1b4a3e02ff8ae78ed5 100644 (file)
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor):
          ('10', 'h1'),
      ]
  
+    @staticmethod
+    def md5_text(text):
+        return hashlib.md5(text.encode('utf-8')).hexdigest()
+
      def construct_video_urls(self, data, video_id, _uuid):
          def do_xor(x, y):
              a = y % 3
@@ -121,7 +125,7 @@ class IqiyiIE(InfoExtractor):
                  note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
              )['t']
              t = str(int(math.floor(int(tm) / (600.0))))
-            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
+            return self.md5_text(t + mg + x)
  
          video_urls_dict = {}
          for format_item in data['vp']['tkl'][0]['vs']:
@@ -179,20 +183,19 @@ class IqiyiIE(InfoExtractor):
  
      def get_raw_data(self, tvid, video_id, enc_key, _uuid):
          tm = str(int(time.time()))
+        tail = tm + tvid
          param = {
              'key': 'fvip',
-            'src': hashlib.md5(b'youtube-dl').hexdigest(),
+            'src': self.md5_text('youtube-dl'),
              'tvId': tvid,
              'vid': video_id,
              'vinfo': 1,
              'tm': tm,
-            'enc': hashlib.md5(
-                (enc_key + tm + tvid).encode('utf8')).hexdigest(),
+            'enc': self.md5_text((enc_key + tail)[1:64:2] + tail),
              'qyid': _uuid,
              'tn': random.random(),
              'um': 0,
-            'authkey': hashlib.md5(
-                (tm + tvid).encode('utf8')).hexdigest()
+            'authkey': self.md5_text(self.md5_text('') + tail),
          }
  
          api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
@@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor):
          return raw_data
  
      def get_enc_key(self, swf_url, video_id):
-        enc_key = '3601ba290e4f4662848c710e2122007e'  # last update at 2015-08-10 for Zombie
+        # TODO: automatic key extraction
+        enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc'  # last update at 2015-09-23-23 for Zombie::bite
          return enc_key
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py

index 852d722664a3d63aafed0f8246949335b4150c09..54993e2c9f690cf7b01ac3dcde7b4de6ef431d25 100644 (file)
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -1,74 +1,85 @@
  from __future__ import unicode_literals
  
-import json
-
  from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_parse_unquote,
-    compat_urlparse,
-)
+from ..compat import compat_urllib_parse
  from ..utils import (
+    encode_dict,
      get_element_by_attribute,
-    parse_duration,
-    strip_jsonp,
+    int_or_none,
  )
  
  
  class MiTeleIE(InfoExtractor):
-    IE_NAME = 'mitele.es'
+    IE_DESC = 'mitele.es'
      _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
  
      _TESTS = [{
          'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+        'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a',
          'info_dict': {
-            'id': '0fce117d',
-            'ext': 'mp4',
-            'title': 'Programa 144 - Tor, la web invisible',
-            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'id': '0NF1jJnxS1Wu3pHrmvFyw2',
              'display_id': 'programa-144',
+            'ext': 'flv',
+            'title': 'Tor, la web invisible',
+            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'thumbnail': 're:(?i)^https?://.*\.jpg$',
              'duration': 2913,
          },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
      }]
  
      def _real_extract(self, url):
-        episode = self._match_id(url)
-        webpage = self._download_webpage(url, episode)
-        embed_data_json = self._search_regex(
-            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
-        ).replace('\'', '"')
-        embed_data = json.loads(embed_data_json)
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
  
-        domain = embed_data['mediaUrl']
-        if not domain.startswith('http'):
-            # only happens in telecinco.es videos
-            domain = 'http://' + domain
-        info_url = compat_urlparse.urljoin(
-            domain,
-            compat_urllib_parse_unquote(embed_data['flashvars']['host'])
-        )
-        info_el = self._download_xml(info_url, episode).find('./video/info')
+        config_url = self._search_regex(
+            r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
  
-        video_link = info_el.find('videoUrl/link').text
-        token_query = compat_urllib_parse.urlencode({'id': video_link})
-        token_info = self._download_json(
-            embed_data['flashvars']['ov_tk'] + '?' + token_query,
-            episode,
-            transform_source=strip_jsonp
-        )
-        formats = self._extract_m3u8_formats(
-            token_info['tokenizedUrl'], episode, ext='mp4')
+        config = self._download_json(
+            config_url, display_id, 'Downloading config JSON')
+
+        mmc = self._download_json(
+            config['services']['mmc'], display_id, 'Downloading mmc JSON')
+
+        formats = []
+        for location in mmc['locations']:
+            gat = self._proto_relative_url(location.get('gat'), 'http:')
+            bas = location.get('bas')
+            loc = location.get('loc')
+            ogn = location.get('ogn')
+            if None in (gat, bas, loc, ogn):
+                continue
+            token_data = {
+                'bas': bas,
+                'icd': loc,
+                'ogn': ogn,
+                'sta': '0',
+            }
+            media = self._download_json(
+                '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')),
+                display_id, 'Downloading %s JSON' % location['loc'])
+            file_ = media.get('file')
+            if not file_:
+                continue
+            formats.extend(self._extract_f4m_formats(
+                file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+                display_id, f4m_id=loc))
+
+        title = self._search_regex(
+            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
+
+        video_id = self._search_regex(
+            r'data-media-id\s*=\s*"([^"]+)"', webpage,
+            'data media id', default=None) or display_id
+        thumbnail = config.get('poster', {}).get('imageUrl')
+        duration = int_or_none(mmc.get('duration'))
  
          return {
-            'id': embed_data['videoId'],
-            'display_id': episode,
-            'title': info_el.find('title').text,
-            'formats': formats,
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
              'description': get_element_by_attribute('class', 'text', webpage),
-            'thumbnail': info_el.find('thumb').text,
-            'duration': parse_duration(info_el.find('duration').text),
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
          }
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py

index 79a13958b05e25a1c9e586168bb3a10742fbe01f..e3cc6fde80ea545a0c98a26854afa13c81cbc9b9 100644 (file)
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -1,130 +1,380 @@
-# encoding: utf-8
+# coding: utf-8
  from __future__ import unicode_literals
  
  import re
  
  from .common import InfoExtractor
  from ..utils import (
-    ExtractorError,
+    determine_ext,
      int_or_none,
+    parse_iso8601,
      qualities,
-    parse_duration,
  )
  
  
  class NDRBaseIE(InfoExtractor):
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return self._extract_embed(webpage, display_id)
+
  
-        page = self._download_webpage(url, video_id, 'Downloading page')
+class NDRIE(NDRBaseIE):
+    IE_NAME = 'ndr'
+    IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
+    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
+    _TESTS = [{
+        # httpVideo, same content id
+        'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+        'md5': '6515bc255dc5c5f8c85bbc38e035a659',
+        'info_dict': {
+            'id': 'hafengeburtstag988',
+            'display_id': 'Party-Poette-und-Parade',
+            'ext': 'mp4',
+            'title': 'Party, Pötte und Parade',
+            'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
+            'uploader': 'ndrtv',
+            'timestamp': 1431108900,
+            'upload_date': '20150510',
+            'duration': 3498,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # httpVideo, different content id
+        'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+        'md5': '1043ff203eab307f0c51702ec49e9a71',
+        'info_dict': {
+            'id': 'osna272',
+            'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+            'ext': 'mp4',
+            'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+            'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+            'uploader': 'ndrtv',
+            'timestamp': 1442059200,
+            'upload_date': '20150912',
+            'duration': 510,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # httpAudio, same content id
+        'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+        'info_dict': {
+            'id': 'audio51535',
+            'display_id': 'La-Valette-entgeht-der-Hinrichtung',
+            'ext': 'mp3',
+            'title': 'La Valette entgeht der Hinrichtung',
+            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+            'uploader': 'ndrinfo',
+            'timestamp': 1290626100,
+            'upload_date': '20140729',
+            'duration': 884,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
  
-        title = self._og_search_title(page).strip()
-        description = self._og_search_description(page)
-        if description:
-            description = description.strip()
+    def _extract_embed(self, webpage, display_id):
+        embed_url = self._html_search_meta(
+            'embedURL', webpage, 'embed URL', fatal=True)
+        description = self._search_regex(
+            r'<p[^>]+itemprop="description">([^<]+)</p>',
+            webpage, 'description', fatal=False)
+        timestamp = parse_iso8601(
+            self._search_regex(
+                r'<span itemprop="datePublished" content="([^"]+)">',
+                webpage, 'upload date', fatal=False))
+        return {
+            '_type': 'url_transparent',
+            'url': embed_url,
+            'display_id': display_id,
+            'description': description,
+            'timestamp': timestamp,
+        }
  
-        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
-        if not duration:
-            duration = parse_duration(self._html_search_regex(
-                r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
-                page, 'duration', default=None))
  
-        formats = []
+class NJoyIE(NDRBaseIE):
+    IE_NAME = 'njoy'
+    IE_DESC = 'N-JOY'
+    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
+    _TESTS = [{
+        # httpVideo, same content id
+        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+        'info_dict': {
+            'id': 'comedycontest2480',
+            'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
+            'ext': 'mp4',
+            'title': 'Benaissa beim NDR Comedy Contest',
+            'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
+            'uploader': 'ndrtv',
+            'upload_date': '20141129',
+            'duration': 654,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # httpVideo, different content id
+        'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
+        'md5': '417660fffa90e6df2fda19f1b40a64d8',
+        'info_dict': {
+            'id': 'dockville882',
+            'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
+            'ext': 'mp4',
+            'title': '"Ich hab noch nie" mit Felix Jaehn',
+            'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+            'uploader': 'njoy',
+            'upload_date': '20150822',
+            'duration': 211,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _extract_embed(self, webpage, display_id):
+        video_id = self._search_regex(
+            r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
+        description = self._search_regex(
+            r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+            webpage, 'description', fatal=False)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'NDREmbedBase',
+            'url': 'ndr:%s' % video_id,
+            'display_id': display_id,
+            'description': description,
+        }
  
-        mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
-        if mp3_url:
-            formats.append({
-                'url': mp3_url.group('audio'),
-                'format_id': 'mp3',
-            })
  
-        thumbnail = None
+class NDREmbedBaseIE(InfoExtractor):
+    IE_NAME = 'ndr:embed:base'
+    _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
+    _TESTS = [{
+        'url': 'ndr:soundcheck3366',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
+        'only_matching': True,
+    }]
  
-        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
-        if video_url:
-            thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
-            if thumbnails:
-                quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
-                largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
-                thumbnail = 'http://www.ndr.de' + largest[0]
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('id_s')
  
-            for format_id in 'lo', 'hi', 'hq':
-                formats.append({
-                    'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
-                    'format_id': format_id,
-                })
+        ppjson = self._download_json(
+            'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
  
-        if not formats:
-            raise ExtractorError('No media links available for %s' % video_id)
+        playlist = ppjson['playlist']
+
+        formats = []
+        quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
+
+        for format_id, f in playlist.items():
+            src = f.get('src')
+            if not src:
+                continue
+            ext = determine_ext(src, None)
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
+            else:
+                quality = f.get('quality')
+                ff = {
+                    'url': src,
+                    'format_id': quality or format_id,
+                    'quality': quality_key(quality),
+                }
+                type_ = f.get('type')
+                if type_ and type_.split('/')[0] == 'audio':
+                    ff['vcodec'] = 'none'
+                    ff['ext'] = ext or 'mp3'
+                formats.append(ff)
+        self._sort_formats(formats)
+
+        config = playlist['config']
+
+        live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive']
+        title = config['title']
+        if live:
+            title = self._live_title(title)
+        uploader = ppjson.get('config', {}).get('branding')
+        upload_date = ppjson.get('config', {}).get('publicationDate')
+        duration = int_or_none(config.get('duration'))
+
+        thumbnails = [{
+            'id': thumbnail.get('quality') or thumbnail_id,
+            'url': thumbnail['src'],
+            'preference': quality_key(thumbnail.get('quality')),
+        } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
  
          return {
              'id': video_id,
              'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'is_live': live,
+            'uploader': uploader if uploader != '-' else None,
+            'upload_date': upload_date[0:8] if upload_date else None,
              'duration': duration,
+            'thumbnails': thumbnails,
              'formats': formats,
          }
  
  
-class NDRIE(NDRBaseIE):
-    IE_NAME = 'ndr'
-    IE_DESC = 'NDR.de - Mediathek'
-    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
-
-    _TESTS = [
-        {
-            'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
-            'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
-            'note': 'Video file',
-            'info_dict': {
-                'id': '25866',
-                'ext': 'mp4',
-                'title': 'Kartoffeltage in der Lewitz',
-                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
-                'duration': 166,
-            },
-            'skip': '404 Not found',
-        },
-        {
-            'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
-            'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
-            'info_dict': {
-                'id': '988',
-                'ext': 'mp4',
-                'title': 'Party, Pötte und Parade',
-                'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
-                'duration': 3498,
-            },
-        },
-        {
-            'url': 'http://www.ndr.de/info/audio51535.html',
-            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
-            'note': 'Audio file',
-            'info_dict': {
-                'id': '51535',
-                'ext': 'mp3',
-                'title': 'La Valette entgeht der Hinrichtung',
-                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
-                'duration': 884,
-            }
-        }
-    ]
-
+class NDREmbedIE(NDREmbedBaseIE):
+    IE_NAME = 'ndr:embed'
+    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+    _TESTS = [{
+        'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
+        'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
+        'info_dict': {
+            'id': 'ndraktuell28488',
+            'ext': 'mp4',
+            'title': 'Norddeutschland begrüßt Flüchtlinge',
+            'is_live': False,
+            'uploader': 'ndrtv',
+            'upload_date': '20150907',
+            'duration': 132,
+        },
+    }, {
+        'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
+        'md5': '002085c44bae38802d94ae5802a36e78',
+        'info_dict': {
+            'id': 'soundcheck3366',
+            'ext': 'mp4',
+            'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
+            'is_live': False,
+            'uploader': 'ndr2',
+            'upload_date': '20150912',
+            'duration': 3554,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.ndr.de/info/audio51535-player.html',
+        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+        'info_dict': {
+            'id': 'audio51535',
+            'ext': 'mp3',
+            'title': 'La Valette entgeht der Hinrichtung',
+            'is_live': False,
+            'uploader': 'ndrinfo',
+            'upload_date': '20140729',
+            'duration': 884,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
+        'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
+        'info_dict': {
+            'id': 'visite11010',
+            'ext': 'mp4',
+            'title': 'Visite - die ganze Sendung',
+            'is_live': False,
+            'uploader': 'ndrtv',
+            'upload_date': '20150902',
+            'duration': 3525,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # httpVideoLive
+        'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
+        'info_dict': {
+            'id': 'livestream217',
+            'ext': 'flv',
+            'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'is_live': True,
+            'upload_date': '20150910',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
+        'only_matching': True,
+    }]
  
-class NJoyIE(NDRBaseIE):
-    IE_NAME = 'N-JOY'
-    _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
  
-    _TEST = {
-        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
-        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+class NJoyEmbedIE(NDREmbedBaseIE):
+    IE_NAME = 'njoy:embed'
+    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
+    _TESTS = [{
+        # httpVideo
+        'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
+        'md5': '8483cbfe2320bd4d28a349d62d88bd74',
          'info_dict': {
-            'id': '2480',
+            'id': 'doku948',
              'ext': 'mp4',
-            'title': 'Benaissa beim NDR Comedy Contest',
-            'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
-            'duration': 654,
-        }
-    }
+            'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
+            'is_live': False,
+            'upload_date': '20150807',
+            'duration': 1011,
+        },
+    }, {
+        # httpAudio
+        'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
+        'md5': 'd989f80f28ac954430f7b8a48197188a',
+        'info_dict': {
+            'id': 'stefanrichter100',
+            'ext': 'mp3',
+            'title': 'Interview mit einem Augenzeugen',
+            'is_live': False,
+            'uploader': 'njoy',
+            'upload_date': '20150909',
+            'duration': 140,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # httpAudioLive, no explicit ext
+        'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
+        'info_dict': {
+            'id': 'webradioweltweit100',
+            'ext': 'mp3',
+            'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'is_live': True,
+            'uploader': 'njoy',
+            'upload_date': '20150810',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
+        'only_matching': True,
+    }]
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py

index dc54634a58e440fc70ae9bcb3e7d5781981b2b1e..55dc6107d0fcc7c69af04dcf1e5fbb716f5d8115 100644 (file)
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -16,53 +16,104 @@ from ..utils import (
  
  class NFLIE(InfoExtractor):
      IE_NAME = 'nfl.com'
-    _VALID_URL = r'''(?x)https?://
-        (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
-        (?:.+?/)*
-        (?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
-    _TESTS = [
-        {
-            'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
-            'md5': '394ef771ddcd1354f665b471d78ec4c6',
-            'info_dict': {
-                'id': '0ap3000000398478',
-                'ext': 'mp4',
-                'title': 'Week 3: Redskins vs. Eagles highlights',
-                'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
-                'upload_date': '20140921',
-                'timestamp': 1411337580,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
-        },
-        {
-            'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
-            'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
-            'info_dict': {
-                'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
-                'ext': 'mp4',
-                'title': 'LIVE: Post Game vs. Browns',
-                'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
-                'upload_date': '20131229',
-                'timestamp': 1388354455,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:www\.)?
+                            (?:
+                                (?:
+                                    nfl|
+                                    buffalobills|
+                                    miamidolphins|
+                                    patriots|
+                                    newyorkjets|
+                                    baltimoreravens|
+                                    bengals|
+                                    clevelandbrowns|
+                                    steelers|
+                                    houstontexans|
+                                    colts|
+                                    jaguars|
+                                    titansonline|
+                                    denverbroncos|
+                                    kcchiefs|
+                                    raiders|
+                                    chargers|
+                                    dallascowboys|
+                                    giants|
+                                    philadelphiaeagles|
+                                    redskins|
+                                    chicagobears|
+                                    detroitlions|
+                                    packers|
+                                    vikings|
+                                    atlantafalcons|
+                                    panthers|
+                                    neworleanssaints|
+                                    buccaneers|
+                                    azcardinals|
+                                    stlouisrams|
+                                    49ers|
+                                    seahawks
+                                )\.com|
+                                .+?\.clubs\.nfl\.com
+                            )
+                        )/
+                        (?:.+?/)*
+                        (?P<id>[^/#?&]+)
+                    '''
+    _TESTS = [{
+        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'md5': '394ef771ddcd1354f665b471d78ec4c6',
+        'info_dict': {
+            'id': '0ap3000000398478',
+            'ext': 'mp4',
+            'title': 'Week 3: Redskins vs. Eagles highlights',
+            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'upload_date': '20140921',
+            'timestamp': 1411337580,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
+        'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
+        'info_dict': {
+            'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
+            'ext': 'mp4',
+            'title': 'LIVE: Post Game vs. Browns',
+            'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
+            'upload_date': '20131229',
+            'timestamp': 1388354455,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+        'info_dict': {
+            'id': '0ap3000000467607',
+            'ext': 'mp4',
+            'title': 'Frustrations flare on the field',
+            'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
+            'timestamp': 1422850320,
+            'upload_date': '20150202',
          },
-        {
-            'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
-            'info_dict': {
-                'id': '0ap3000000467607',
-                'ext': 'mp4',
-                'title': 'Frustrations flare on the field',
-                'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
-                'timestamp': 1422850320,
-                'upload_date': '20150202',
-            },
+    }, {
+        'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
+        'md5': '4c319e2f625ffd0b481b4382c6fc124c',
+        'info_dict': {
+            'id': 'n-238346',
+            'ext': 'mp4',
+            'title': '10 Days at Gillette',
+            'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
+            'timestamp': 1442618809,
+            'upload_date': '20150918',
          },
-        {
-            'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
-            'only_matching': True,
-        }
-    ]
+    }, {
+        'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
+        'only_matching': True,
+    }]
  
      @staticmethod
      def prepend_host(host, url):
@@ -95,13 +146,14 @@ class NFLIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          config_url = NFLIE.prepend_host(host, self._search_regex(
-            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
-            default='static/content/static/config/video/config.json'))
+            r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
+            webpage, 'config URL', default='static/content/static/config/video/config.json',
+            group='config'))
          # For articles, the id in the url is not the video id
          video_id = self._search_regex(
-            r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
-        config = self._download_json(config_url, video_id,
-                                     note='Downloading player config')
+            r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1',
+            webpage, 'video id', default=video_id, group='id')
+        config = self._download_json(config_url, video_id, 'Downloading player config')
          url_template = NFLIE.prepend_host(
              host, '{contentURLTemplate:}'.format(**config))
          video_data = self._download_json(
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py

index 279b18386197560346e1cbce716ecf7ff61af2f9..e98a5ef89d9bb034c72ffe590fda54fb67dc9b08 100644 (file)
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
  
  class NHLIE(NHLBaseInfoExtractor):
      IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
  
      _TESTS = [{
          'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor):
          'params': {
              'skip_download': True,  # Requires rtmpdump
          }
+    }, {
+        'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -146,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor):
  class NHLNewsIE(NHLBaseInfoExtractor):
      IE_NAME = 'nhl.com:news'
      IE_DESC = 'NHL news'
-    _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.nhl.com/ice/news.htm?id=750727',
          'md5': '4b3d1262e177687a3009937bd9ec0be8',
          'info_dict': {
@@ -159,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor):
              'duration': 37,
              'upload_date': '20150128',
          },
-    }
+    }, {
+        # iframe embed
+        'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
+        'md5': '9f663d1c006c90ac9fb82777d4294e12',
+        'info_dict': {
+            'id': '836127',
+            'ext': 'mp4',
+            'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
+            'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
+            'duration': 93,
+            'upload_date': '20150923',
+        },
+    }]
  
      def _real_extract(self, url):
          news_id = self._match_id(url)
          webpage = self._download_webpage(url, news_id)
          video_id = self._search_regex(
-            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
+            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
+             r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
              webpage, 'video id')
          return self._real_extract_video(video_id)
  
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py

index 7f842b5c2560211cc88280e2b97cf107af588bfe..a06d38afde37a0f4ad3947776910e9c3b5a39286 100644 (file)
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -1,7 +1,6 @@
  from __future__ import unicode_literals
  
  import re
-import json
  
  from .common import InfoExtractor
  from ..utils import str_to_int
@@ -9,61 +8,93 @@ from ..utils import str_to_int
  
  class NineGagIE(InfoExtractor):
      IE_NAME = '9gag'
-    _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
-        (?:
-            v/(?P<numid>[0-9]+)|
-            p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
-        )
-    '''
+    _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
  
      _TESTS = [{
-        "url": "http://9gag.tv/v/1912",
-        "info_dict": {
-            "id": "1912",
-            "ext": "mp4",
-            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
-            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
+        'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
+        'info_dict': {
+            'id': 'Kk2X5',
+            'ext': 'mp4',
+            'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
+            'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
              'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
              'uploader': 'CompilationChannel',
              'upload_date': '20131110',
-            "view_count": int,
-            "thumbnail": "re:^https?://",
+            'view_count': int,
          },
-        'add_ie': ['Youtube']
+        'add_ie': ['Youtube'],
      }, {
-        'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
+        'url': 'http://9gag.com/tv/p/aKolP3',
          'info_dict': {
-            'id': 'KklwM',
+            'id': 'aKolP3',
              'ext': 'mp4',
-            'display_id': 'alternate-banned-opening-scene-of-gravity',
-            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
-            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
-            'uploader': 'Krishna Shenoi',
-            'upload_date': '20140401',
-            'uploader_id': 'krishnashenoi93',
+            'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
+            'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
+            'uploader_id': 'rickmereki',
+            'uploader': 'Rick Mereki',
+            'upload_date': '20110803',
+            'view_count': int,
          },
+        'add_ie': ['Vimeo'],
+    }, {
+        'url': 'http://9gag.com/tv/p/KklwM',
+        'only_matching': True,
+    }, {
+        'url': 'http://9gag.tv/p/Kk2X5',
+        'only_matching': True,
+    }, {
+        'url': 'http://9gag.com/tv/embed/a5Dmvl',
+        'only_matching': True,
      }]
  
+    _EXTERNAL_VIDEO_PROVIDER = {
+        '1': {
+            'url': '%s',
+            'ie_key': 'Youtube',
+        },
+        '2': {
+            'url': 'http://player.vimeo.com/video/%s',
+            'ie_key': 'Vimeo',
+        },
+        '3': {
+            'url': 'http://instagram.com/p/%s',
+            'ie_key': 'Instagram',
+        },
+        '4': {
+            'url': 'http://vine.co/v/%s',
+            'ie_key': 'Vine',
+        },
+    }
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('numid') or mobj.group('id')
+        video_id = mobj.group('id')
          display_id = mobj.group('display_id') or video_id
  
          webpage = self._download_webpage(url, display_id)
  
-        post_view = json.loads(self._html_search_regex(
-            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
+        post_view = self._parse_json(
+            self._search_regex(
+                r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
+                webpage, 'post view'),
+            display_id)
  
-        youtube_id = post_view['videoExternalId']
+        ie_key = None
+        source_url = post_view.get('sourceUrl')
+        if not source_url:
+            external_video_id = post_view['videoExternalId']
+            external_video_provider = post_view['videoExternalProvider']
+            source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
+            ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
          title = post_view['title']
-        description = post_view['description']
-        view_count = str_to_int(post_view['externalView'])
+        description = post_view.get('description')
+        view_count = str_to_int(post_view.get('externalView'))
          thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
  
          return {
              '_type': 'url_transparent',
-            'url': youtube_id,
-            'ie_key': 'Youtube',
+            'url': source_url,
+            'ie_key': ie_key,
              'id': video_id,
              'display_id': display_id,
              'title': title,
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py

index 6b2f3f55a60d19ff3b4735027a399b6c38ad1310..b97f62fdb839f4cdb395c3bf0fa152ec8eace0ca 100644 (file)
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -1,64 +1,134 @@
  # encoding: utf-8
  from __future__ import unicode_literals
  
-import re
-
  from .brightcove import BrightcoveIE
  from .common import InfoExtractor
  from ..utils import ExtractorError
+from ..compat import (
+    compat_str,
+    compat_urllib_request,
+)
+
+
+class NownessBaseIE(InfoExtractor):
+    def _extract_url_result(self, post):
+        if post['type'] == 'video':
+            for media in post['media']:
+                if media['type'] == 'video':
+                    video_id = media['content']
+                    source = media['source']
+                    if source == 'brightcove':
+                        player_code = self._download_webpage(
+                            'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
+                            note='Downloading player JavaScript',
+                            errnote='Unable to download player JavaScript')
+                        bc_url = BrightcoveIE._extract_brightcove_url(player_code)
+                        if bc_url is None:
+                            raise ExtractorError('Could not find player definition')
+                        return self.url_result(bc_url, 'Brightcove')
+                    elif source == 'vimeo':
+                        return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+                    elif source == 'youtube':
+                        return self.url_result(video_id, 'Youtube')
+                    elif source == 'cinematique':
+                        # youtube-dl currently doesn't support cinematique
+                        # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
+                        pass
  
+    def _api_request(self, url, request_path):
+        display_id = self._match_id(url)
+        request = compat_urllib_request.Request(
+            'http://api.nowness.com/api/' + request_path % display_id,
+            headers={
+                'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
+            })
+        return display_id, self._download_json(request, display_id)
  
-class NownessIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
  
-    _TESTS = [
-        {
-            'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
-            'md5': '068bc0202558c2e391924cb8cc470676',
-            'info_dict': {
-                'id': '2520295746001',
-                'ext': 'mp4',
-                'title': 'Candor: The Art of Gesticulation',
-                'description': 'Candor: The Art of Gesticulation',
-                'thumbnail': 're:^https?://.*\.jpg',
-                'uploader': 'Nowness',
-            }
+class NownessIE(NownessBaseIE):
+    IE_NAME = 'nowness'
+    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
+        'md5': '068bc0202558c2e391924cb8cc470676',
+        'info_dict': {
+            'id': '2520295746001',
+            'ext': 'mp4',
+            'title': 'Candor: The Art of Gesticulation',
+            'description': 'Candor: The Art of Gesticulation',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Nowness',
          },
-        {
-            'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
-            'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
-            'info_dict': {
-                'id': '3716354522001',
-                'ext': 'mp4',
-                'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
-                'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
-                'thumbnail': 're:^https?://.*\.jpg',
-                'uploader': 'Nowness',
-            }
+    }, {
+        'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
+        'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+        'info_dict': {
+            'id': '3716354522001',
+            'ext': 'mp4',
+            'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+            'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Nowness',
          },
-    ]
+    }, {
+        # vimeo
+        'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
+        'md5': '9a5a6a8edf806407e411296ab6bc2a49',
+        'info_dict': {
+            'id': '130020913',
+            'ext': 'mp4',
+            'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
+            'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20150607',
+            'uploader': 'Cinema Sem Lei',
+            'uploader_id': 'cinemasemlei',
+        },
+    }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('slug')
+        _, post = self._api_request(url, 'post/getBySlug/%s')
+        return self._extract_url_result(post)
  
-        webpage = self._download_webpage(url, video_id)
-        player_url = self._search_regex(
-            r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL')
-        real_id = self._search_regex(
-            r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID')
  
-        player_code = self._download_webpage(
-            player_url, video_id,
-            note='Downloading player JavaScript',
-            errnote='Player download failed')
-        player_code = player_code.replace("'+d+'", real_id)
+class NownessPlaylistIE(NownessBaseIE):
+    IE_NAME = 'nowness:playlist'
+    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
+        'info_dict': {
+            'id': '3286',
+        },
+        'playlist_mincount': 8,
+    }
  
-        bc_url = BrightcoveIE._extract_brightcove_url(player_code)
-        if bc_url is None:
-            raise ExtractorError('Could not find player definition')
-        return {
-            '_type': 'url',
-            'url': bc_url,
-            'ie_key': 'Brightcove',
-        }
+    def _real_extract(self, url):
+        playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
+        entries = [self._extract_url_result(item) for item in playlist['items']]
+        return self.playlist_result(entries, playlist_id)
+
+
+class NownessSeriesIE(NownessBaseIE):
+    IE_NAME = 'nowness:series'
+    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
+    _TEST = {
+        'url': 'https://www.nowness.com/series/60-seconds',
+        'info_dict': {
+            'id': '60',
+            'title': '60 Seconds',
+            'description': 'One-minute wisdom in a new NOWNESS series',
+        },
+        'playlist_mincount': 4,
+    }
+
+    def _real_extract(self, url):
+        display_id, series = self._api_request(url, 'series/getBySlug/%s')
+        entries = [self._extract_url_result(post) for post in series['posts']]
+        series_title = None
+        series_description = None
+        translations = series.get('translations', [])
+        if translations:
+            series_title = translations[0].get('title') or translations[0]['seoTitle']
+            series_description = translations[0].get('seoDescription')
+        return self.playlist_result(
+            entries, compat_str(series['id']), series_title, series_description)
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py

index 66520c2c56f102e53fe84c5e9970558b4714247b..ccc88cfb1a8657ec4266a03bfa07c8aa0e598cd6 100644 (file)
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
  from .common import InfoExtractor
  from ..compat import compat_urllib_parse_unquote
  from ..utils import (
+    ExtractorError,
      unified_strdate,
      int_or_none,
      qualities,
@@ -28,6 +29,7 @@ class OdnoklassnikiIE(InfoExtractor):
              'like_count': int,
              'age_limit': 0,
          },
+        'skip': 'Video has been blocked',
      }, {
          # metadataUrl
          'url': 'http://ok.ru/video/63567059965189-0',
@@ -72,6 +74,12 @@ class OdnoklassnikiIE(InfoExtractor):
          webpage = self._download_webpage(
              'http://ok.ru/video/%s' % video_id, video_id)
  
+        error = self._search_regex(
+            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
+            webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
          player = self._parse_json(
              unescapeHTML(self._search_regex(
                  r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py

deleted file mode 100644 (file)

index d2ceedd..0000000
--- a/youtube_dl/extractor/openfilm.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
-from ..utils import (
-    parse_iso8601,
-    parse_age_limit,
-    int_or_none,
-)
-
-
-class OpenFilmIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)'
-    _TEST = {
-        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
-        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
-        'info_dict': {
-            'id': '32736',
-            'display_id': 'human-resources-remastered',
-            'ext': 'mp4',
-            'title': 'Human Resources (Remastered)',
-            'description': 'Social Engineering in the 20th Century.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 7164,
-            'timestamp': 1334756988,
-            'upload_date': '20120418',
-            'uploader_id': '41117',
-            'view_count': int,
-            'age_limit': 0,
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        player = compat_urllib_parse_unquote_plus(
-            self._og_search_video_url(webpage))
-
-        video = json.loads(self._search_regex(
-            r'\bp=({.+?})(?:&|$)', player, 'video JSON'))
-
-        video_url = '%s1.mp4' % video['location']
-        video_id = video.get('video_id')
-        display_id = video.get('alias') or display_id
-        title = video.get('title')
-        description = video.get('description')
-        thumbnail = video.get('main_thumb')
-        duration = int_or_none(video.get('duration'))
-        timestamp = parse_iso8601(video.get('dt_published'), ' ')
-        uploader_id = video.get('user_id')
-        view_count = int_or_none(video.get('views_count'))
-        age_limit = parse_age_limit(video.get('age_limit'))
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'url': video_url,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'uploader_id': uploader_id,
-            'view_count': view_count,
-            'age_limit': age_limit,
-        }
diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py

index bdc71017bc2b1ca12bf1fe70afb7bda95d9b8a2b..6d138ef25d2d5cec02a012f5a06af085a6c35d26 100644 (file)
--- a/youtube_dl/extractor/playwire.py
+++ b/youtube_dl/extractor/playwire.py
@@ -19,7 +19,7 @@ class PlaywireIE(InfoExtractor):
              'id': '3353705',
              'ext': 'mp4',
              'title': 'S04_RM_UCL_Rus',
-            'thumbnail': 're:^http://.*\.png$',
+            'thumbnail': 're:^https?://.*\.png$',
              'duration': 145.94,
          },
      }, {
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 7b0cdc41a3da8a84d1b0dcfb962cf99ca8e95532..a656ad85a1d5a6b7773e98c2a9be585803898db1 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -20,7 +20,7 @@ from ..aes import (
  
  
  class PornHubIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
+    _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
      _TESTS = [{
          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
          'md5': '882f488fa1f0026f023f33576004a2ed',
@@ -34,6 +34,9 @@ class PornHubIE(InfoExtractor):
      }, {
          'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
          'only_matching': True,
+    }, {
+        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+        'only_matching': True,
      }]
  
      @classmethod
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py

index 1631faf29f61c9cc15bca99394966c1917ca1a08..7ff1d06c46aacfa67a0134c878d635dce598028f 100644 (file)
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -5,6 +5,7 @@ import re
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
+    compat_urlparse,
  )
  from ..utils import (
      parse_duration,
@@ -72,6 +73,18 @@ class RaiIE(InfoExtractor):
                  'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
                  'uploader': 'RaiTre',
              }
+        },
+        {
+            'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
+            'md5': '037104d2c14132887e5e4cf114569214',
+            'info_dict': {
+                'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e',
+                'ext': 'flv',
+                'title': 'Il pacco',
+                'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
+                'uploader': 'RaiTre',
+                'upload_date': '20141221',
+            },
          }
      ]
  
@@ -90,11 +103,14 @@ class RaiIE(InfoExtractor):
          relinker_url = self._extract_relinker_url(webpage)
  
          if not relinker_url:
-            iframe_path = self._search_regex(
-                r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+            iframe_url = self._search_regex(
+                [r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
+                 r'drawMediaRaiTV\(["\'](.+?)["\']'],
                  webpage, 'iframe')
+            if not iframe_url.startswith('http'):
+                iframe_url = compat_urlparse.urljoin(url, iframe_url)
              webpage = self._download_webpage(
-                '%s/%s' % (host, iframe_path), video_id)
+                iframe_url, video_id)
              relinker_url = self._extract_relinker_url(webpage)
  
          relinker = self._download_json(
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py

index 6e9903d5edea3d377aad5554630dbaa1f52d2c6c..f76fb12c045391815df80e1f0905e2723ea5f649 100644 (file)
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@@ -16,7 +16,7 @@ class ShahidIE(InfoExtractor):
          'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
          'info_dict': {
              'id': '90574',
-            'ext': 'm3u8',
+            'ext': 'mp4',
              'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
              'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
              'duration': 2972,
@@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor):
                  compat_urllib_parse.urlencode({
                      'apiKey': 'sh@hid0nlin3',
                      'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
-                }).encode('utf-8')),
+                })),
              video_id, 'Downloading video JSON')
  
          video = video[api_vars['playerType']]
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index ed5dcc0d39a0bf014d8650f3df4110749ae4d649..2b60d354a15b19f2ae72a24ec793399f9bab0744 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor):
          },
      ]
  
-    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
+    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
      _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
  
      def report_resolve(self, video_id):
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py

index ae94f055ca9a55ac43dd22c54754d6872dc0f571..2c8e9b9410f09f23349584e7945bed64e5dd3a76 100644 (file)
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -1,24 +1,51 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-from .mitele import MiTeleIE
+import json
  
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
+from ..utils import (
+    get_element_by_attribute,
+    parse_duration,
+    strip_jsonp,
+)
  
-class TelecincoIE(MiTeleIE):
-    IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
+
+class TelecincoIE(InfoExtractor):
+    IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
+    _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
  
      _TESTS = [{
          'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+        'md5': '5cbef3ad5ef17bf0d21570332d140729',
          'info_dict': {
              'id': 'MDSVID20141015_0058',
              'ext': 'mp4',
              'title': 'Con Martín Berasategui, hacer un bacalao al ...',
              'duration': 662,
          },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+    }, {
+        'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
+        'md5': '0a5b9f3cc8b074f50a0578f823a12694',
+        'info_dict': {
+            'id': 'MDSVID20150916_0128',
+            'ext': 'mp4',
+            'title': '¿Quién es este ex futbolista con el que hablan ...',
+            'duration': 79,
+        },
+    }, {
+        'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
+        'md5': 'ad1bfaaba922dd4a295724b05b68f86a',
+        'info_dict': {
+            'id': 'MDSVID20150513_0220',
+            'ext': 'mp4',
+            'title': '#DOYLACARA. Con la trata no hay trato',
+            'duration': 50,
          },
      }, {
          'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
@@ -27,3 +54,41 @@ class TelecincoIE(MiTeleIE):
          'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
          'only_matching': True,
      }]
+
+    def _real_extract(self, url):
+        episode = self._match_id(url)
+        webpage = self._download_webpage(url, episode)
+        embed_data_json = self._search_regex(
+            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
+        ).replace('\'', '"')
+        embed_data = json.loads(embed_data_json)
+
+        domain = embed_data['mediaUrl']
+        if not domain.startswith('http'):
+            # only happens in telecinco.es videos
+            domain = 'http://' + domain
+        info_url = compat_urlparse.urljoin(
+            domain,
+            compat_urllib_parse_unquote(embed_data['flashvars']['host'])
+        )
+        info_el = self._download_xml(info_url, episode).find('./video/info')
+
+        video_link = info_el.find('videoUrl/link').text
+        token_query = compat_urllib_parse.urlencode({'id': video_link})
+        token_info = self._download_json(
+            embed_data['flashvars']['ov_tk'] + '?' + token_query,
+            episode,
+            transform_source=strip_jsonp
+        )
+        formats = self._extract_m3u8_formats(
+            token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
+
+        return {
+            'id': embed_data['videoId'],
+            'display_id': episode,
+            'title': info_el.find('title').text,
+            'formats': formats,
+            'description': get_element_by_attribute('class', 'text', webpage),
+            'thumbnail': info_el.find('thumb').text,
+            'duration': parse_duration(info_el.find('duration').text),
+        }
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py

index 84fe71aef8501dcd9055dc517ae9097844d99a5d..5f7ac4b35b6c4576e5fb0998d56eb21ec522b58a 100644 (file)
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -2,14 +2,12 @@
  
  from __future__ import unicode_literals
  
-import re
-import json
-
  from .common import InfoExtractor
+from ..compat import compat_str
  
  
  class TudouIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
      _TESTS = [{
          'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
          'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -27,41 +25,41 @@ class TudouIE(InfoExtractor):
              'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
              'thumbnail': 're:^https?://.*\.jpg$',
          }
+    }, {
+        'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html',
+        'only_matching': True,
      }]
  
      _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
  
-    def _url_for_id(self, id, quality=None):
-        info_url = "http://v2.tudou.com/f?id=" + str(id)
+    def _url_for_id(self, video_id, quality=None):
+        info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
          if quality:
              info_url += '&hd' + quality
-        webpage = self._download_webpage(info_url, id, "Opening the info webpage")
-        final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url')
+        xml_data = self._download_xml(info_url, video_id, "Opening the info XML page")
+        final_url = xml_data.text
          return final_url
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
-        m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
-        if m and m.group(1):
-            return {
-                '_type': 'url',
-                'url': 'youku:' + m.group(1),
-                'ie_key': 'Youku'
-            }
+        youku_vcode = self._search_regex(
+            r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
+        if youku_vcode:
+            return self.url_result('youku:' + youku_vcode, ie='Youku')
  
          title = self._search_regex(
-            r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
+            r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
          thumbnail_url = self._search_regex(
-            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
+            r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)
  
          player_url = self._search_regex(
-            r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
+            r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
              webpage, 'player URL', default=self._PLAYER_URL)
  
-        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
-        segments = json.loads(segs_json)
+        segments = self._parse_json(self._search_regex(
+            r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
          # It looks like the keys are the arguments that have to be passed as
          # the hd field in the request url, we pick the higher
          # Also, filter non-number qualities (see issue #3643).
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py

index 157bb74fe1a6ff164d7d14b1e942236197051686..078d283b23b42a8b2c668fbd40caebde0763581c 100644 (file)
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -1,10 +1,12 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
+from ..compat import compat_HTTPError
  from ..utils import (
+    ExtractorError,
      int_or_none,
      float_or_none,
-    str_to_int,
+    parse_iso8601,
  )
  
  
@@ -12,18 +14,41 @@ class VidmeIE(InfoExtractor):
      _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
      _TESTS = [{
          'url': 'https://vid.me/QNB',
-        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+        'md5': 'c62f1156138dc3323902188c5b5a8bd6',
          'info_dict': {
              'id': 'QNB',
              'ext': 'mp4',
              'title': 'Fishing for piranha - the easy way',
              'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
-            'duration': 119.92,
+            'thumbnail': 're:^https?://.*\.jpg',
              'timestamp': 1406313244,
              'upload_date': '20140725',
+            'age_limit': 0,
+            'duration': 119.92,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+        },
+    }, {
+        'url': 'https://vid.me/Gc6M',
+        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+        'info_dict': {
+            'id': 'Gc6M',
+            'ext': 'mp4',
+            'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
              'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1441211642,
+            'upload_date': '20150902',
+            'uploader': 'SunshineM',
+            'uploader_id': '3552827',
+            'age_limit': 0,
+            'duration': 223.72,
              'view_count': int,
              'like_count': int,
+            'comment_count': int,
+        },
+        'params': {
+            'skip_download': True,
          },
      }, {
          # tests uploader field
@@ -33,63 +58,95 @@ class VidmeIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'The Carver',
              'description': 'md5:e9c24870018ae8113be936645b93ba3c',
-            'duration': 97.859999999999999,
+            'thumbnail': 're:^https?://.*\.jpg',
              'timestamp': 1433203629,
              'upload_date': '20150602',
              'uploader': 'Thomas',
-            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader_id': '109747',
+            'age_limit': 0,
+            'duration': 97.859999999999999,
              'view_count': int,
              'like_count': int,
+            'comment_count': int,
          },
          'params': {
              'skip_download': True,
          },
      }, {
-        # From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
+        # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
          'url': 'https://vid.me/e/Wmur',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'Wmur',
+            'ext': 'mp4',
+            'title': 'naked smoking & stretching',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1430931613,
+            'upload_date': '20150506',
+            'uploader': 'naked-yogi',
+            'uploader_id': '1638622',
+            'age_limit': 18,
+            'duration': 653.26999999999998,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
      }]
  
      def _real_extract(self, url):
-        url = url.replace('vid.me/e/', 'vid.me/')
          video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
  
-        video_url = self._html_search_regex(
-            r'<source src="([^"]+)"', webpage, 'video URL')
+        try:
+            response = self._download_json(
+                'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                response = self._parse_json(e.cause.read(), video_id)
+            else:
+                raise
+
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error), expected=True)
  
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage, default='')
-        thumbnail = self._og_search_thumbnail(webpage)
-        timestamp = int_or_none(self._og_search_property(
-            'updated_time', webpage, fatal=False))
-        width = int_or_none(self._og_search_property(
-            'video:width', webpage, fatal=False))
-        height = int_or_none(self._og_search_property(
-            'video:height', webpage, fatal=False))
-        duration = float_or_none(self._html_search_regex(
-            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
-        view_count = str_to_int(self._html_search_regex(
-            r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
-            webpage, 'view count', fatal=False))
-        like_count = str_to_int(self._html_search_regex(
-            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
-            webpage, 'like count', fatal=False))
-        uploader = self._html_search_regex(
-            'class="video_author_username"[^>]*>([^<]+)',
-            webpage, 'uploader', default=None)
+        video = response['video']
+
+        formats = [{
+            'format_id': f.get('type'),
+            'url': f['uri'],
+            'width': int_or_none(f.get('width')),
+            'height': int_or_none(f.get('height')),
+            'preference': 0 if f.get('type', '').endswith('clip') else 1,
+        } for f in video.get('formats', []) if f.get('uri')]
+        self._sort_formats(formats)
+
+        title = video['title']
+        description = video.get('description')
+        thumbnail = video.get('thumbnail_url')
+        timestamp = parse_iso8601(video.get('date_created'), ' ')
+        uploader = video.get('user', {}).get('username')
+        uploader_id = video.get('user', {}).get('user_id')
+        age_limit = 18 if video.get('nsfw') is True else 0
+        duration = float_or_none(video.get('duration'))
+        view_count = int_or_none(video.get('view_count'))
+        like_count = int_or_none(video.get('likes_count'))
+        comment_count = int_or_none(video.get('comment_count'))
  
          return {
              'id': video_id,
-            'url': video_url,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'age_limit': age_limit,
              'timestamp': timestamp,
-            'width': width,
-            'height': height,
              'duration': duration,
              'view_count': view_count,
              'like_count': like_count,
-            'uploader': uploader,
+            'comment_count': comment_count,
+            'formats': formats,
          }
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py

index 15377097e658b20e75a08f19b370be3bef2158c7..c76c20614e49b7468234aed68a985f6476fe1d0a 100644 (file)
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -2,6 +2,7 @@
  from __future__ import unicode_literals
  
  import re
+import itertools
  
  from .common import InfoExtractor
  
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
          mobj = re.match(self._VALID_URL, url)
          program = mobj.group('program')
  
-        webpage = self._download_webpage(url, program)
-
          page_id = mobj.group('page')
          if page_id:
              page_id = int(page_id)
              start_page = page_id
-            last_page = start_page + 1
              playlist_id = '%s-page%d' % (program, page_id)
          else:
              start_page = 0
-            last_page = int(self._search_regex(
-                r'videos\?page=(\d+)">laatste</a>',
-                webpage, 'last page', default=0)) + 1
              playlist_id = program
  
          entries = []
-        for current_page_id in range(start_page, last_page):
+        for current_page_id in itertools.count(start_page):
              current_page = self._download_webpage(
                  'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
                  program,
-                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+                'Downloading page %d' % (current_page_id + 1))
              page_entries = [
                  self.url_result('http://www.vier.be' + video_url, 'Vier')
                  for video_url in re.findall(
                      r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
              entries.extend(page_entries)
+            if page_id or '>Meer<' not in current_page:
+                break
  
          return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py

index cda02ba24b7cc01c317bf5d67ced5f231cfd2a80..632e57fb4277dbf23c3fa21683f27c70d278f996 100644 (file)
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
  
  from .common import InfoExtractor
  from ..compat import (
+    compat_HTTPError,
      compat_urllib_request,
      compat_urllib_parse,
      compat_urllib_parse_unquote,
  )
  from ..utils import (
      determine_ext,
+    ExtractorError,
      int_or_none,
      parse_iso8601,
      HEADRequest,
@@ -16,14 +18,14 @@ from ..utils import (
  
  
  class ViewsterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
+    _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
      _TESTS = [{
          # movie, Type=Movie
          'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
-        'md5': '14d3cfffe66d57b41ae2d9c873416f01',
+        'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
          'info_dict': {
              'id': '1140-11855-000',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'The listening Project',
              'description': 'md5:bac720244afd1a8ea279864e67baa071',
              'timestamp': 1214870400,
@@ -33,10 +35,10 @@ class ViewsterIE(InfoExtractor):
      }, {
          # series episode, Type=Episode
          'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
-        'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
+        'md5': '9243079a8531809efe1b089db102c069',
          'info_dict': {
              'id': '1284-19427-001',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'The World and a Wall',
              'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
              'timestamp': 1428192000,
@@ -61,6 +63,14 @@ class ViewsterIE(InfoExtractor):
              'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
          },
          'playlist_mincount': 16,
+    }, {
+        # geo restricted series
+        'url': 'https://www.viewster.com/serie/1280-18794-002/',
+        'only_matching': True,
+    }, {
+        # geo restricted video
+        'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
+        'only_matching': True,
      }]
  
      _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
@@ -74,8 +84,8 @@ class ViewsterIE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
          # Get 'api_token' cookie
-        self._request_webpage(HEADRequest(url), video_id)
-        cookies = self._get_cookies(url)
+        self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
+        cookies = self._get_cookies('http://www.viewster.com/')
          self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
  
          info = self._download_json(
@@ -85,10 +95,16 @@ class ViewsterIE(InfoExtractor):
          entry_id = info.get('Id') or info['id']
  
          # unfinished serie has no Type
-        if info.get('Type') in ['Serie', None]:
-            episodes = self._download_json(
-                'https://public-api.viewster.com/series/%s/episodes' % entry_id,
-                video_id, 'Downloading series JSON')
+        if info.get('Type') in ('Serie', None):
+            try:
+                episodes = self._download_json(
+                    'https://public-api.viewster.com/series/%s/episodes' % entry_id,
+                    video_id, 'Downloading series JSON')
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                    self.raise_geo_restricted()
+                else:
+                    raise
              entries = [
                  self.url_result(
                      'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
@@ -98,7 +114,7 @@ class ViewsterIE(InfoExtractor):
              return self.playlist_result(entries, video_id, title, description)
  
          formats = []
-        for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
+        for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
              media = self._download_json(
                  'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
                  % (entry_id, compat_urllib_parse.quote(media_type)),
@@ -120,9 +136,22 @@ class ViewsterIE(InfoExtractor):
                      fatal=False  # m3u8 sometimes fail
                  ))
              else:
-                formats.append({
+                format_id = media.get('Bitrate')
+                f = {
                      'url': video_url,
-                })
+                    'format_id': 'mp4-%s' % format_id,
+                    'height': int_or_none(media.get('Height')),
+                    'width': int_or_none(media.get('Width')),
+                    'preference': 1,
+                }
+                if format_id and not f['height']:
+                    f['height'] = int_or_none(self._search_regex(
+                        r'^(\d+)[pP]$', format_id, 'height', default=None))
+                formats.append(f)
+
+        if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
+            self.raise_geo_restricted()
+
          self._sort_formats(formats)
  
          synopsis = info.get('Synopsis', {})
diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py

index 72eb010f8d1e480e37f133545ad6f8e1a64fef3f..ec8b999983f6ae89a3bf53909e9d70a463f87f52 100644 (file)
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -19,25 +19,25 @@ class WashingtonPostIE(InfoExtractor):
              'title': 'Sinkhole of bureaucracy',
          },
          'playlist': [{
-            'md5': '79132cc09ec5309fa590ae46e4cc31bc',
+            'md5': 'b9be794ceb56c7267d410a13f99d801a',
              'info_dict': {
                  'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                  'ext': 'mp4',
                  'title': 'Breaking Points: The Paper Mine',
-                'duration': 1287,
+                'duration': 1290,
                  'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                  'uploader': 'The Washington Post',
                  'timestamp': 1395527908,
                  'upload_date': '20140322',
              },
          }, {
-            'md5': 'e1d5734c06865cc504ad99dc2de0d443',
+            'md5': '1fff6a689d8770966df78c8cb6c8c17c',
              'info_dict': {
                  'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                  'ext': 'mp4',
                  'title': 'The town bureaucracy sustains',
                  'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
-                'duration': 2217,
+                'duration': 2220,
                  'timestamp': 1395528005,
                  'upload_date': '20140322',
                  'uploader': 'The Washington Post',
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py

index 97315750fd59198022a72eca53482efdd8d98e75..8938c0e4561d2c6163158e409106dd2e928e85bf 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -63,7 +63,9 @@ class XHamsterIE(InfoExtractor):
          mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
          webpage = self._download_webpage(mrss_url, video_id)
  
-        title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
+        title = self._html_search_regex(
+            [r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)</title>',
+             r'<h1>([^<]+)</h1>'], webpage, 'title')
  
          # Only a few videos have an description
          mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py

index 5aac8adb36e2ad12e798cb4f0c77e5b204c7b91b..8bbac54e2959c095cfee08ab0d8908b7e4248f42 100644 (file)
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -19,7 +19,7 @@ class XuiteIE(InfoExtractor):
      _TESTS = [{
          # Audio
          'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
-        'md5': '63a42c705772aa53fd4c1a0027f86adf',
+        'md5': 'e79284c87b371424885448d11f6398c8',
          'info_dict': {
              'id': '3860914',
              'ext': 'mp3',
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py

index b8579b57307915a1575a9f3dcc6a4933a1b0d2bb..fca5ddc6971ee1743cd97f3741d0fe3d6443622f 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -144,6 +144,17 @@ class YahooIE(InfoExtractor):
          }, {
              'url': 'https://tw.news.yahoo.com/-100120367.html',
              'only_matching': True,
+        }, {
+            # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
+            'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+            'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+            'info_dict': {
+                'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+                'ext': 'mp4',
+                'title': 'Communitary - Community Episode 1: Ladders',
+                'description': 'md5:8fc39608213295748e1e289807838c97',
+                'duration': 1646,
+            },
          }
      ]
  
@@ -171,6 +182,19 @@ class YahooIE(InfoExtractor):
          if nbc_sports_url:
              return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
  
+        # The query result is often embedded in the webpage as JSON. Explicit requests
+        # to the video API sometimes fail with a geo restriction error, so prefer the
+        # embedded query result whenever it is present.
+        config_json = self._search_regex(
+            r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
+            webpage, 'videoplayer applet', default=None)
+        if config_json:
+            config = self._parse_json(config_json, display_id, fatal=False)
+            if config:
+                sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
+                if sapi:
+                    return self._extract_info(display_id, sapi, webpage)
+
          items_json = self._search_regex(
              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
              default=None)
@@ -190,22 +214,10 @@ class YahooIE(InfoExtractor):
              video_id = info['id']
          return self._get_info(video_id, display_id, webpage)
  
-    def _get_info(self, video_id, display_id, webpage):
-        region = self._search_regex(
-            r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
-            webpage, 'region', fatal=False, default='US')
-        data = compat_urllib_parse.urlencode({
-            'protocol': 'http',
-            'region': region,
-        })
-        query_url = (
-            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
-            '{id}?{data}'.format(id=video_id, data=data))
-        query_result = self._download_json(
-            query_url, display_id, 'Downloading video info')
-
-        info = query_result['query']['results']['mediaObj'][0]
+    def _extract_info(self, display_id, query, webpage):
+        info = query['query']['results']['mediaObj'][0]
          meta = info.get('meta')
+        video_id = info.get('id')
  
          if not meta:
              msg = info['status'].get('msg')
@@ -231,6 +243,9 @@ class YahooIE(InfoExtractor):
                      'ext': 'flv',
                  })
              else:
+                if s.get('format') == 'm3u8_playlist':
+                    format_info['protocol'] = 'm3u8_native'
+                    format_info['ext'] = 'mp4'
                  format_url = compat_urlparse.urljoin(host, path)
                  format_info['url'] = format_url
              formats.append(format_info)
@@ -264,6 +279,21 @@ class YahooIE(InfoExtractor):
              'subtitles': subtitles,
          }
  
+    def _get_info(self, video_id, display_id, webpage):
+        region = self._search_regex(
+            r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
+            webpage, 'region', fatal=False, default='US')
+        data = compat_urllib_parse.urlencode({
+            'protocol': 'http',
+            'region': region,
+        })
+        query_url = (
+            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
+            '{id}?{data}'.format(id=video_id, data=data))
+        query_result = self._download_json(
+            query_url, display_id, 'Downloading video info')
+        return self._extract_info(display_id, query_result, webpage)
+
  
  class YahooSearchIE(SearchInfoExtractor):
      IE_DESC = 'Yahoo screen search'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 97ce365505f246176891a6d486f38a3f92e6f711..b252e36e1162406dedfcc531d7d038e6bd357348 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1654,12 +1654,15 @@ class YoutubeChannelIE(InfoExtractor):
          channel_page = self._download_webpage(
              url + '?view=57', channel_id,
              'Downloading channel page', fatal=False)
-        channel_playlist_id = self._html_search_meta(
-            'channelId', channel_page, 'channel id', default=None)
-        if not channel_playlist_id:
-            channel_playlist_id = self._search_regex(
-                r'data-channel-external-id="([^"]+)"',
-                channel_page, 'channel id', default=None)
+        if channel_page is False:
+            channel_playlist_id = False
+        else:
+            channel_playlist_id = self._html_search_meta(
+                'channelId', channel_page, 'channel id', default=None)
+            if not channel_playlist_id:
+                channel_playlist_id = self._search_regex(
+                    r'data-channel-external-id="([^"]+)"',
+                    channel_page, 'channel id', default=None)
          if channel_playlist_id and channel_playlist_id.startswith('UC'):
              playlist_id = 'UU' + channel_playlist_id[2:]
              return self.url_result(
@@ -1970,6 +1973,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
              annotation_id=annotation_[^&]+|
              x-yt-cl=[0-9]+|
              hl=[^&]*|
+            t=[0-9]+
          )?
          |
              attribution_link\?a=[^&]+
@@ -1992,6 +1996,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
      }, {
          'url': 'https://www.youtube.com/watch?hl=en-GB',
          'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/watch?t=2372',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py

index 1f723908be8d4ff0247affc5aed9ffc44e777602..4f320e124ae8fcd44f44e2753e73d2408d03efd8 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -135,7 +135,10 @@ class FFmpegPostProcessor(PostProcessor):
  
          files_cmd = []
          for path in input_paths:
-            files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
+            files_cmd.extend([
+                encodeArgument('-i'),
+                encodeFilename(self._ffmpeg_filename_argument(path), True)
+            ])
          cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
                 files_cmd +
                 [encodeArgument(o) for o in opts] +
@@ -155,10 +158,10 @@ class FFmpegPostProcessor(PostProcessor):
          self.run_ffmpeg_multiple_files([path], out_path, opts)
  
      def _ffmpeg_filename_argument(self, fn):
-        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
-        if fn.startswith('-'):
-            return './' + fn
-        return fn
+        # Always use 'file:' because the filename may contain ':' (ffmpeg
+        # interprets that as a protocol) or can start with '-' (-- is broken in
+        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
+        return 'file:' + fn
  
  
  class FFmpegExtractAudioPP(FFmpegPostProcessor):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 206dd56bcc6da0278543c54f0066df25f50842de..1dc3153fd901e8fef6241e747f863a52a221e12d 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -619,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
      # expected HTTP responses to meet HTTP/1.0 or later (see also
      # https://github.com/rg3/youtube-dl/issues/6727)
      if sys.version_info < (3, 0):
-        kwargs['strict'] = True
+        kwargs[b'strict'] = True
      hc = http_class(*args, **kwargs)
      source_address = ydl_handler._params.get('source_address')
      if source_address is not None:
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 6bc689b7516ef4979edb3ff63bf34ab11d9c0ace..7ef4f275562c578a194757a3fa3ee7ee8dfd95da 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.09.03'
+__version__ = '2015.09.22'
author	Yen Chi Hsuan <yan12125@gmail.com>
	Fri, 25 Sep 2015 09:02:10 +0000 (17:02 +0800)
committer	Yen Chi Hsuan <yan12125@gmail.com>
	Fri, 25 Sep 2015 09:02:10 +0000 (17:02 +0800)
AUTHORS		patch \| blob \| history
README.md		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
youtube_dl/compat.py		patch \| blob \| history
youtube_dl/downloader/hls.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/arte.py		patch \| blob \| history
youtube_dl/extractor/bbc.py		patch \| blob \| history
youtube_dl/extractor/clubic.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/condenast.py		patch \| blob \| history
youtube_dl/extractor/divxstage.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/hostingbulk.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/iconosquare.py		patch \| blob \| history
youtube_dl/extractor/iqiyi.py		patch \| blob \| history
youtube_dl/extractor/mitele.py		patch \| blob \| history
youtube_dl/extractor/ndr.py		patch \| blob \| history
youtube_dl/extractor/nfl.py		patch \| blob \| history
youtube_dl/extractor/nhl.py		patch \| blob \| history
youtube_dl/extractor/ninegag.py		patch \| blob \| history
youtube_dl/extractor/nowness.py		patch \| blob \| history
youtube_dl/extractor/odnoklassniki.py		patch \| blob \| history
youtube_dl/extractor/openfilm.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/playwire.py		patch \| blob \| history
youtube_dl/extractor/pornhub.py		patch \| blob \| history
youtube_dl/extractor/rai.py		patch \| blob \| history
youtube_dl/extractor/shahid.py		patch \| blob \| history
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/telecinco.py		patch \| blob \| history
youtube_dl/extractor/tudou.py		patch \| blob \| history
youtube_dl/extractor/vidme.py		patch \| blob \| history
youtube_dl/extractor/vier.py		patch \| blob \| history
youtube_dl/extractor/viewster.py		patch \| blob \| history
youtube_dl/extractor/washingtonpost.py		patch \| blob \| history
youtube_dl/extractor/xhamster.py		patch \| blob \| history
youtube_dl/extractor/xuite.py		patch \| blob \| history
youtube_dl/extractor/yahoo.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/postprocessor/ffmpeg.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history