Merge branch 'radiojavan' of https://github.com/mtp1376/youtube-dl into mtp1376-radio...

author Sergey M․ <dstftw@gmail.com>

Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)

committer Sergey M․ <dstftw@gmail.com>

Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)
author Sergey M․ <dstftw@gmail.com>
Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)
committer Sergey M․ <dstftw@gmail.com>
Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)
diff --git a/AUTHORS b/AUTHORS

index 59f1b5f21345b95622d4c21af7354f355dc782f8..48769320a6dbbdfd86b89c4adecfe7fdeef3d94e 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -118,3 +118,5 @@ Devin J. Pohly
  Eduardo Ferro Aldama
  Jeff Buchbinder
  Amish Bhadeshia
+Joram Schrijver
+Will W.
diff --git a/Makefile b/Makefile

index c6c76274f995a85185290d35868b974c13240aa2..fdb1abb60cacfe49295a7438e3d0f4f51c248359 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
  
  clean:
         rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
-       find -name "*.pyc" -delete
+       find . -name "*.pyc" -delete
  
  PREFIX ?= /usr/local
  BINDIR ?= $(PREFIX)/bin
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py

index 6a5bd9eda333246c47064bf84cfc03da09de4caf..7a219ebe97c555be79a55cf4dc30bf2cb823ca28 100644 (file)
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -28,7 +28,7 @@ for test in get_testcases():
      if METHOD == 'EURISTIC':
          try:
              webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
-        except:
+        except Exception:
              print('\nFail: {0}'.format(test['name']))
              continue
  
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index baf7b3880fd6180f19cdf44f04a9d38d2e0d4062..2785b958706978b004349e452f16aea8a70c4df7 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -2,6 +2,8 @@
   - **1tv**: Первый канал
   - **1up.com**
   - **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
   - **24video**
   - **3sat**
   - **4tube**
@@ -109,6 +111,7 @@
   - **DctpTv**
   - **DeezerPlaylist**
   - **defense.gouv.fr**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
   - **Discovery**
   - **divxstage**: DivxStage
   - **Dotsub**
@@ -118,6 +121,7 @@
   - **DrTuber**
   - **DRTV**
   - **Dump**
+ - **Dumpert**
   - **dvtv**: http://video.aktualne.cz/
   - **EaglePlatform**
   - **EbaumsWorld**
@@ -251,6 +255,7 @@
   - **Mgoon**
   - **Minhateca**
   - **MinistryGrid**
+ - **miomio.tv**
   - **mitele.es**
   - **mixcloud**
   - **MLB**
@@ -284,6 +289,8 @@
   - **NBA**
   - **NBC**
   - **NBCNews**
+ - **NBCSports**
+ - **NBCSportsVPlayer**
   - **ndr**: NDR.de - Mediathek
   - **NDTV**
   - **NerdCubedFeed**
@@ -380,6 +387,8 @@
   - **rutube:movie**: Rutube movies
   - **rutube:person**: Rutube person videos
   - **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
   - **Sandia**: Sandia National Laboratories
   - **Sapo**: SAPO Vídeos
   - **savefrom.net**
@@ -497,9 +506,11 @@
   - **Urort**: NRK P3 Urørt
   - **ustream**
   - **ustream:channel**
+ - **Varzesh3**
   - **Vbox7**
   - **VeeHD**
   - **Veoh**
+ - **Vessel**
   - **Vesti**: Вести.Ru
   - **Vevo**
   - **VGTV**
@@ -588,7 +599,7 @@
   - **youtube:show**: YouTube.com (multi-season) shows
   - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
   - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
   - **Zapiks**
   - **ZDF**
   - **ZDFChannel**
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index 6ae168b7f472938a3a69344fac123e047478ee73..a9db42b300864180c10dca730f772f7f5a26aad8 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
          self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
  
      def test_youtube_feeds(self):
-        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
          self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
          self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
          self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
diff --git a/test/test_utils.py b/test/test_utils.py

index a8ab876850d3645eb615d552162128befdac316d..abaf1ab7331eab044c684b592bd5bfb85f4d39e0 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -200,6 +200,8 @@ class TestUtil(unittest.TestCase):
  
      def test_unescape_html(self):
          self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
          self.assertEqual(
              unescapeHTML('&eacute;'), 'é')
  
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index b5ef5e009f972cb9308bf5c09c561ce393c05a2a..ce4b72fd3191f05d8035eba5c34fff342affae82 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1701,10 +1701,10 @@ class YoutubeDL(object):
              out = out.decode().strip()
              if re.match('[0-9a-f]+', out):
                  self._write_string('[debug] Git HEAD: ' + out + '\n')
-        except:
+        except Exception:
              try:
                  sys.exc_clear()
-            except:
+            except Exception:
                  pass
          self._write_string('[debug] Python version %s - %s\n' % (
              platform.python_version(), platform_name()))
@@ -1768,6 +1768,14 @@ class YoutubeDL(object):
  
          debuglevel = 1 if self.params.get('debug_printtraffic') else 0
          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+        # The ssl context is only available in python 2.7.9 and 3.x
+        if hasattr(https_handler, '_context'):
+            ctx = https_handler._context
+            # get_ca_certs is unavailable prior to python 3.4
+            if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0:
+                self.report_warning(
+                    'No ssl certificates were loaded, urls that use https '
+                    'won\'t work')
          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
          opener = compat_urllib_request.build_opener(
              proxy_handler, https_handler, cookie_processor, ydlh)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 852b2fc3db24b85138b44e7a5cf8f4338d787ce7..1c8b411b7f037d4bce2face086f0743f4f23003a 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -189,10 +189,6 @@ def _real_main(argv=None):
      if opts.allsubtitles and not opts.writeautomaticsub:
          opts.writesubtitles = True
  
-    if sys.version_info < (3,):
-        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
-        if opts.outtmpl is not None:
-            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
      outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
                 (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
                 (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index b2bf149ef63ffb1c31bfb02f976f5cce2dbadad3..973bcd32074107f70c1b781e95b97ef34501b88f 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -389,7 +389,7 @@ else:
                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
              out, err = sp.communicate()
              lines, columns = map(int, out.split())
-        except:
+        except Exception:
              pass
          return _terminal_size(columns, lines)
  
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py

index 8ed5c19a6b8079c9807faa4fa7a505a398974975..a0fc5ead06a4e8adba7886d2a4087d28ca9cdbcb 100644 (file)
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -204,7 +204,7 @@ class FileDownloader(object):
              return
          try:
              os.utime(filename, (time.time(), filetime))
-        except:
+        except Exception:
              pass
          return filetime
  
@@ -318,7 +318,7 @@ class FileDownloader(object):
          )
  
          continuedl_and_exists = (
-            self.params.get('continuedl', False) and
+            self.params.get('continuedl', True) and
              os.path.isfile(encodeFilename(filename)) and
              not self.params.get('nopart', False)
          )
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py

index 4047d7167478b34c9cacc79dcbbfeee1bb31d317..d136bebd1fe45761312bd90c31a95ddaf1754271 100644 (file)
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
  
          open_mode = 'wb'
          if resume_len != 0:
-            if self.params.get('continuedl', False):
+            if self.params.get('continuedl', True):
                  self.report_resuming_byte(resume_len)
                  request.add_header('Range', 'bytes=%d-' % resume_len)
                  open_mode = 'ab'
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py

index 89e98ae61e128c80eab5b0e04109b1baa2ecff7e..ddf5724ae6f05259194c67473bfd212ddeb896ff 100644 (file)
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
          protocol = info_dict.get('rtmp_protocol', None)
          real_time = info_dict.get('rtmp_real_time', False)
          no_resume = info_dict.get('no_resume', False)
-        continue_dl = info_dict.get('continuedl', False)
+        continue_dl = info_dict.get('continuedl', True)
  
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index df4a7419a21cb28e826ab77da1bc4799da3ff93e..d7e8138be6fb9b6891c020b69a83ffe225f619ae 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -106,6 +106,7 @@ from .dbtv import DBTVIE
  from .dctp import DctpTvIE
  from .deezer import DeezerPlaylistIE
  from .dfb import DFBIE
+from .dhm import DHMIE
  from .dotsub import DotsubIE
  from .douyutv import DouyuTVIE
  from .dreisat import DreiSatIE
@@ -114,6 +115,7 @@ from .drtuber import DrTuberIE
  from .drtv import DRTVIE
  from .dvtv import DVTVIE
  from .dump import DumpIE
+from .dumpert import DumpertIE
  from .defense import DefenseGouvFrIE
  from .discovery import DiscoveryIE
  from .divxstage import DivxStageIE
@@ -274,6 +276,7 @@ from .metacritic import MetacriticIE
  from .mgoon import MgoonIE
  from .minhateca import MinhatecaIE
  from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
  from .mit import TechTVMITIE, MITIE, OCWMITIE
  from .mitele import MiTeleIE
  from .mixcloud import MixcloudIE
@@ -309,6 +312,8 @@ from .nba import NBAIE
  from .nbc import (
      NBCIE,
      NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
  )
  from .ndr import NDRIE
  from .ndtv import NDTVIE
@@ -421,6 +426,10 @@ from .rutube import (
  )
  from .rutv import RUTVIE
  from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariCourseIE,
+)
  from .sapo import SapoIE
  from .savefrom import SaveFromIE
  from .sbs import SBSIE
@@ -553,6 +562,7 @@ from .varzesh3 import Varzesh3IE
  from .vbox7 import Vbox7IE
  from .veehd import VeeHDIE
  from .veoh import VeohIE
+from .vessel import VesselIE
  from .vesti import VestiIE
  from .vevo import VevoIE
  from .vgtv import VGTVIE
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py

index 4a88ccd13caf604f3ea892c6784d603434fb06ee..0dca29b712c79a27fb621f094a6f64ab503ba3df 100644 (file)
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -6,32 +6,39 @@ from .common import InfoExtractor
  
  
  class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
  
      _TEST = {
-        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
          # The md5 checksum changes
          'info_dict': {
              'id': 'qurhIVlJSB6hzkVi229d8g',
              'ext': 'flv',
              'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+            'description': 'md5:a8ba0302912d03d246979735c17d2761',
          },
      }
  
      def _real_extract(self, url):
          name = self._match_id(url)
          webpage = self._download_webpage(url, name)
-
-        f4m_url = self._search_regex(
-            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'f4m url')
+        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
          title = re.sub(': Video$', '', self._og_search_title(webpage))
  
+        embed_info = self._download_json(
+            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+        formats = []
+        for stream in embed_info['streams']:
+            if stream["muxing_format"] == "TS":
+                formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+            else:
+                formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+        self._sort_formats(formats)
+
          return {
-            'id': name.split('-')[-1],
+            'id': video_id,
              'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, name),
+            'formats': formats,
              'description': self._og_search_description(webpage),
              'thumbnail': self._og_search_thumbnail(webpage),
          }
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py

index 90ea074387ef6afe4aaa87a41c13ec6cf5a1aa7b..0a77e951c52ebe9b3a1307203e31d0658c2c89cc 100644 (file)
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
  
  class CNNIE(InfoExtractor):
      _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))'''
  
      _TESTS = [{
          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,9 @@ class CNNIE(InfoExtractor):
              'description': 'md5:e7223a503315c9f150acac52e76de086',
              'upload_date': '20141222',
          }
+    }, {
+        'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py

index 4f67c3aacc5dec5410b76d08728eecaf429f222b..47d58330b6cfd5b1df32374e9debb11a89347554 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
      def _build_request(url):
          """Build a request with the family filter disabled"""
          request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
-        request.add_header('Cookie', 'ff=off')
+        request.add_header('Cookie', 'family_filter=off; ff=off')
          return request
  
  
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
              video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
  
          embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
+        embed_request = self._build_request(embed_url)
+        embed_page = self._download_webpage(
+            embed_request, video_id, 'Downloading embed page')
          info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                    'video info', flags=re.MULTILINE)
          info = json.loads(info)
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py

new file mode 100644 (file)

index 0000000..3ed1f16
--- /dev/null
+++ b/youtube_dl/extractor/dhm.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+    """Extractor for film pages of the dhm.de "Filmarchiv".
+
+    The film page references an XSPF playlist through a ``file: '...'``
+    entry; the playlist track supplies the media URL and the thumbnail,
+    while title, description and duration are scraped from the page itself.
+    """
+
+    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+    # The id is the last path component below /filmarchiv/<section>/...
+    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+        'md5': '11c475f670209bf6acca0b2b7ef51827',
+        'info_dict': {
+            'id': 'the-marshallplan-at-work-in-west-germany',
+            'ext': 'flv',
+            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+            'description': 'md5:1fabd480c153f97b07add61c44407c82',
+            'duration': 660,
+            # Raw string: '\.' is not a valid escape in a normal literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+        'md5': '09890226332476a3e3f6f2cb74734aa5',
+        'info_dict': {
+            'id': 'rolle-1',
+            'ext': 'flv',
+            'title': 'ROLLE 1',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the film page at ``url``.
+
+        Returns a dict with ``id``, ``url``, ``title``, ``description``,
+        ``duration`` and ``thumbnail``. Description and duration fall back
+        to None when the page does not contain them; the playlist URL, the
+        video location and the title are looked up without a default.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The page carries the playlist location as  file: '<url>'
+        playlist_url = self._search_regex(
+            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+        playlist = self._download_xml(playlist_url, video_id)
+
+        # XSPF elements live in the http://xspf.org/ns/0/ namespace.
+        track = playlist.find(
+            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+        video_url = xpath_text(
+            track, './{http://xspf.org/ns/0/}location',
+            'video url', fatal=True)
+        thumbnail = xpath_text(
+            track, './{http://xspf.org/ns/0/}image',
+            'thumbnail')
+
+        # Prefer the dc:title attribute, fall back to the <title> element.
+        title = self._search_regex(
+            [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+            webpage, 'title').strip()
+        description = self._html_search_regex(
+            r'<p><strong>Description:</strong>(.+?)</p>',
+            webpage, 'description', default=None)
+        duration = parse_duration(self._search_regex(
+            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+            webpage, 'duration', default=None))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py

index d7956e6e4a20d733947f70a2933e06c2f0d144de..479430c51072ab91e976df4d459af372c5608cdd 100644 (file)
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -1,19 +1,23 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import hashlib
+import time
  from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
  
  
  class DouyuTVIE(InfoExtractor):
      _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.douyutv.com/iseven',
          'info_dict': {
-            'id': 'iseven',
+            'id': '17732',
+            'display_id': 'iseven',
              'ext': 'flv',
              'title': 're:^清晨醒脑！T-ara根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:9e525642c25a0a24302869937cf69d17',
+            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
              'thumbnail': 're:^https?://.*\.jpg$',
              'uploader': '7师傅',
              'uploader_id': '431925',
@@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor):
          'params': {
              'skip_download': True,
          }
-    }
+    }, {
+        'url': 'http://www.douyutv.com/85982',
+        'info_dict': {
+            'id': '85982',
+            'display_id': '85982',
+            'ext': 'flv',
+            'title': 're:^小漠从零单排记！——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'douyu小漠',
+            'uploader_id': '3769985',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
+        if video_id.isdigit():
+            room_id = video_id
+        else:
+            page = self._download_webpage(url, video_id)
+            room_id = self._html_search_regex(
+                r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+            room_id, int(time.time()))
+
+        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
          config = self._download_json(
-            'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+            video_id)
  
          data = config['data']
  
          error_code = config.get('error', 0)
-        show_status = data.get('show_status')
          if error_code is not 0:
-            raise ExtractorError(
-                'Server reported error %i' % error_code, expected=True)
+            error_desc = 'Server reported error %i' % error_code
+            if isinstance(data, (compat_str, compat_basestring)):
+                error_desc += ': ' + data
+            raise ExtractorError(error_desc, expected=True)
  
+        show_status = data.get('show_status')
          # 1 = live, 2 = offline
          if show_status == '2':
              raise ExtractorError(
@@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor):
          base_url = data['rtmp_url']
          live_path = data['rtmp_live']
  
-        title = self._live_title(data['room_name'])
+        title = self._live_title(unescapeHTML(data['room_name']))
          description = data.get('show_details')
          thumbnail = data.get('room_src')
  
@@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor):
          self._sort_formats(formats)
  
          return {
-            'id': video_id,
+            'id': room_id,
+            'display_id': video_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py

new file mode 100644 (file)

index 0000000..e43bc81
--- /dev/null
+++ b/youtube_dl/extractor/dumpert.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+    """Extractor for dumpert.nl mediabase pages.
+
+    The page exposes its media files in a ``data-files`` attribute holding
+    base64-encoded JSON that maps a format id to a URL; the ``still`` entry
+    is used as the thumbnail rather than as a format.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _TEST = {
+        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+        'info_dict': {
+            'id': '6646981/951bc60f',
+            'ext': 'mp4',
+            'title': 'Ik heb nieuws voor je',
+            'description': 'Niet schrikken hoor',
+            # Raw string: '\.' is not a valid escape in a normal literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict (id, title, description, thumbnail, formats)
+        for the mediabase page at ``url``."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        files_base64 = self._search_regex(
+            r'data-files="([^"]+)"', webpage, 'data files')
+
+        # Attribute value -> bytes -> base64-decoded bytes -> text -> JSON.
+        files = self._parse_json(
+            base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+            video_id)
+
+        # NOTE(review): presumably ranks format ids by their position in
+        # this list (later = better) - confirm against utils.qualities.
+        quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+        # Every entry except the preview image ('still') is a format.
+        formats = [{
+            'url': video_url,
+            'format_id': format_id,
+            'quality': quality(format_id),
+        } for format_id, video_url in files.items() if format_id != 'still']
+        self._sort_formats(formats)
+
+        # Page <meta> values win; Open Graph data is the fallback.
+        title = self._html_search_meta(
+            'title', webpage) or self._og_search_title(webpage)
+        description = self._html_search_meta(
+            'description', webpage) or self._og_search_description(webpage)
+        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py

index 79e2fbd394681283e07a7146bc51f39a7499324d..0cbca90b061cf2358600146f37f6da5b61d71709 100644 (file)
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -1,11 +1,17 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
  
  
  class EroProfileIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
-    _TEST = {
+    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+    _NETRC_MACHINE = 'eroprofile'
+    _TESTS = [{
          'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
          'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
          'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
              'thumbnail': 're:https?://.*\.jpg',
              'age_limit': 18,
          }
-    }
+    }, {
+        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+        'md5': '1baa9602ede46ce904c431f5418d8916',
+        'info_dict': {
+            'id': '1133519',
+            'ext': 'm4v',
+            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _login(self):
+        """Log in with the configured credentials, if any.
+
+        Does nothing when no username is available. Otherwise requests
+        _LOGIN_URL with username, password and a return url encoded in the
+        query string, then fetches the script URL found in the response.
+
+        Raises ExtractorError (expected=True) when the response contains
+        the site's "incorrect username or password" message.
+        """
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        # Credentials are sent as query parameters appended to _LOGIN_URL.
+        query = compat_urllib_parse.urlencode({
+            'username': username,
+            'password': password,
+            'url': 'http://www.eroprofile.com/',
+        })
+        login_url = self._LOGIN_URL + query
+        # NOTE(review): the positional False is presumably the "note"
+        # argument (suppresses the download message) - confirm.
+        login_page = self._download_webpage(login_url, None, False)
+
+        m = re.search(r'Your username or password was incorrect\.', login_page)
+        if m:
+            raise ExtractorError(
+                'Wrong username and/or password.', expected=True)
+
+        self.report_login()
+        # The login response references a <script src="..."> that is
+        # requested as the 'login redirect url'; its body is discarded.
+        redirect_url = self._search_regex(
+            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+        self._download_webpage(redirect_url, None, False)
+
+    def _real_initialize(self):
+        """Initialization hook: delegates to _login()."""
+        self._login()
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
  
          webpage = self._download_webpage(url, display_id)
  
+        m = re.search(r'You must be logged in to view this video\.', webpage)
+        if m:
+            raise ExtractorError(
+                'This video requires login. Please specify a username and password and try again.', expected=True)
+
          video_id = self._search_regex(
              [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
              webpage, 'video id', default=None)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 8a49b0b54a86329bd821817dead195c768000246..2ff002643c9e4404b3427f4f309c187186ccc37a 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -29,6 +29,7 @@ from ..utils import (
      xpath_text,
  )
  from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
  from .ooyala import OoyalaIE
  from .rutv import RUTVIE
  from .smotri import SmotriIE
@@ -620,6 +621,16 @@ class GenericIE(InfoExtractor):
                  'age_limit': 0,
              },
          },
+        # 5min embed
+        {
+            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+            'info_dict': {
+                'id': '518726732',
+                'ext': 'mp4',
+                'title': 'Facebook Creates "On This Day" | Crunch Report',
+            },
+        },
          # RSS feed with enclosure
          {
              'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -629,6 +640,16 @@ class GenericIE(InfoExtractor):
                  'upload_date': '20150228',
                  'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
              }
+        },
+        # NBC Sports vplayer embed
+        {
+            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+            'info_dict': {
+                'id': 'ln7x1qSThw4k',
+                'ext': 'flv',
+                'title': "PFT Live: New leader in the 'new-look' defense",
+                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+            },
          }
      ]
  
@@ -1236,6 +1257,17 @@ class GenericIE(InfoExtractor):
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'Pladform')
  
+        # Look for 5min embeds
+        mobj = re.search(
+            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+        if mobj is not None:
+            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+        # Look for NBC Sports VPlayer embeds
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
          def check_video(vurl):
              if YoutubeIE.suitable(vurl):
                  return True
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py

new file mode 100644 (file)

index 0000000..cc3f271
--- /dev/null
+++ b/youtube_dl/extractor/miomio.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    int_or_none,
+)
+
+
+class MioMioIE(InfoExtractor):
+    IE_NAME = 'miomio.tv'
+    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.miomio.tv/watch/cc179734/',
+        'md5': '48de02137d0739c15b440a224ad364b9',
+        'info_dict': {
+            'id': '179734',
+            'ext': 'flv',
+            'title': '手绘动漫鬼泣但丁全程画法',
+            'duration': 354,
+        },
+    }, {
+        'url': 'http://www.miomio.tv/watch/cc184024/',
+        'info_dict': {
+            'id': '43729',
+            'title': '《动漫同人插画绘制》',
+        },
+        'playlist_mincount': 86,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            'description', webpage, 'title', fatal=True)
+
+        mioplayer_path = self._search_regex(
+            r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
+
+        xml_config = self._search_regex(
+            r'flashvars="type=sina&amp;(.+?)&amp;',
+            webpage, 'xml config')
+
+        # skipping the following page causes lags and eventually connection drop-outs
+        self._request_webpage(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
+            video_id)
+
+        # the following xml contains the actual configuration information on the video file(s)
+        vid_config = self._download_xml(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+            video_id)
+
+        http_headers = {
+            'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
+        }
+
+        entries = []
+        for f in vid_config.findall('./durl'):
+            segment_url = xpath_text(f, 'url', 'video url')
+            if not segment_url:
+                continue
+            order = xpath_text(f, 'order', 'order')
+            segment_id = video_id
+            segment_title = title
+            if order:
+                segment_id += '-%s' % order
+                segment_title += ' part %s' % order
+            entries.append({
+                'id': segment_id,
+                'url': segment_url,
+                'title': segment_title,
+                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+                'http_headers': http_headers,
+            })
+
+        if len(entries) == 1:
+            segment = entries[0]
+            segment['id'] = video_id
+            segment['title'] = title
+            return segment
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'entries': entries,
+            'title': title,
+            'http_headers': http_headers,
+        }
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py

index 21aea0c5513ab30cbe27384351229c42e864fab3..84f29155841007f3088a86470040407073726067 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
              r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
          description = self._og_search_description(webpage)
          like_count = str_to_int(self._search_regex(
-            r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
+            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
              webpage, 'like count', fatal=False))
          view_count = str_to_int(self._search_regex(
              [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index 3645d3033f74ae174e3eaa85ad55bbe677d9daba..ecd0ac8b1b501d9ad97261f57a5b0fee1cd68ce7 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -14,7 +14,7 @@ from ..utils import (
  
  
  class NBCIE(InfoExtractor):
-    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
  
      _TESTS = [
          {
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
          return self.url_result(theplatform_url)
  
  
+class NBCSportsVPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+    _TESTS = [{
+        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+        'info_dict': {
+            'id': '9CsDKds0kvHI',
+            'ext': 'flv',
+            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+        }
+    }, {
+        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        iframe_m = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+        if iframe_m:
+            return iframe_m.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        theplatform_url = self._og_search_video_url(webpage)
+        return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+    # Does not include https because its certificate is invalid
+    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+        'info_dict': {
+            'id': 'PHJSaFWbrTY9',
+            'ext': 'flv',
+            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(
+            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
  class NBCNewsIE(InfoExtractor):
      _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
          (?:video/.+?/(?P<id>\d+)|
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py

index a20672c0cc7fea5309e77bb193b887ced2b8d7d5..46cebc0d7b05080491d5f1d32ee8a709b549debc 100644 (file)
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
  
  
  class PhoenixIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.phoenix.de/content/884301',
-        'md5': 'ed249f045256150c92e72dbb70eadec6',
-        'info_dict': {
-            'id': '884301',
-            'ext': 'mp4',
-            'title': 'Michael Krons mit Hans-Werner Sinn',
-            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
-            'upload_date': '20141025',
-            'uploader': 'Im Dialog',
-        }
-    }
+    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+        (?:
+            phoenix/die_sendungen/(?:[^/]+/)?
+        )?
+        (?P<id>[0-9]+)'''
+    _TESTS = [
+        {
+            'url': 'http://www.phoenix.de/content/884301',
+            'md5': 'ed249f045256150c92e72dbb70eadec6',
+            'info_dict': {
+                'id': '884301',
+                'ext': 'mp4',
+                'title': 'Michael Krons mit Hans-Werner Sinn',
+                'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+                'upload_date': '20141025',
+                'uploader': 'Im Dialog',
+            }
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+            'only_matching': True,
+        },
+    ]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py

index 9576aed0e6668189c1959df3166b1e550facc7b0..e766ccca322da0e17389e949cd11fed3a5cb1910 100644 (file)
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_str
  from ..utils import (
      ExtractorError,
-    float_or_none,
      int_or_none,
-    str_to_int,
+    parse_iso8601,
  )
  
  
  class PlayFMIE(InfoExtractor):
      IE_NAME = 'play.fm'
-    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
  
      _TEST = {
-        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+        'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
          'md5': 'c505f8307825a245d0c7ad1850001f22',
          'info_dict': {
-            'id': '137220',
+            'id': '71276',
              'ext': 'mp3',
-            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
-            'uploader': 'Sven Tasnadi',
-            'uploader_id': 'sventasnadi',
-            'duration': 5627.428,
-            'upload_date': '20140712',
+            'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+            'description': '',
+            'duration': 5627,
+            'timestamp': 1406033781,
+            'upload_date': '20140722',
+            'uploader': 'Dan Drastic',
+            'uploader_id': '71170',
              'view_count': int,
              'comment_count': int,
-            'thumbnail': 're:^https?://.*\.jpg$',
          },
      }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
-        upload_date = mobj.group('upload_date')
-
-        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
-        req = compat_urllib_request.Request(
-            'http://www.play.fm/flexRead/recording', data=rec_data)
-        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        rec_doc = self._download_xml(req, video_id)
+        slug = mobj.group('slug')
  
-        error_node = rec_doc.find('./error')
-        if error_node is not None:
-            raise ExtractorError('An error occured: %s (code %s)' % (
-                error_node.text, rec_doc.find('./status').text))
+        recordings = self._download_json(
+            'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
  
-        recording = rec_doc.find('./recording')
-        title = recording.find('./title').text
-        view_count = str_to_int(recording.find('./stats/playcount').text)
-        comment_count = str_to_int(recording.find('./stats/comments').text)
-        duration = float_or_none(recording.find('./duration').text, scale=1000)
-        thumbnail = recording.find('./image').text
+        error = recordings.get('error')
+        if isinstance(error, dict):
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+                expected=True)
  
-        artist = recording.find('./artists/artist')
-        uploader = artist.find('./name').text
-        uploader_id = artist.find('./slug').text
-
-        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
-            'http:', recording.find('./url').text,
-            recording.find('./_class').text, recording.find('./file_id').text,
-            rec_doc.find('./uuid').text, video_id,
-            rec_doc.find('./jingle/file_id').text,
-            'http%3A%2F%2Fwww.play.fm%2Fplayer',
-        )
+        audio_url = recordings['audio']
+        video_id = compat_str(recordings.get('id') or video_id)
+        title = recordings['title']
+        description = recordings.get('description')
+        duration = int_or_none(recordings.get('recordingDuration'))
+        timestamp = parse_iso8601(recordings.get('created_at'))
+        uploader = recordings.get('page', {}).get('title')
+        uploader_id = compat_str(recordings.get('page', {}).get('id'))
+        view_count = int_or_none(recordings.get('playCount'))
+        comment_count = int_or_none(recordings.get('commentCount'))
+        categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
  
          return {
              'id': video_id,
-            'url': video_url,
-            'ext': 'mp3',
-            'filesize': int_or_none(recording.find('./size').text),
+            'url': audio_url,
              'title': title,
-            'upload_date': upload_date,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'description': description,
              'duration': duration,
-            'thumbnail': thumbnail,
+            'timestamp': timestamp,
              'uploader': uploader,
              'uploader_id': uploader_id,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
          }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 3a27e37890dc78b26af866c9884807c97c56ccb9..0c8b731cf47267568e43ccd09ff21f1683b4d992 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
      }
  
      def _extract_count(self, pattern, webpage, name):
-        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
-        if count:
-            count = str_to_int(count)
-        return count
+        return str_to_int(self._search_regex(
+            pattern, webpage, '%s count' % name, fatal=False))
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
          if thumbnail:
              thumbnail = compat_urllib_parse.unquote(thumbnail)
  
-        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
-        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(
+            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(
+            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
          comment_count = self._extract_count(
-            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
  
          video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
          if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py

index 385681d06e3dda356193d9f89c7ccbdd4cbde453..7cc7996642cae1de1ca2a585391d167025b92162 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -10,6 +10,7 @@ from ..compat import (
  )
  from ..utils import (
      unified_strdate,
+    int_or_none,
  )
  
  
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
              'info_dict': {
                  'id': '2104602',
                  'ext': 'mp4',
-                'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+                'title': 'Episode 18 - Staffel 2',
                  'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                  'upload_date': '20131231',
                  'duration': 5845.04,
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
              urls_sources = urls_sources.values()
  
          def fix_bitrate(bitrate):
+            bitrate = int_or_none(bitrate)
+            if not bitrate:
+                return None
              return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
  
          for source in urls_sources:
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py

new file mode 100644 (file)

index 0000000..10251f2
--- /dev/null
+++ b/youtube_dl/extractor/safari.py
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+    std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_FORMAT = 'json'
+
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        # We only need to log in once for courses or individual videos
+        if not self.LOGGED_IN:
+            self._login()
+            SafariBaseIE.LOGGED_IN = True
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(
+                self._ACCOUNT_CREDENTIALS_HINT,
+                expected=True)
+
+        headers = std_headers
+        if 'Referer' not in headers:
+            headers['Referer'] = self._LOGIN_URL
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            'Downloading login form')
+
+        csrf = self._html_search_regex(
+            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+            login_page, 'csrf token')
+
+        login_form = {
+            'csrfmiddlewaretoken': csrf,
+            'email': username,
+            'password1': password,
+            'login': 'Sign In',
+            'next': '',
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError(
+                'Login failed; make sure your credentials are correct and try again.',
+                expected=True)
+
+        self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+    IE_NAME = 'safari'
+    IE_DESC = 'safaribooksonline.com online video'
+    _VALID_URL = r'''(?x)https?://
+                            (?:www\.)?safaribooksonline\.com/
+                                (?:
+                                    library/view/[^/]+|
+                                    api/v1/book
+                                )/
+                                (?P<course_id>\d+)/
+                                    (?:chapter(?:-content)?/)?
+                                (?P<part>part\d+)\.html
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'info_dict': {
+            'id': '2842601850001',
+            'ext': 'mp4',
+            'title': 'Introduction',
+        },
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        part = mobj.group('part')
+
+        webpage = self._download_webpage(
+            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+            part)
+
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if not bc_url:
+            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+    IE_NAME = 'safari:course'
+    IE_DESC = 'safaribooksonline.com online courses'
+
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'info_dict': {
+            'id': '9780133392838',
+            'title': 'Hadoop Fundamentals LiveLessons',
+        },
+        'playlist_count': 22,
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        course_json = self._download_json(
+            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            course_id, 'Downloading course JSON')
+
+        if 'chapters' not in course_json:
+            raise ExtractorError(
+                'No chapters found for course %s' % course_id, expected=True)
+
+        entries = [
+            self.url_result(chapter, 'Safari')
+            for chapter in course_json['chapters']]
+
+        course_title = course_json['title']
+
+        return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py

index 9f79ff5c1b66d2bf37369a6009a914043493b407..0b717a1e42b8dd2c3d8a88d602f001876cf99e03 100644 (file)
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
          page_title = mobj.group('title')
          webpage = self._download_webpage(url, page_title)
          slideshare_obj = self._search_regex(
-            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
              webpage, 'slideshare object')
          info = json.loads(slideshare_obj)
          if info['slideshow']['type'] != 'video':
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 9d45059723c3450a0774275ad708eed88b6e0fcb..316b2c90f110770299084889552b8137e072a617 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor):
  
  
  class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
      IE_NAME = 'soundcloud:set'
      _TESTS = [{
          'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):
  
  
  class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
      IE_NAME = 'soundcloud:user'
      _TESTS = [{
          'url': 'https://soundcloud.com/the-concept-band',
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index 7cb06f351e5b388142b00b51aeba69a1ecfef250..a46a7ecba299c2e4e4e30eb69e7a8cfb3214155d 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -54,7 +54,7 @@ class TeamcocoIE(InfoExtractor):
              embed_url, video_id, 'Downloading embed page')
  
          player_data = self._parse_json(self._search_regex(
-            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
+            r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id)
          data = self._parse_json(
              base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
  
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index feac666f78baff49f4fb312a147acad67d320bc2..0e3e627f495aa5051cb96fe05f180f9790761129 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor):
              error_msg = next(
                  n.attrib['abstract']
                  for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
          except StopIteration:
              pass
          else:
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py

index 06554a1befefb938796f106b5ac45c75d942dc14..96c809eaf7155290210e0f8b18d3a2c7c948ba97 100644 (file)
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-
          webpage = self._download_webpage(url, video_id)
  
          deliver_url = self._search_regex(
@@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor):
          title = clean_html((
              self._html_search_regex(
                  r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
-                webpage, 'title', default=None)
-            or self._search_regex(
+                webpage, 'title', default=None) or
+            self._search_regex(
                  r"var\s+nameVideo\s*=\s*'([^']+)'",
                  deliver_page, 'title')))
  
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py

index eb49586cc2d074915319415ae8a515981184b57d..9369abaf8f7bdfa2b220c39d02f9460dbab711c2 100644 (file)
--- a/youtube_dl/extractor/varzesh3.py
+++ b/youtube_dl/extractor/varzesh3.py
@@ -1,48 +1,45 @@
  # coding: utf-8
  from __future__ import unicode_literals
+
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
-import re
  
  
  class Varzesh3IE(InfoExtractor):
-    _VALID_URL = r'(?P<url>(https?://(?:www\.)?video\.varzesh3\.com)/(?P<id>.+))'
-    _TEST ={
+    _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+    _TEST = {
          'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
          'md5': '2a933874cb7dce4366075281eb49e855',
          'info_dict': {
-            'url': 'http://dl1.video.varzesh3.com/video/clip94/1/video/namayeshi/saves_week26.mp4',
              'id': '76337',
              'ext': 'mp4',
              'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
-            'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg',
              'description': 'فصل ۲۰۱۵-۲۰۱۴',
+            'thumbnail': 're:^https?://.*\.jpg$',
          }
      }
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r'<source[^>]+src="([^"]+)"', webpage, 'video url')
  
-        if not 'shortlink' in webpage:
-            raise ExtractorError('URL has no videos or there is a problem.')
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="matn">(.+?)</div>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
  
-        title = self._html_search_regex(r'meta[^>]+property="og:title"[^>]+content="([^"]+)"', webpage, 'title')
-        video_link = self._html_search_regex(r'source[^>]+src="([^"]+)"', webpage, 'video_link')
-        vid_id = self._html_search_regex(r"link[^>]+rel='canonical'[^>]+href='\/\?p=([^']+)'\/>", webpage, 'vid_id')
-        try:
-            description = self._html_search_regex(r'<div class="matn">(.*?)</div>', webpage, 'description', flags=re.DOTALL)
-        except:
-            description = title
-        thumbnail = self._html_search_regex(r'link[^>]+rel="image_src"[^>]+href="([^"]+)"', webpage, 'thumbnail')
+        video_id = self._search_regex(
+            r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+            webpage, display_id, default=display_id)
  
          return {
-            'url': video_link,
-            'id': vid_id,
+            'url': video_url,
+            'id': video_id,
              'title': title,
-            'ext': video_link.split(".")[-1],
              'description': description,
              'thumbnail': thumbnail,
          }
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py

new file mode 100644 (file)

index 0000000..6215f06
--- /dev/null
+++ b/youtube_dl/extractor/vessel.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+    _NETRC_MACHINE = 'vessel'
+    _TEST = {
+        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+        'info_dict': {
+            'id': 'HDN7G5UMs',
+            'ext': 'mp4',
+            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150317',
+            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+            'timestamp': int,
+        },
+    }
+
+    @staticmethod
+    def make_json_request(url, data):
+        payload = json.dumps(data).encode('utf-8')
+        req = compat_urllib_request.Request(url, payload)
+        req.add_header('Content-Type', 'application/json; charset=utf-8')
+        return req
+
+    @staticmethod
+    def find_assets(data, asset_type):
+        for asset in data.get('assets', []):
+            if asset.get('type') == asset_type:
+                yield asset
+
+    def _check_access_rights(self, data):
+        access_info = data.get('__view', {})
+        if not access_info.get('allow_access', True):
+            err_code = access_info.get('error_code') or ''
+            if err_code == 'ITEM_PAID_ONLY':
+                raise ExtractorError(
+                    'This video requires subscription.', expected=True)
+            else:
+                raise ExtractorError(
+                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        data = {
+            'client_id': 'web',
+            'type': 'password',
+            'user_key': username,
+            'password': password,
+        }
+        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
+        self._login()
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        data = self._parse_json(self._search_regex(
+            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+        asset_id = data['model']['data']['id']
+
+        req = VesselIE.make_json_request(
+            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+        data = self._download_json(req, video_id)
+
+        self._check_access_rights(data)
+
+        try:
+            video_asset = next(VesselIE.find_assets(data, 'video'))
+        except StopIteration:
+            raise ExtractorError('No video assets found')
+
+        formats = []
+        for f in video_asset.get('sources', []):
+            if f['name'] == 'hls-index':
+                formats.extend(self._extract_m3u8_formats(
+                    f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+            else:
+                formats.append({
+                    'format_id': f['name'],
+                    'tbr': f.get('bitrate'),
+                    'height': f.get('height'),
+                    'width': f.get('width'),
+                    'url': f['location'],
+                })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for im_asset in VesselIE.find_assets(data, 'image'):
+            thumbnails.append({
+                'url': im_asset['location'],
+                'width': im_asset.get('width', 0),
+                'height': im_asset.get('height', 0),
+            })
+
+        return {
+            'id': video_id,
+            'title': data['title'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': data.get('short_description'),
+            'duration': data.get('duration'),
+            'comment_count': data.get('comment_count'),
+            'like_count': data.get('like_count'),
+            'view_count': data.get('view_count'),
+            'timestamp': parse_iso8601(data.get('released_at')),
+        }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index bd09652cd96340155cc084f6814df4fbecd6f707..28bcc89cd7423dafa40032076d1bd3ad12f4bdcf 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
          # and latter we extract those that are Vimeo specific.
          self.report_extraction(video_id)
  
+        vimeo_config = self._search_regex(
+            r'vimeo\.config\s*=\s*({.+?});', webpage,
+            'vimeo config', default=None)
+        if vimeo_config:
+            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+            if seed_status.get('state') == 'failed':
+                raise ExtractorError(
+                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+                    expected=True)
+
          # Extract the config JSON
          try:
              try:
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py

index 4971965f9d090cce61a2e8b6d1486fadc873b4dc..81d885fdcee1cf788c217e862629df58f386d73c 100644 (file)
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
          'only_matching': True,
      }]
  
+    @staticmethod
+    def base64_decode_utf8(data):
+        return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+    @staticmethod
+    def base64_encode_utf8(data):
+        return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
      def _extract_flv_config(self, media_id):
-        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+        base64_media_id = self.base64_encode_utf8(media_id)
          flv_config = self._download_xml(
              'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
              'flv config')
          prop_dict = {}
          for prop in flv_config.findall('./property'):
-            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+            prop_id = self.base64_decode_utf8(prop.attrib['id'])
              # CDATA may be empty in flv config
              if not prop.text:
                  continue
-            encoded_content = base64.b64decode(prop.text).decode('utf-8')
+            encoded_content = self.base64_decode_utf8(prop.text)
              prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
          return prop_dict
  
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py

index 97dbac4cce53d7fe956b074fddbe40993fd5681f..b777159c5639304edf1433857f626c29299e4bcb 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -17,6 +17,8 @@ from ..utils import (
      int_or_none,
  )
  
+from .nbc import NBCSportsVPlayerIE
+
  
  class YahooIE(InfoExtractor):
      IE_DESC = 'Yahoo screen and movies'
@@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
          }, {
              'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
              'only_matching': True,
+        }, {
+            'note': 'NBC Sports embeds',
+            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+            'info_dict': {
+                'id': '9CsDKds0kvHI',
+                'ext': 'flv',
+                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+            }
          }
      ]
  
@@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):
                  items = json.loads(items_json)
                  video_id = items[0]['id']
                  return self._get_info(video_id, display_id, webpage)
+        # Look for NBCSports iframes
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
  
          items_json = self._search_regex(
              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py

index e4c855ee0e3616981c77d41b441f15adf6ce0453..6abe72f739b63d8b39d8cdfc5bfccf70dc545715 100644 (file)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor):
              webpage, 'JSON parameters')
          try:
              params = json.loads(json_params)
-        except:
+        except ValueError:
              raise ExtractorError('Invalid JSON')
  
          self.report_extraction(video_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 27c8c4453773974f52d02ecba58f092221c6e118..5488101e1997d1078c44a73ef0af5487aa3dcea9 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1263,27 +1263,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
  
          return self.playlist_result(url_results, playlist_id, title)
  
-    def _real_extract(self, url):
-        # Extract playlist id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        playlist_id = mobj.group(1) or mobj.group(2)
-
-        # Check if it's a video-specific URL
-        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        if 'v' in query_dict:
-            video_id = query_dict['v'][0]
-            if self._downloader.params.get('noplaylist'):
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(video_id, 'Youtube', video_id=video_id)
-            else:
-                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
-            # Mixes require a custom extraction process
-            return self._extract_mix(playlist_id)
-
+    def _extract_playlist(self, playlist_id):
          url = self._TEMPLATE_URL % playlist_id
          page = self._download_webpage(url, playlist_id)
          more_widget_html = content_html = page
@@ -1327,6 +1307,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
          url_results = self._ids_to_results(ids)
          return self.playlist_result(url_results, playlist_id, playlist_title)
  
+    def _real_extract(self, url):
+        # Extract playlist id
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError('Invalid URL: %s' % url)
+        playlist_id = mobj.group(1) or mobj.group(2)
+
+        # Check if it's a video-specific URL
+        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        if 'v' in query_dict:
+            video_id = query_dict['v'][0]
+            if self._downloader.params.get('noplaylist'):
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
+            else:
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
+        return self._extract_playlist(playlist_id)
+
  
  class YoutubeChannelIE(InfoExtractor):
      IE_DESC = 'YouTube.com channels'
@@ -1643,21 +1646,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
  
  
  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_NAME = 'youtube:recommended'
      IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
      _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
      _FEED_NAME = 'recommended'
      _PLAYLIST_TITLE = 'Youtube Recommended videos'
  
  
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+    IE_NAME = 'youtube:watchlater'
      IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
-    _FEED_NAME = 'watch_later'
-    _PLAYLIST_TITLE = 'Youtube Watch Later'
-    _PERSONAL_FEED = True
+    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+    _TESTS = []  # override PlaylistIE tests
+
+    def _real_extract(self, url):
+        return self._extract_playlist('WL')
  
  
  class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    IE_NAME = 'youtube:history'
      IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
      _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
      _FEED_NAME = 'history'
diff --git a/youtube_dl/options.py b/youtube_dl/options.py

index 35c7e5fb3507f5e846e166c5f6c15f6cf637498a..8e80e3759410ed18afa3a1c432c53e0785d90b51 100644 (file)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -13,6 +13,7 @@ from .compat import (
      compat_kwargs,
  )
  from .utils import (
+    preferredencoding,
      write_string,
  )
  from .version import __version__
@@ -797,7 +798,7 @@ def parseOpts(overrideArguments=None):
          # Workaround for Python 2.x, where argv is a byte list
          if sys.version_info < (3,):
              command_line_conf = [
-                a.decode('utf-8', 'replace') for a in command_line_conf]
+                a.decode(preferredencoding(), 'replace') for a in command_line_conf]
  
          if '--ignore-config' in command_line_conf:
              system_conf = []
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py

index b6f51cfd5e1ed5cebb4981dfbf3152e67ed33d1d..0b60ac7e750bc2b308e4c7a8c01c8b988b036450 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
  import io
  import os
  import subprocess
-import sys
  import time
  
  
@@ -117,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor):
      def executable(self):
          return self._paths[self.basename]
  
+    @property
+    def probe_available(self):
+        return self.probe_basename is not None
+
      @property
      def probe_executable(self):
          return self._paths[self.probe_basename]
@@ -169,7 +172,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
  
      def get_audio_codec(self, path):
  
-        if not self.probe_executable:
+        if not self.probe_available:
              raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
          try:
              cmd = [
@@ -269,19 +272,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
              else:
                  self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
                  self.run_ffmpeg(path, new_path, acodec, more_opts)
-        except:
-            etype, e, tb = sys.exc_info()
-            if isinstance(e, AudioConversionError):
-                msg = 'audio conversion failed: ' + e.msg
-            else:
-                msg = 'error running ' + self.basename
-            raise PostProcessingError(msg)
+        except AudioConversionError as e:
+            raise PostProcessingError(
+                'audio conversion failed: ' + e.msg)
+        except Exception:
+            raise PostProcessingError('error running ' + self.basename)
  
          # Try to update the date time for extracted audio file.
          if information.get('filetime') is not None:
              try:
                  os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
-            except:
+            except Exception:
                  self._downloader.report_warning('Cannot update utime of audio file')
  
          information['filepath'] = new_path
diff --git a/youtube_dl/update.py b/youtube_dl/update.py

index d8be4049f5dce0fdd9a61f2aff3c4284d494e598..de3169eef1d6ec29d82a60b2f4b6a68f49d7dd4e 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -65,7 +65,7 @@ def update_self(to_screen, verbose):
      # Check if there is a new version
      try:
          newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
-    except:
+    except Exception:
          if verbose:
              to_screen(compat_str(traceback.format_exc()))
          to_screen('ERROR: can\'t find the current version. Please try again later.')
@@ -78,7 +78,7 @@ def update_self(to_screen, verbose):
      try:
          versions_info = opener.open(JSON_URL).read().decode('utf-8')
          versions_info = json.loads(versions_info)
-    except:
+    except Exception:
          if verbose:
              to_screen(compat_str(traceback.format_exc()))
          to_screen('ERROR: can\'t obtain versions info. Please try again later.')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 472d4df41fda2cb1ffd0392cc4da0f4bdcc2a48a..90e0ed9ab7eb36d4c9f30efffdd2868e9cfe6d47 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -75,7 +75,7 @@ def preferredencoding():
      try:
          pref = locale.getpreferredencoding()
          'TEST'.encode(pref)
-    except:
+    except Exception:
          pref = 'UTF-8'
  
      return pref
@@ -127,7 +127,7 @@ def write_json_file(obj, fn):
              except OSError:
                  pass
          os.rename(tf.name, fn)
-    except:
+    except Exception:
          try:
              os.remove(tf.name)
          except OSError:
@@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
      if entity in compat_html_entities.name2codepoint:
          return compat_chr(compat_html_entities.name2codepoint[entity])
  
-    mobj = re.match(r'#(x?[0-9]+)', entity)
+    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
      if mobj is not None:
          numstr = mobj.group(1)
          if numstr.startswith('x'):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 039ceadf2ee938604b7832fb919915f520eb63da..e1c385bece408a11247e0b72f9335f066bd04838 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.03.24'
+__version__ = '2015.04.03'
author	Sergey M․ <dstftw@gmail.com>
	Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Sat, 4 Apr 2015 10:10:17 +0000 (16:10 +0600)
AUTHORS		patch \| blob \| history
Makefile		patch \| blob \| history
devscripts/check-porn.py		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
test/test_all_urls.py		patch \| blob \| history
test/test_utils.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/compat.py		patch \| blob \| history
youtube_dl/downloader/common.py		patch \| blob \| history
youtube_dl/downloader/http.py		patch \| blob \| history
youtube_dl/downloader/rtmp.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/bloomberg.py		patch \| blob \| history
youtube_dl/extractor/cnn.py		patch \| blob \| history
youtube_dl/extractor/dailymotion.py		patch \| blob \| history
youtube_dl/extractor/dhm.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/douyutv.py		patch \| blob \| history
youtube_dl/extractor/dumpert.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/eroprofile.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/miomio.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/mixcloud.py		patch \| blob \| history
youtube_dl/extractor/nbc.py		patch \| blob \| history
youtube_dl/extractor/phoenix.py		patch \| blob \| history
youtube_dl/extractor/playfm.py		patch \| blob \| history
youtube_dl/extractor/pornhub.py		patch \| blob \| history
youtube_dl/extractor/prosiebensat1.py		patch \| blob \| history
youtube_dl/extractor/safari.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/slideshare.py		patch \| blob \| history
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/teamcoco.py		patch \| blob \| history
youtube_dl/extractor/theplatform.py		patch \| blob \| history
youtube_dl/extractor/ultimedia.py		patch \| blob \| history
youtube_dl/extractor/varzesh3.py		patch \| blob \| history
youtube_dl/extractor/vessel.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/xuite.py		patch \| blob \| history
youtube_dl/extractor/yahoo.py		patch \| blob \| history
youtube_dl/extractor/youporn.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/options.py		patch \| blob \| history
youtube_dl/postprocessor/ffmpeg.py		patch \| blob \| history
youtube_dl/update.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history