Merge remote-tracking branch 'naglis/wistia'

author Philipp Hagemeister <phihag@phihag.de>

Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py

index 1fa99f88b595644df5c4ed50e4c134cc15668638..410f9edc297036d7aeb59250c3241dc10bb7cf2d 100644 (file)
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -10,7 +10,6 @@ from test.helper import FakeYDL
  
  
  from youtube_dl.extractor import (
-    YoutubeUserIE,
      YoutubePlaylistIE,
      YoutubeIE,
      YoutubeChannelIE,
@@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
          self.assertEqual(len(entries), 25)
          self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
  
-    def test_youtube_channel(self):
-        dl = FakeYDL()
-        ie = YoutubeChannelIE(dl)
-        #test paginated channel
-        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
-        self.assertTrue(len(result['entries']) > 90)
-        #test autogenerated channel
-        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
-        self.assertTrue(len(result['entries']) >= 18)
-
-    def test_youtube_user(self):
-        dl = FakeYDL()
-        ie = YoutubeUserIE(dl)
-        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
-        self.assertTrue(len(result['entries']) >= 320)
-
-    def test_youtube_show(self):
-        dl = FakeYDL()
-        ie = YoutubeShowIE(dl)
-        result = ie.extract('http://www.youtube.com/show/airdisasters')
-        self.assertTrue(len(result) >= 3)
-
      def test_youtube_mix(self):
          dl = FakeYDL()
          ie = YoutubePlaylistIE(dl)
@@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
          entries = result['entries']
          self.assertEqual(len(entries), 100)
  
-    def test_youtube_toplist(self):
-        dl = FakeYDL()
-        ie = YoutubeTopListIE(dl)
-        result = ie.extract('yttoplist:music:Trending')
-        entries = result['entries']
-        self.assertTrue(len(entries) >= 5)
-
-    def test_youtube_search_url(self):
-        dl = FakeYDL()
-        ie = YoutubeSearchURLIE(dl)
-        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
-        entries = result['entries']
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'youtube-dl test video')
-        self.assertTrue(len(entries) >= 5)
-
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py

index 4ea5811a56f77180afea16b2e5fbc30056850b49..3f941596edd83edda99917b57187485941133e8f 100644 (file)
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
  
  from .common import FileDownloader
  from .hls import HlsFD
+from .hls import NativeHlsFD
  from .http import HttpFD
  from .mplayer import MplayerFD
  from .rtmp import RtmpFD
@@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
  
      if url.startswith('rtmp'):
          return RtmpFD
+    if protocol == 'm3u8_native':
+        return NativeHlsFD
      if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
          return HlsFD
      if url.startswith('mms') or url.startswith('rtsp'):
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py

index 9ce97f5fe6c4517ec8b6d681e924bcd387a185cf..f85f0c94e7544b6eabad3d1f4158f88b9e3c981d 100644 (file)
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -42,6 +42,7 @@ class FileDownloader(object):
      Subclasses of this one must re-define the real_download method.
      """
  
+    _TEST_FILE_SIZE = 10241
      params = None
  
      def __init__(self, ydl, params):
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py

index 71353f607daead364acbdad83b18b79e61a5bffa..b3be16ff15353ed057607064bba2faa3fcdc2fad 100644 (file)
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -16,6 +16,7 @@ from ..utils import (
      format_bytes,
      encodeFilename,
      sanitize_open,
+    xpath_text,
  )
  
  
@@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
              # We only download the first fragment
              fragments_list = fragments_list[:1]
          total_frags = len(fragments_list)
+        # For some akamai manifests we'll need to add a query to the fragment url
+        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  
          tmpfilename = self.temp_name(filename)
          (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
          for (seg_i, frag_i) in fragments_list:
              name = 'Seg%d-Frag%d' % (seg_i, frag_i)
              url = base_url + name
+            if akamai_pv:
+                url += '?' + akamai_pv.strip(';')
              frag_filename = '%s-%s' % (tmpfilename, name)
              success = http_dl.download(frag_filename, {'url': url})
              if not success:
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py

index 32852f333a0a6329f48ef2d690555d218d4228c5..56cce281130ee4d8374e4f44582cf61cc8e073f5 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -1,8 +1,13 @@
+from __future__ import unicode_literals
+
  import os
+import re
  import subprocess
  
  from .common import FileDownloader
  from ..utils import (
+    compat_urlparse,
+    compat_urllib_request,
      check_executable,
      encodeFilename,
  )
@@ -43,3 +48,57 @@ class HlsFD(FileDownloader):
              self.to_stderr(u"\n")
              self.report_error(u'%s exited with code %d' % (program, retval))
              return False
+
+
+class NativeHlsFD(FileDownloader):
+    """ A more limited implementation that does not require ffmpeg: segments are fetched one by one and concatenated """
+
+    def real_download(self, filename, info_dict):
+        """ Fetch the m3u8 manifest at info_dict['url'] and write all of its segments to filename; returns True """
+        url = info_dict['url']
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        self.to_screen(
+            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
+        data = self.ydl.urlopen(url).read()
+        s = data.decode('utf-8', 'ignore')
+        segment_urls = []
+        for line in s.splitlines():
+            line = line.strip()
+            if line and not line.startswith('#'):  # every non-tag, non-empty line is treated as a segment URI
+                segment_url = (
+                    line if re.match(r'^https?://', line)
+                    else compat_urlparse.urljoin(url, line))
+                segment_urls.append(segment_url)
+
+        is_test = self.params.get('test', False)
+        remaining_bytes = self._TEST_FILE_SIZE if is_test else None  # None means "no byte limit"
+        byte_counter = 0
+        with open(tmpfilename, 'wb') as outf:
+            for i, segurl in enumerate(segment_urls):
+                self.to_screen(
+                    '[hlsnative] %s: Downloading segment %d / %d' %
+                    (info_dict['id'], i + 1, len(segment_urls)))
+                seg_req = compat_urllib_request.Request(segurl)
+                if remaining_bytes is not None:
+                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+                segment = self.ydl.urlopen(seg_req).read()
+                if remaining_bytes is not None:
+                    segment = segment[:remaining_bytes]  # truncate locally in case the Range header was not honoured
+                    remaining_bytes -= len(segment)
+                outf.write(segment)
+                byte_counter += len(segment)
+                # None (no limit) must never be compared with 0, or a regular download ends after one segment.
+                if remaining_bytes is not None and remaining_bytes <= 0:
+                    break
+
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+        self.try_rename(tmpfilename, filename)
+        return True
+
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py

index 6caf7451ed99a00511c56cc4a7c6cb7711601ff0..f62555ce0e33353f5eac848e3956b263f9d43bcb 100644 (file)
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -14,8 +14,6 @@ from ..utils import (
  
  
  class HttpFD(FileDownloader):
-    _TEST_FILE_SIZE = 10241
-
      def real_download(self, filename, info_dict):
          url = info_dict['url']
          tmpfilename = self.temp_name(filename)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 625666acbe5c98edeab7278cb4ac31de217a8dfb..1f1fc0eb269d5c34821692193da1c93849ff8276 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -199,6 +199,7 @@ from .malemotion import MalemotionIE
  from .mdr import MDRIE
  from .metacafe import MetacafeIE
  from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
  from .ministrygrid import MinistryGridIE
  from .mit import TechTVMITIE, MITIE, OCWMITIE
  from .mitele import MiTeleIE
@@ -239,6 +240,7 @@ from .ndtv import NDTVIE
  from .newgrounds import NewgroundsIE
  from .newstube import NewstubeIE
  from .nfb import NFBIE
+from .nfl import NFLIE
  from .nhl import NHLIE, NHLVideocenterIE
  from .niconico import NiconicoIE
  from .ninegag import NineGagIE
@@ -248,7 +250,10 @@ from .nosvideo import NosVideoIE
  from .novamov import NovaMovIE
  from .nowness import NownessIE
  from .nowvideo import NowVideoIE
-from .npo import NPOIE
+from .npo import (
+    NPOIE,
+    TegenlichtVproIE,
+)
  from .nrk import (
      NRKIE,
      NRKTVIE,
@@ -361,6 +366,7 @@ from .thisav import ThisAVIE
  from .tinypic import TinyPicIE
  from .tlc import TlcIE, TlcDeIE
  from .tnaflix import TNAFlixIE
+from .thvideo import THVideoIE
  from .toutv import TouTvIE
  from .toypics import ToypicsUserIE, ToypicsIE
  from .traileraddict import TrailerAddictIE
@@ -447,6 +453,7 @@ from .yahoo import (
  from .youjizz import YouJizzIE
  from .youku import YoukuIE
  from .youporn import YouPornIE
+from .yourupload import YourUploadIE
  from .youtube import (
      YoutubeIE,
      YoutubeChannelIE,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 9c30a1d331064e6efed709a45a5c0755780ea26d..60cab6f4e731df18a7e828d64cf5f471ccda9e25 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,6 +15,7 @@ from ..utils import (
      compat_http_client,
      compat_urllib_error,
      compat_urllib_parse_urlparse,
+    compat_urlparse,
      compat_str,
  
      clean_html,
@@ -640,7 +641,9 @@ class InfoExtractor(object):
  
          return formats
  
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+                              entry_protocol='m3u8', preference=None):
+
          formats = [{
              'format_id': 'm3u8-meta',
              'url': m3u8_url,
@@ -651,6 +654,11 @@ class InfoExtractor(object):
              'format_note': 'Quality selection URL',
          }]
  
+        format_url = lambda u: (
+            u
+            if re.match(r'^https?://', u)
+            else compat_urlparse.urljoin(m3u8_url, u))
+
          m3u8_doc = self._download_webpage(m3u8_url, video_id)
          last_info = None
          kv_rex = re.compile(
@@ -667,15 +675,17 @@ class InfoExtractor(object):
                  continue
              else:
                  if last_info is None:
-                    formats.append({'url': line})
+                    formats.append({'url': format_url(line)})
                      continue
                  tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
  
                  f = {
                      'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
-                    'url': line.strip(),
+                    'url': format_url(line.strip()),
                      'tbr': tbr,
                      'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
                  }
                  codecs = last_info.get('CODECS')
                  if codecs:
diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py

index 4ca3f37a223a0ce4790a123a79233440ffcc2c24..b88379e066f08bf507a5a461d987b1f1eb27049a 100644 (file)
--- a/youtube_dl/extractor/divxstage.py
+++ b/youtube_dl/extractor/divxstage.py
@@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE):
      IE_NAME = 'divxstage'
      IE_DESC = 'DivxStage'
  
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
  
      _HOST = 'www.divxstage.eu'
  
@@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE):
              'title': 'youtubedl test video',
              'description': 'This is a test video for youtubedl.',
          }
-    }
-\ No newline at end of file
+    }
diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py

index 1e1763abf79b833ddc8e982d7a9acbbd080785a7..817a9bd6143544560541dbdb1c2d1ea18b95177d 100644 (file)
--- a/youtube_dl/extractor/dropbox.py
+++ b/youtube_dl/extractor/dropbox.py
@@ -5,24 +5,29 @@ import os.path
  import re
  
  from .common import InfoExtractor
-from ..utils import compat_urllib_parse_unquote
+from ..utils import compat_urllib_parse_unquote, url_basename
  
  
  class DropboxIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
+    _TESTS = [{
          'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
          'info_dict': {
              'id': 'nelirfsxnmcfbfh',
              'ext': 'mp4',
              'title': 'youtube-dl test video \'ä"BaW_jenozKc'
          }
-    }
+    },
+    {
+        'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
+        'only_matching': True,
+    },
+    ]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
-        fn = compat_urllib_parse_unquote(mobj.group('title'))
+        fn = compat_urllib_parse_unquote(url_basename(url))
          title = os.path.splitext(fn)[0]
          video_url = (
              re.sub(r'[?&]dl=0', '', url) +
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py

index 21ea5ec2bf1c499149809971b780daa9d10d0291..e09982e88b913676a2f8c75946e79c82502bf650 100644 (file)
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -10,13 +10,13 @@ from ..utils import (
  
  
  class FlickrIE(InfoExtractor):
-    """Information Extractor for Flickr videos"""
-    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
      _TEST = {
          'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
-        'file': '5645318632.mp4',
          'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
          'info_dict': {
+            'id': '5645318632',
+            'ext': 'mp4',
              "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
              "uploader_id": "forestwander-nature-pictures", 
              "title": "Dark Hollow Waterfalls"
@@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):
              raise ExtractorError('Unable to extract video url')
          video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
  
-        return [{
-            'id':          video_id,
-            'url':         video_url,
-            'ext':         'mp4',
-            'title':       self._og_search_title(webpage),
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
              'description': self._og_search_description(webpage),
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
              'uploader_id': video_uploader_id,
-        }]
+        }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 2d77f604abe7772c4af6ddfd798028420aae23f7..367f930dd093567b1c033961fdf0326ded47dbba 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -597,7 +597,9 @@ class GenericIE(InfoExtractor):
  
          # Helper method
          def _playlist_from_matches(matches, getter, ie=None):
-            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
+            urlrs = orderedSet(
+                self.url_result(self._proto_relative_url(getter(m)), ie)
+                for m in matches)
              return self.playlist_result(
                  urlrs, playlist_id=video_id, playlist_title=video_title)
  
@@ -642,11 +644,11 @@ class GenericIE(InfoExtractor):
              )
              (["\'])
                  (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v)/.+?)
+                (?:embed|v|p)/.+?)
              \1''', webpage)
          if matches:
              return _playlist_from_matches(
-                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
+                matches, lambda m: unescapeHTML(m[1]))
  
          # Look for embedded Dailymotion player
          matches = re.findall(
diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py

new file mode 100644 (file)

index 0000000..94bc87b
--- /dev/null
+++ b/youtube_dl/extractor/mgoon.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    qualities,
+    unified_strdate,
+)
+
+
+class MgoonIE(InfoExtractor):
+    """ Extractor for mgoon.com videos; metadata and formats are read from the JSON player API at _API_URL """
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+    (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
+        video\.mgoon\.com)/(?P<id>[0-9]+)'''
+    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
+    _TESTS = [
+        {
+            'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
+            'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
+            'info_dict': {
+                'id': '5582148',
+                'uploader_id': 'hi6618',
+                'duration': 240.419,
+                'upload_date': '20131220',
+                'ext': 'mp4',
+                'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
+        },
+        {
+            'url': 'http://www.mgoon.com/play/view/5582148',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://video.mgoon.com/5582148',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        """ Return the info dict for the video id found in url; raises ExtractorError when the API reports an error """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        data = self._download_json(self._API_URL.format(video_id), video_id)
+
+        if data.get('errorInfo', {}).get('code') != 'NONE':
+            raise ExtractorError('%s encountered an error: %s' % (
+                self.IE_NAME, data.get('errorInfo', {}).get('message')), expected=True)
+
+        v_info = data['videoInfo']
+        title = v_info.get('v_title')
+        thumbnail = v_info.get('v_thumbnail')
+        duration = v_info.get('v_duration')
+        upload_date = unified_strdate(v_info.get('v_reg_date'))
+        uploader_id = data.get('userInfo', {}).get('u_alias')
+        if duration:
+            duration /= 1000.0  # presumably reported in milliseconds (the test above expects 240.419)
+
+        age_limit = None
+        if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
+            age_limit = 18
+
+        formats = []
+        get_quality = qualities(['360p', '480p', '720p', '1080p'])
+        for fmt in data['videoFiles']:
+            formats.append({
+                'format_id': fmt['label'],
+                'quality': get_quality(fmt['label']),
+                'url': fmt['url'],
+                'ext': fmt['format'],
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'upload_date': upload_date,
+            'uploader_id': uploader_id,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index d2e4acbada5b99c5c3eac4fe6b966ce77dab1ef9..e75ab7c398604451db54bc9d3afe66e4df074871 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        # md5 checksum is not stable
          'info_dict': {
-            'id': 'u1RInQZRN7QJ',
+            'id': 'bTmnLCvIbaaH',
              'ext': 'flv',
              'title': 'I Am a Firefighter',
              'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py

new file mode 100644 (file)

index 0000000..963c458
--- /dev/null
+++ b/youtube_dl/extractor/nfl.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    remove_end,
+)
+
+
+class NFLIE(InfoExtractor):
+    """ Extractor for nfl.com videos, driven by the player config JSON at _PLAYER_CONFIG_URL """
+    IE_NAME = 'nfl.com'
+    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
+    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
+    _TEST = {
+        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates
+        'info_dict': {
+            'id': '0ap3000000398478',
+            'ext': 'mp4',
+            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
+            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'upload_date': '20140921',
+            'timestamp': 1411337580,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """ Return the info dict for url; raises ExtractorError when the player config lists no CDNs """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        # The config supplies the metadata URL template (presumably with an {id} placeholder - confirm) and the CDN list.
+        config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
+                                     note='Downloading player config')
+        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
+        video_data = self._download_json(url_template.format(id=video_id), video_id)
+
+        cdns = config.get('cdns')
+        if not cdns:
+            raise ExtractorError('Failed to get CDN data', expected=True)
+        # One format is emitted for every usable CDN / bitrate stream pair.
+        formats = []
+        streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
+        for name, cdn in cdns.items():
+            # LimeLight streams don't seem to work
+            if cdn.get('name') == 'LIMELIGHT':
+                continue
+
+            protocol = cdn.get('protocol')
+            host = remove_end(cdn.get('host', ''), '/')
+            if not (protocol and host):
+                continue
+            path_prefix = cdn.get('pathprefix', '')
+            if path_prefix and not path_prefix.endswith('/'):
+                path_prefix = '%s/' % path_prefix
+            # get_url is only called further down in this same iteration, so capturing the loop variables is safe.
+            get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
+                protocol=protocol,
+                host=host,
+                prefix=path_prefix,
+                path=p,
+            )
+
+            if protocol == 'rtmp':
+                preference = -2
+            elif 'prog' in name.lower():
+                preference = -1
+            else:
+                preference = 0
+
+            for stream in streams:
+                path = stream.get('path')
+                if not path:
+                    continue
+                formats.append({
+                    'url': get_url(path),
+                    'vbr': int_or_none(stream.get('rate', 0), 1000),
+                    'preference': preference,
+                    'format_note': name,
+                })
+
+        self._sort_formats(formats)
+        # Use the first thumbnail that exists, trying the size keys in the order listed below.
+        thumbnail = None
+        for q in ('xl', 'l', 'm', 's', 'xs'):
+            thumbnail = video_data.get('imagePaths', {}).get(q)
+            if thumbnail:
+                break
+
+        return {
+            'id': video_id,
+            'title': video_data.get('storyHeadline'),
+            'formats': formats,
+            'description': video_data.get('caption'),
+            'duration': video_data.get('duration'),
+            'thumbnail': thumbnail,
+            'timestamp': int_or_none(video_data.get('posted'), 1000),
+        }
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py

index 959fdf59027018e0b78030e4670cca8ca1a08855..7f1bc6377a42e99d853a38dd406a60dda929e49f 100644 (file)
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -2,6 +2,8 @@
  from __future__ import unicode_literals
  
  import re
+import time
+import hashlib
  
  from .common import InfoExtractor
  from ..utils import (
@@ -17,6 +19,7 @@ from ..utils import (
  class NocoIE(InfoExtractor):
      _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
      _LOGIN_URL = 'http://noco.tv/do.php'
+    _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
      _NETRC_MACHINE = 'noco'
  
      _TEST = {
@@ -55,33 +58,52 @@ class NocoIE(InfoExtractor):
          login = self._download_json(request, None, 'Logging in as %s' % username)
  
          if 'erreur' in login:
-            raise  ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
+            raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
+
+    def _call_api(self, path, video_id, note):
+        """ Download JSON for path from the noco API, passing a ts/tk pair (tk = md5 of md5(ts) plus a fixed salt) """
+        ts = compat_str(int(time.time() * 1000))  # current time in milliseconds, as a string
+        tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
+        url = self._API_URL_TEMPLATE % (path, ts, tk)
+
+        resp = self._download_json(url, video_id, note)
+        # A dict with a truthy 'error' key is turned into an ExtractorError; anything else is returned as is.
+        if isinstance(resp, dict) and resp.get('error'):
+            self._raise_error(resp['error'], resp['description'])
+        return resp
+
+    def _raise_error(self, error, description):  # never returns: always raises
+        raise ExtractorError(
+            '%s returned error: %s - %s' % (self.IE_NAME, error, description),
+            expected=True)  # expected=True is passed so the error is flagged as an expected failure
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
  
-        medias = self._download_json(
-            'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
+        medias = self._call_api(
+            'shows/%s/medias' % video_id,
+            video_id, 'Downloading video JSON')
+
+        qualities = self._call_api(
+            'qualities',
+            video_id, 'Downloading qualities JSON')
  
          formats = []
  
-        for fmt in medias['fr']['video_list']['default']['quality_list']:
-            format_id = fmt['quality_key']
+        for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
  
-            file = self._download_json(
-                'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
+            video = self._call_api(
+                'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
                  video_id, 'Downloading %s video JSON' % format_id)
  
-            file_url = file['file']
+            file_url = video['file']
              if not file_url:
                  continue
  
-            if file_url == 'forbidden':
-                raise ExtractorError(
-                    '%s returned error: %s - %s' % (
-                        self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
-                    expected=True)
+            if file_url in ['forbidden', 'not found']:
+                popmessage = video['popmessage']
+                self._raise_error(popmessage['title'], popmessage['message'])
  
              formats.append({
                  'url': file_url,
@@ -91,20 +113,31 @@ class NocoIE(InfoExtractor):
                  'abr': fmt['audiobitrate'],
                  'vbr': fmt['videobitrate'],
                  'filesize': fmt['filesize'],
-                'format_note': fmt['quality_name'],
-                'preference': fmt['priority'],
+                'format_note': qualities[format_id]['quality_name'],
+                'preference': qualities[format_id]['priority'],
              })
  
          self._sort_formats(formats)
  
-        show = self._download_json(
-            'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
+        show = self._call_api(
+            'shows/by_id/%s' % video_id,
+            video_id, 'Downloading show JSON')[0]
  
-        upload_date = unified_strdate(show['indexed'])
+        upload_date = unified_strdate(show['online_date_start_utc'])
          uploader = show['partner_name']
          uploader_id = show['partner_key']
          duration = show['duration_ms'] / 1000.0
-        thumbnail = show['screenshot']
+
+        thumbnails = []
+        for thumbnail_key, thumbnail_url in show.items():
+            m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
+            if not m:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int(m.group('width')),
+                'height': int(m.group('height')),
+            })
  
          episode = show.get('show_TT') or show.get('show_OT')
          family = show.get('family_TT') or show.get('family_OT')
@@ -124,7 +157,7 @@ class NocoIE(InfoExtractor):
              'id': video_id,
              'title': title,
              'description': description,
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
              'upload_date': upload_date,
              'uploader': uploader,
              'uploader_id': uploader_id,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py

index 7a154e94abfb8c40093705111a25f38e3ce9016e..f36d446d2dd398ac48168f2181299bdfc920797f 100644 (file)
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -7,6 +7,7 @@ from ..utils import (
      unified_strdate,
      parse_duration,
      qualities,
+    url_basename,
  )
  
  
@@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
+        return self._get_info(video_id)
  
+    def _get_info(self, video_id):
          metadata = self._download_json(
              'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
              video_id,
@@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
              'duration': parse_duration(metadata.get('tijdsduur')),
              'formats': formats,
          }
+
+
+class TegenlichtVproIE(NPOIE):
+    IE_NAME = 'tegenlicht.vpro.nl'
+    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+
+    _TESTS = [
+        {
+            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+            'info_dict': {
+                'id': 'VPWON_1169289',
+                'ext': 'm4v',
+                'title': 'Tegenlicht',
+                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'upload_date': '20130225',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+        webpage = self._download_webpage(url, name)
+        urn = self._html_search_meta('mediaurn', webpage)
+        info_page = self._download_json(
+            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
+        return self._get_info(info_page['mid'])
diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py

index 72df4d842b0d0f2a41a1ae60a8258815709cc805..ebc0468042a22c2bccdfb5b7e45861c0bc45f61c 100644 (file)
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -10,6 +10,7 @@ from ..utils import (
      ExtractorError,
      float_or_none,
      int_or_none,
+    str_to_int,
  )
  
  
@@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor):
              'duration': 5627.428,
              'upload_date': '20140712',
              'view_count': int,
+            'comment_count': int,
              'thumbnail': 're:^https?://.*\.jpg$',
          },
      }
@@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor):
  
          recording = rec_doc.find('./recording')
          title = recording.find('./title').text
-        view_count = int_or_none(recording.find('./stats/playcount').text)
+        view_count = str_to_int(recording.find('./stats/playcount').text)
+        comment_count = str_to_int(recording.find('./stats/comments').text)
          duration = float_or_none(recording.find('./duration').text, scale=1000)
          thumbnail = recording.find('./image').text
  
@@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor):
              'title': title,
              'upload_date': upload_date,
              'view_count': view_count,
+            'comment_count': comment_count,
              'duration': duration,
              'thumbnail': thumbnail,
              'uploader': uploader,
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py

index 34058fd4bc12652ed5332103c81fd83e344d5efd..409f8540a0b2e2ef9db1ab3c5746d7779a2e5db3 100644 (file)
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -12,7 +12,7 @@ from ..utils import (
  
  class SBSIE(InfoExtractor):
      IE_DESC = 'sbs.com.au'
-    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
  
      _TESTS = [{
          # Original URL is handled by the generic IE which finds the iframe:
@@ -21,12 +21,16 @@ class SBSIE(InfoExtractor):
          'md5': '3150cf278965eeabb5b4cea1c963fe0a',
          'info_dict': {
              'id': '320403011771',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'Dingo Conservation',
              'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
              'thumbnail': 're:http://.*\.jpg',
          },
          'add_ies': ['generic'],
+    },
+    {
+        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index b6b2dba9ca9e6ee02c7dc6b2cf01d3601874a6b2..0be793b1c262ed6c951fa6695de3cf22680d5720 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -5,6 +5,7 @@ import json
  
  from .common import InfoExtractor
  from ..utils import (
+    compat_str,
      ExtractorError,
      xpath_with_ns,
  )
@@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
          body = meta.find(_x('smil:body'))
  
          f4m_node = body.find(_x('smil:seq//smil:video'))
-        if f4m_node is not None:
+        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
              f4m_url = f4m_node.attrib['src']
              if 'manifest.f4m?' not in f4m_url:
                  f4m_url += '?'
              # the parameters are from syfy.com, other sites may use others,
              # they also work for nbc.com
              f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
-            formats = [{
-                'ext': 'flv',
-                'url': f4m_url,
-            }]
+            formats = self._extract_f4m_formats(f4m_url, video_id)
          else:
-            base_url = head.find(_x('smil:meta')).attrib['base']
-            switch = body.find(_x('smil:switch'))
              formats = []
-            for f in switch.findall(_x('smil:video')):
-                attr = f.attrib
-                width = int(attr['width'])
-                height = int(attr['height'])
-                vbr = int(attr['system-bitrate']) // 1000
-                format_id = '%dx%d_%dk' % (width, height, vbr)
-                formats.append({
-                    'format_id': format_id,
-                    'url': base_url,
-                    'play_path': 'mp4:' + attr['src'],
-                    'ext': 'flv',
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                })
+            switch = body.find(_x('smil:switch'))
+            if switch is not None:
+                base_url = head.find(_x('smil:meta')).attrib['base']
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    width = int(attr['width'])
+                    height = int(attr['height'])
+                    vbr = int(attr['system-bitrate']) // 1000
+                    format_id = '%dx%d_%dk' % (width, height, vbr)
+                    formats.append({
+                        'format_id': format_id,
+                        'url': base_url,
+                        'play_path': 'mp4:' + attr['src'],
+                        'ext': 'flv',
+                        'width': width,
+                        'height': height,
+                        'vbr': vbr,
+                    })
+            else:
+                switch = body.find(_x('smil:seq//smil:switch'))
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    vbr = int(attr['system-bitrate']) // 1000
+                    formats.append({
+                        'format_id': compat_str(vbr),
+                        'url': attr['src'],
+                        'vbr': vbr,
+                    })
              self._sort_formats(formats)
  
          return {
diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py

new file mode 100644 (file)

index 0000000..607e947
--- /dev/null
+++ b/youtube_dl/extractor/thvideo.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate
+)
+
+
+class THVideoIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://thvideo.tv/v/th1987/',
+        'md5': 'fa107b1f73817e325e9433505a70db50',
+        'info_dict': {
+            'id': '1987',
+            'ext': 'mp4',
+            'title': '【动画】秘封活动记录 ～ The Sealed Esoteric History.分镜稿预览',
+            'display_id': 'th1987',
+            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
+            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ～ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
+            'upload_date': '20140722'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # extract download link from mobile player page
+        webpage_player = self._download_webpage(
+            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
+            video_id, note='Downloading video source page')
+        video_url = self._html_search_regex(
+            r'<source src="(.*?)" type', webpage_player, 'video url')
+
+        # extract video info from main page
+        webpage = self._download_webpage(
+            'http://thvideo.tv/v/th%s' % (video_id), video_id)
+        title = self._og_search_title(webpage)
+        display_id = 'th%s' % video_id
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+        upload_date = unified_strdate(self._html_search_regex(
+            r'span itemprop="datePublished" content="(.*?)">', webpage,
+            'upload date', fatal=False))
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+            'display_id': display_id,
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': upload_date
+        }
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py

index 08a48c05acf34b6cd190ba52d58189935fe6b20f..64a1e903022a78fa3a2b15eeff5eed20afce568d 100644 (file)
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text
  
  
  class Tube8IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
-        'md5': '44bf12b98313827dd52d35b8706a4ea0',
-        'info_dict': {
-            'id': '229795',
-            'ext': 'mp4',
-            'description': 'hot teen Kasia grinding',
-            'uploader': 'unknown',
-            'title': 'Kasia music video',
-            'age_limit': 18,
-        }
-    }
+    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+            'md5': '44bf12b98313827dd52d35b8706a4ea0',
+            'info_dict': {
+                'id': '229795',
+                'display_id': 'kasia-music-video',
+                'ext': 'mp4',
+                'description': 'hot teen Kasia grinding',
+                'uploader': 'unknown',
+                'title': 'Kasia music video',
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
+            'only_matching': True,
+        },
+    ]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
  
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
-        webpage = self._download_webpage(req, video_id)
+        webpage = self._download_webpage(req, display_id)
  
          flashvars = json.loads(self._html_search_regex(
              r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
@@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor):
  
          return {
              'id': video_id,
+            'display_id': display_id,
              'url': video_url,
              'title': title,
              'description': description,
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index d2ffd1b6ba893f2cb2cc50f00a3131a835dba97d..ebab8b86c6f5727795d54b234ee4adda7258cf39 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -6,6 +6,7 @@ import xml.etree.ElementTree
  from .common import InfoExtractor
  from ..utils import (
      compat_HTTPError,
+    compat_urllib_request,
      ExtractorError,
  )
  
@@ -24,7 +25,7 @@ class VevoIE(InfoExtractor):
  
      _TESTS = [{
          'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
-        "md5": "06bea460acb744eab74a9d7dcb4bfd61",
+        "md5": "95ee28ee45e70130e3ab02b0f579ae23",
          'info_dict': {
              'id': 'GB1101300280',
              'ext': 'mp4',
@@ -40,7 +41,7 @@ class VevoIE(InfoExtractor):
      }, {
          'note': 'v3 SMIL format',
          'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
-        'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
+        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
          'info_dict': {
              'id': 'USUV71302923',
              'ext': 'mp4',
@@ -69,6 +70,21 @@ class VevoIE(InfoExtractor):
      }]
      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
  
+    def _real_initialize(self):
+        req = compat_urllib_request.Request(
+            'http://www.vevo.com/auth', data=b'')
+        webpage = self._download_webpage(
+            req, None,
+            note='Retrieving oauth token',
+            errnote='Unable to retrieve oauth token',
+            fatal=False)
+        if webpage is False:
+            self._oauth_token = None
+        else:
+            self._oauth_token = self._search_regex(
+                r'access_token":\s*"([^"]+)"',
+                webpage, 'access token', fatal=False)
+
      def _formats_from_json(self, video_info):
          last_version = {'version': -1}
          for version in video_info['videoVersions']:
@@ -129,6 +145,26 @@ class VevoIE(InfoExtractor):
              })
          return formats
  
+    def _download_api_formats(self, video_id):
+        if not self._oauth_token:
+            self._downloader.report_warning(
+                'No oauth token available, skipping API HLS download')
+            return []
+
+        api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
+            video_id, self._oauth_token)
+        api_data = self._download_json(
+            api_url, video_id,
+            note='Downloading HLS formats',
+            errnote='Failed to download HLS format list', fatal=False)
+        if api_data is None:
+            return []
+
+        m3u8_url = api_data[0]['url']
+        return self._extract_m3u8_formats(
+            m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
+            preference=0)
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
@@ -152,30 +188,8 @@ class VevoIE(InfoExtractor):
          else:
              age_limit = None
  
-        # Download SMIL
-        smil_blocks = sorted((
-            f for f in video_info['videoVersions']
-            if f['sourceType'] == 13),
-            key=lambda f: f['version'])
-
-        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
-            self._SMIL_BASE_URL, video_id, video_id.lower())
-        if smil_blocks:
-            smil_url_m = self._search_regex(
-                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
-                fatal=False)
-            if smil_url_m is not None:
-                smil_url = smil_url_m
-
-        try:
-            smil_xml = self._download_webpage(smil_url, video_id,
-                                              'Downloading SMIL info')
-            formats.extend(self._formats_from_smil(smil_xml))
-        except ExtractorError as ee:
-            if not isinstance(ee.cause, compat_HTTPError):
-                raise
-            self._downloader.report_warning(
-                'Cannot download SMIL information, falling back to JSON ..')
+        # Download via HLS API
+        formats.extend(self._download_api_formats(video_id))
  
          self._sort_formats(formats)
          timestamp_ms = int(self._search_regex(
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py

index 46b4d91330c50b5fb27917a42ef69d07a1a36ad9..268e2f6183405117b7fd10f6037857fa6ce8cf10 100644 (file)
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -5,7 +5,10 @@ import re
  import hashlib
  
  from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
  
  
  class WatIE(InfoExtractor):
@@ -57,6 +60,11 @@ class WatIE(InfoExtractor):
  
          video_info = self.download_video_info(real_id)
  
+        error_desc = video_info.get('error_desc')
+        if error_desc:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
+
          geo_list = video_info.get('geoList')
          country = geo_list[0] if geo_list else ''
  
diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py

new file mode 100644 (file)

index 0000000..40fc416
--- /dev/null
+++ b/youtube_dl/extractor/yourupload.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class YourUploadIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+        (?:yourupload\.com/watch|
+           embed\.yourupload\.com|
+           embed\.yucache\.net
+        )/(?P<id>[A-Za-z0-9]+)
+        '''
+    _TESTS = [
+        {
+            'url': 'http://yourupload.com/watch/14i14h',
+            'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
+            'info_dict': {
+                'id': '14i14h',
+                'ext': 'mp4',
+                'title': 'BigBuckBunny_320x180.mp4',
+                'thumbnail': 're:^https?://.*\.jpe?g',
+            }
+        },
+        {
+            'url': 'http://embed.yourupload.com/14i14h',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://embed.yucache.net/{0:}'.format(video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        url = self._og_search_video_url(webpage)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index b54c69122afb1265acc545cd79daa8ffd09a1752..99198e38092a8ed507b8e44aae41677e7ce17e17 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      def _set_language(self):
          return bool(self._download_webpage(
              self._LANG_URL, None,
-            note=u'Setting language', errnote='unable to set language',
+            note='Setting language', errnote='unable to set language',
              fatal=False))
  
      def _login(self):
@@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          # No authentication to be performed
          if username is None:
              if self._LOGIN_REQUIRED:
-                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
              return True
  
          login_page = self._download_webpage(
              self._LOGIN_URL, None,
-            note=u'Downloading login page',
-            errnote=u'unable to fetch login page', fatal=False)
+            note='Downloading login page',
+            errnote='unable to fetch login page', fatal=False)
          if login_page is False:
              return
  
@@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
          login_results = self._download_webpage(
              req, None,
-            note=u'Logging in', errnote=u'unable to log in', fatal=False)
+            note='Logging in', errnote='unable to log in', fatal=False)
          if login_results is False:
              return False
  
          if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
-            raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
+            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
  
          # Two-Factor
          # TODO add SMS and phone call support - these require making a request and then prompting the user
@@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
              tfa_code = self._get_tfa_info()
  
              if tfa_code is None:
-                self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
-                self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
+                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                  return False
  
              # Unlike the first login form, secTok and timeStmp are both required for the TFA form
  
              match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
              if match is None:
-                self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
+                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
              secTok = match.group(1)
              match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
              if match is None:
-                self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
+                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
              timeStmp = match.group(1)
  
              tfa_form_strs = {
@@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
              tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
              tfa_results = self._download_webpage(
                  tfa_req, None,
-                note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
+                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
  
              if tfa_results is False:
                  return False
  
              if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
-                self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
+                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                  return False
              if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
-                self._downloader.report_warning(u'unable to log in - did the page structure change?')
+                self._downloader.report_warning('unable to log in - did the page structure change?')
                  return False
              if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
-                self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
+                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                  return False
  
          if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-            self._downloader.report_warning(u'unable to log in: bad username or password')
+            self._downloader.report_warning('unable to log in: bad username or password')
              return False
          return True
  
@@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
  
          self._download_webpage(
              req, None,
-            note=u'Confirming age', errnote=u'Unable to confirm age')
+            note='Confirming age', errnote='Unable to confirm age')
          return True
  
      def _real_initialize(self):
@@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                           (?:                                                  # the various things that can precede the ID:
-                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
+                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
                                   (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
@@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      IE_NAME = 'youtube'
      _TESTS = [
          {
-            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
-            u"file":  u"BaW_jenozKc.mp4",
-            u"info_dict": {
-                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
-                u"uploader": u"Philipp Hagemeister",
-                u"uploader_id": u"phihag",
-                u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
-                u"categories": [u'Science & Technology'],
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
+            'info_dict': {
+                'id': 'BaW_jenozKc',
+                'ext': 'mp4',
+                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+                'uploader': 'Philipp Hagemeister',
+                'uploader_id': 'phihag',
+                'upload_date': '20121002',
+                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+                'categories': ['Science & Technology'],
                  'like_count': int,
                  'dislike_count': int,
              }
          },
          {
-            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
-            u"file":  u"UxxajLWwzqY.mp4",
-            u"note": u"Test generic use_cipher_signature video (#897)",
-            u"info_dict": {
-                u"upload_date": u"20120506",
-                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
-                u"uploader": u"Icona Pop",
-                u"uploader_id": u"IconaPop"
+            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
+            'note': 'Test generic use_cipher_signature video (#897)',
+            'info_dict': {
+                'id': 'UxxajLWwzqY',
+                'ext': 'mp4',
+                'upload_date': '20120506',
+                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
+                'uploader': 'Icona Pop',
+                'uploader_id': 'IconaPop',
              }
          },
          {
-            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
-            u"file":  u"07FYdnEawAQ.mp4",
-            u"note": u"Test VEVO video with age protection (#956)",
-            u"info_dict": {
-                u"upload_date": u"20130703",
-                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
-                u"description": u"md5:64249768eec3bc4276236606ea996373",
-                u"uploader": u"justintimberlakeVEVO",
-                u"uploader_id": u"justintimberlakeVEVO"
+            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
+            'note': 'Test VEVO video with age protection (#956)',
+            'info_dict': {
+                'id': '07FYdnEawAQ',
+                'ext': 'mp4',
+                'upload_date': '20130703',
+                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+                'description': 'md5:64249768eec3bc4276236606ea996373',
+                'uploader': 'justintimberlakeVEVO',
+                'uploader_id': 'justintimberlakeVEVO',
              }
          },
          {
-            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
-            u"file":  u"yZIXLfi8CZQ.mp4",
-            u"note": u"Embed-only video (#1746)",
-            u"info_dict": {
-                u"upload_date": u"20120608",
-                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
-                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
-                u"uploader": u"SET India",
-                u"uploader_id": u"setindia"
+            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+            'note': 'Embed-only video (#1746)',
+            'info_dict': {
+                'id': 'yZIXLfi8CZQ',
+                'ext': 'mp4',
+                'upload_date': '20120608',
+                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+                'uploader': 'SET India',
+                'uploader_id': 'setindia'
              }
          },
          {
-            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
-            u"file": u"a9LDPn-MO4I.m4a",
-            u"note": u"256k DASH audio (format 141) via DASH manifest",
-            u"info_dict": {
-                u"upload_date": "20121002",
-                u"uploader_id": "8KVIDEO",
-                u"description": '',
-                u"uploader": "8KVIDEO",
-                u"title": "UHDTV TEST 8K VIDEO.mp4"
+            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
+            'note': '256k DASH audio (format 141) via DASH manifest',
+            'info_dict': {
+                'id': 'a9LDPn-MO4I',
+                'ext': 'm4a',
+                'upload_date': '20121002',
+                'uploader_id': '8KVIDEO',
+                'description': '',
+                'uploader': '8KVIDEO',
+                'title': 'UHDTV TEST 8K VIDEO.mp4'
              },
-            u"params": {
-                u"youtube_include_dash_manifest": True,
-                u"format": "141",
+            'params': {
+                'youtube_include_dash_manifest': True,
+                'format': '141',
              },
          },
          # DASH manifest with encrypted signature
@@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'uploader_id': 'AfrojackVEVO',
                  'upload_date': '20131011',
              },
-            u"params": {
+            'params': {
                  'youtube_include_dash_manifest': True,
                  'format': '141',
              },
@@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
      def report_video_info_webpage_download(self, video_id):
          """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Downloading video info webpage' % video_id)
+        self.to_screen('%s: Downloading video info webpage' % video_id)
  
      def report_information_extraction(self, video_id):
          """Report attempt to extract video information."""
-        self.to_screen(u'%s: Extracting video information' % video_id)
+        self.to_screen('%s: Extracting video information' % video_id)
  
      def report_unavailable_format(self, video_id, format):
          """Report extracted video URL."""
-        self.to_screen(u'%s: Format %s not available' % (video_id, format))
+        self.to_screen('%s: Format %s not available' % (video_id, format))
  
      def report_rtmp_download(self):
          """Indicate the download will use the RTMP protocol."""
-        self.to_screen(u'RTMP download detected')
+        self.to_screen('RTMP download detected')
  
      def _signature_cache_id(self, example_sig):
          """ Return a string representation of a signature """
@@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              player_type, player_id, self._signature_cache_id(example_sig))
          assert os.path.basename(func_id) == func_id
  
-        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
+        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
          if cache_spec is not None:
              return lambda s: ''.join(s[i] for i in cache_spec)
  
          if player_type == 'js':
              code = self._download_webpage(
                  player_url, video_id,
-                note=u'Downloading %s player %s' % (player_type, player_id),
-                errnote=u'Download of %s failed' % player_url)
+                note='Downloading %s player %s' % (player_type, player_id),
+                errnote='Download of %s failed' % player_url)
              res = self._parse_sig_js(code)
          elif player_type == 'swf':
              urlh = self._request_webpage(
                  player_url, video_id,
-                note=u'Downloading %s player %s' % (player_type, player_id),
-                errnote=u'Download of %s failed' % player_url)
+                note='Downloading %s player %s' % (player_type, player_id),
+                errnote='Download of %s failed' % player_url)
              code = urlh.read()
              res = self._parse_sig_swf(code)
          else:
@@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              cache_res = res(test_string)
              cache_spec = [ord(c) for c in cache_res]
  
-        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
+        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
          return res
  
      def _print_sig_code(self, func, example_sig):
          def gen_sig_code(idxs):
              def _genslice(start, end, step):
                  starts = '' if start == 0 else str(start)
-                ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
-                steps = '' if step == 1 else (u':%d' % step)
+                ends = (':%d' % (end+step)) if end + step >= 0 else ':'
+                steps = '' if step == 1 else (':%d' % step)
                  return 's[%s%s%s]' % (starts, ends, steps)
  
              step = None
@@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          expr_code = ' + '.join(gen_sig_code(cache_spec))
          signature_id_tuple = '(%s)' % (
              ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
-        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                  '    return %s\n') % (signature_id_tuple, expr_code)
-        self.to_screen(u'Extracted signature function:\n' + code)
+        self.to_screen('Extracted signature function:\n' + code)
  
      def _parse_sig_js(self, jscode):
          funcname = self._search_regex(
@@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          """Turn the encrypted s field into a working signature"""
  
          if player_url is None:
-            raise ExtractorError(u'Cannot decrypt signature without player_url')
+            raise ExtractorError('Cannot decrypt signature without player_url')
  
-        if player_url.startswith(u'//'):
+        if player_url.startswith('//'):
              player_url = 'https:' + player_url
          try:
              player_id = (player_url, self._signature_cache_id(s))
@@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                  video_id, note=False)
          except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
              return {}
          lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
  
@@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              url = 'https://www.youtube.com/api/timedtext?' + params
              sub_lang_list[lang] = url
          if not sub_lang_list:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            self._downloader.report_warning('video doesn\'t have subtitles')
              return {}
          return sub_lang_list
  
@@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
          sub_format = self._downloader.params.get('subtitlesformat', 'srt')
-        self.to_screen(u'%s: Looking for automatic captions' % video_id)
+        self.to_screen('%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
          err_msg = 'Couldn\'t find automatic captions for %s' % video_id
          if mobj is None:
@@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              caption_list = self._download_xml(list_url, video_id)
              original_lang_node = caption_list.find('track')
              if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
-                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+                self._downloader.report_warning('Video doesn\'t have automatic captions')
                  return {}
              original_lang = original_lang_node.attrib['lang_code']
  
@@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      def extract_id(cls, url):
          mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
          if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
          video_id = mobj.group(2)
          return video_id
  
@@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
      def _extract_annotations(self, video_id):
          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
-        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
+        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
  
      def _real_extract(self, url):
          proto = (
@@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
          # Check for "rental" videos
          if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
-            raise ExtractorError(u'"rental" videos not supported')
+            raise ExtractorError('"rental" videos not supported')
  
          # Start extracting information
          self.report_information_extraction(video_id)
  
          # uploader
          if 'author' not in video_info:
-            raise ExtractorError(u'Unable to extract uploader name')
+            raise ExtractorError('Unable to extract uploader name')
          video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
  
          # uploader_id
@@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          if mobj is not None:
              video_uploader_id = mobj.group(1)
          else:
-            self._downloader.report_warning(u'unable to extract uploader nickname')
+            self._downloader.report_warning('unable to extract uploader nickname')
  
          # title
          if 'title' in video_info:
              video_title = video_info['title'][0]
          else:
-            self._downloader.report_warning(u'Unable to extract video title')
+            self._downloader.report_warning('Unable to extract video title')
              video_title = '_'
  
          # thumbnail image
@@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          if m_thumb is not None:
              video_thumbnail = m_thumb.group(1)
          elif 'thumbnail_url' not in video_info:
-            self._downloader.report_warning(u'unable to extract video thumbnail')
+            self._downloader.report_warning('unable to extract video thumbnail')
              video_thumbnail = None
          else:   # don't panic if we can't find it
              video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
@@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              if count is not None:
                  return int(count.replace(',', ''))
              return None
-        like_count = _extract_count(u'like')
-        dislike_count = _extract_count(u'dislike')
+        like_count = _extract_count('like')
+        dislike_count = _extract_count('dislike')
  
          # subtitles
          video_subtitles = self.extract_subtitles(video_id, video_webpage)
@@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              return
  
          if 'length_seconds' not in video_info:
-            self._downloader.report_warning(u'unable to extract video duration')
+            self._downloader.report_warning('unable to extract video duration')
              video_duration = None
          else:
              video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
@@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
              # this signatures are encrypted
              if 'url_encoded_fmt_stream_map' not in args:
-                raise ValueError(u'No stream_map present')  # caught below
+                raise ValueError('No stream_map present')  # caught below
              re_signature = re.compile(r'[&,]s=')
              m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
              if m_s is not None:
-                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
+                self.to_screen('%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
              m_s = re_signature.search(args.get('adaptive_fmts', ''))
              if m_s is not None:
@@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                  player_desc = 'html5 player %s' % player_version
  
                          parts_sizes = self._signature_cache_id(encrypted_sig)
-                        self.to_screen(u'{%s} signature length %s, %s' %
+                        self.to_screen('{%s} signature length %s, %s' %
                              (format_id, parts_sizes, player_desc))
  
                      signature = self._decrypt_signature(
@@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              url_map = self._extract_from_m3u8(manifest_url, video_id)
              formats = _map_to_format_list(url_map)
          else:
-            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          # Look for the DASH manifest
          if (self._downloader.params.get('youtube_include_dash_manifest', False)):
@@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                  dash_doc = self._download_xml(
                      dash_manifest_url, video_id,
-                    note=u'Downloading DASH manifest',
-                    errnote=u'Could not download DASH manifest')
-                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                    note='Downloading DASH manifest',
+                    errnote='Could not download DASH manifest')
+                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                      url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                      if url_el is None:
                          continue
@@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                          existing_format.update(f)
  
              except (ExtractorError, KeyError) as e:
-                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
+                self.report_warning('Skipping DASH manifest: %s' % e, video_id)
  
          self._sort_formats(formats)
  
@@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                          (?:\w+\.)?
                          youtube\.com/
                          (?:
-                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
+                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                             \? (?:.*?&)*? (?:p|a|list)=
                          |  p/
                          )
@@ -1056,6 +1061,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
              'title': 'YDL_safe_search',
          },
          'playlist_count': 2,
+    }, {
+        'note': 'embedded',
+        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+        'playlist_count': 4,
+        'info_dict': {
+            'title': 'JODA15',
+        }
+    }, {
+        'note': 'Embedded SWF player',
+        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
+        'playlist_count': 4,
+        'info_dict': {
+            'title': 'JODA7',
+        }
      }]
  
      def _real_initialize(self):
@@ -1090,7 +1109,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
          # Extract playlist id
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
          playlist_id = mobj.group(1) or mobj.group(2)
  
          # Check if it's a video-specific URL
@@ -1098,16 +1117,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
          if 'v' in query_dict:
              video_id = query_dict['v'][0]
              if self._downloader.params.get('noplaylist'):
-                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                  return self.url_result(video_id, 'Youtube', video_id=video_id)
              else:
-                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  
          if playlist_id.startswith('RD'):
              # Mixes require a custom extraction process
              return self._extract_mix(playlist_id)
          if playlist_id.startswith('TL'):
-            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+            raise ExtractorError('For downloading YouTube.com top lists, use '
                  'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
  
          url = self._TEMPLATE_URL % playlist_id
@@ -1152,19 +1171,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
  
  class YoutubeTopListIE(YoutubePlaylistIE):
      IE_NAME = 'youtube:toplist'
-    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
          ' (Example: "yttoplist:music:Top Tracks")')
      _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
-    _TESTS = []
+    _TESTS = [{
+        'url': 'yttoplist:music:Trending',
+        'playlist_mincount': 5,
+        'skip': 'Only works for logged-in users',
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          channel = mobj.group('chann')
          title = mobj.group('title')
          query = compat_urllib_parse.urlencode({'title': title})
-        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
-        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(playlist_re, channel_page, 'list')
+        channel_page = self._download_webpage(
+            'https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(
+            r'''(?x)
+                <a\s+href="([^"]+)".*?>\s*
+                <span\s+class="branded-page-module-title-text">\s*
+                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
+            channel_page, 'list')
          url = compat_urlparse.urljoin('https://www.youtube.com/', link)
          
          video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@@ -1190,6 +1218,11 @@ class YoutubeChannelIE(InfoExtractor):
      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
      _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
      IE_NAME = 'youtube:channel'
+    _TESTS = [{
+        'note': 'paginated channel',
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+        'playlist_mincount': 91,
+    }]
  
      def extract_videos_from_page(self, page):
          ids_in_page = []
@@ -1202,7 +1235,7 @@ class YoutubeChannelIE(InfoExtractor):
          # Extract channel id
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
  
          # Download channel page
          channel_id = mobj.group(1)
@@ -1224,7 +1257,7 @@ class YoutubeChannelIE(InfoExtractor):
              for pagenum in itertools.count(1):
                  url = self._MORE_PAGES_URL % (pagenum, channel_id)
                  page = self._download_json(
-                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    url, channel_id, note='Downloading page #%s' % pagenum,
                      transform_source=uppercase_escape)
  
                  ids_in_page = self.extract_videos_from_page(page['content_html'])
@@ -1233,7 +1266,7 @@ class YoutubeChannelIE(InfoExtractor):
                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                      break
  
-        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
+        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
          url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                         for video_id in video_ids]
@@ -1248,6 +1281,17 @@ class YoutubeUserIE(InfoExtractor):
      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
      IE_NAME = 'youtube:user'
  
+    _TESTS = [{
+        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
+        'playlist_mincount': 320,
+        'info_dict': {
+            'title': 'TheLinuxFoundation',
+        }
+    }, {
+        'url': 'ytuser:phihag',
+        'only_matching': True,
+    }]
+
      @classmethod
      def suitable(cls, url):
          # Don't return True if the url can be extracted with other youtube
@@ -1260,7 +1304,7 @@ class YoutubeUserIE(InfoExtractor):
          # Extract username
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
  
          username = mobj.group(1)
  
@@ -1281,7 +1325,7 @@ class YoutubeUserIE(InfoExtractor):
              try:
                  response = json.loads(page)
              except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
              if 'entry' not in response['feed']:
                  return
  
@@ -1322,9 +1366,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
                  compat_urllib_parse.quote_plus(query.encode('utf-8')),
                  (PAGE_SIZE * pagenum) + 1)
              data_json = self._download_webpage(
-                result_url, video_id=u'query "%s"' % query,
-                note=u'Downloading page %s' % (pagenum + 1),
-                errnote=u'Unable to download API page')
+                result_url, video_id='query "%s"' % query,
+                note='Downloading page %s' % (pagenum + 1),
+                errnote='Unable to download API page')
              data = json.loads(data_json)
              api_response = data['data']
  
@@ -1356,6 +1400,13 @@ class YoutubeSearchURLIE(InfoExtractor):
      IE_DESC = 'YouTube.com search URLs'
      IE_NAME = 'youtube:search_url'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'title': 'youtube-dl test video',
+        }
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -1390,17 +1441,38 @@ class YoutubeSearchURLIE(InfoExtractor):
  
  class YoutubeShowIE(InfoExtractor):
      IE_DESC = 'YouTube.com (multi-season) shows'
-    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
      IE_NAME = 'youtube:show'
+    _TESTS = [{
+        'url': 'http://www.youtube.com/show/airdisasters',
+        'playlist_mincount': 3,
+        'info_dict': {
+            'id': 'airdisasters',
+            'title': 'Air Disasters',
+        }
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        show_name = mobj.group(1)
-        webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(
+            url, playlist_id, 'Downloading show webpage')
          # There's one playlist for each season of the show
          m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
-        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
-        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
+        entries = [
+            self.url_result(
+                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
+            for season in m_seasons
+        ]
+        title = self._og_search_title(webpage, fatal=False)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'entries': entries,
+        }
  
  
  class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 940e9c8cf79d3a45d2f0e451d42d0396e7e6ccd4..c17701d6af1e6564cc382a0fd6b36a1a82682e88 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.09.19'
+__version__ = '2014.09.25'
author	Philipp Hagemeister <phihag@phihag.de>
	Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Thu, 25 Sep 2014 00:03:49 +0000 (02:03 +0200)
test/test_youtube_lists.py		patch \| blob \| history
youtube_dl/downloader/__init__.py		patch \| blob \| history
youtube_dl/downloader/common.py		patch \| blob \| history
youtube_dl/downloader/f4m.py		patch \| blob \| history
youtube_dl/downloader/hls.py		patch \| blob \| history
youtube_dl/downloader/http.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/divxstage.py		patch \| blob \| history
youtube_dl/extractor/dropbox.py		patch \| blob \| history
youtube_dl/extractor/flickr.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/mgoon.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/nbc.py		patch \| blob \| history
youtube_dl/extractor/nfl.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/noco.py		patch \| blob \| history
youtube_dl/extractor/npo.py		patch \| blob \| history
youtube_dl/extractor/playfm.py		patch \| blob \| history
youtube_dl/extractor/sbs.py		patch \| blob \| history
youtube_dl/extractor/theplatform.py		patch \| blob \| history
youtube_dl/extractor/thvideo.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tube8.py		patch \| blob \| history
youtube_dl/extractor/vevo.py		patch \| blob \| history
youtube_dl/extractor/wat.py		patch \| blob \| history
youtube_dl/extractor/yourupload.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history