Merge remote-tracking branch 'yasoob/master'

author Philipp Hagemeister <phihag@phihag.de>

Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)
diff --git a/README.md b/README.md

index 81b86e264c117954bfddae70f8ba4feb2e16a198..ceb85fef1b67d4b227ae288f0523d5f6b798759b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -168,7 +168,7 @@ The `-o` option allows users to indicate a template for the output file names. T
   - `playlist`: The name or the id of the playlist that contains the video.
   - `playlist_index`: The index of the video in the playlist, a five-digit number.
  
-The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
+The current default template is `%(title)s-%(id)s.%(ext)s`.
  
  In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
  
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py

index b168cea77b9fe3b85273b4d04a62e5fcfc129609..c3d69e6f445af3846081af3bdea0f8c9a2a7063c 100644 (file)
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -5,18 +5,25 @@
  import sys
  
  tests = [
+    # 88
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
       "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+    # 87
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
       "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    # 86 - vfl_ymO4Z 2013/06/27
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
       "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+    # 85
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
       "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    # 84
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
       "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
+    # 83
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
       "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
+    # 82
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
       "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
  ]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 6334ce3c42545ab08484fd36c0cc597bd3fd1c63..98388a9f341d7e0cb13e62ca14a566bda8208173 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -118,6 +118,7 @@ def parseOpts(overrideArguments=None):
      selection      = optparse.OptionGroup(parser, 'Video Selection')
      authentication = optparse.OptionGroup(parser, 'Authentication Options')
      video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    downloader     = optparse.OptionGroup(parser, 'Download Options')
      postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
      filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
      verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@@ -130,15 +131,6 @@ def parseOpts(overrideArguments=None):
              action='store_true', dest='update_self', help='update this program to latest version')
      general.add_option('-i', '--ignore-errors',
              action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
-    general.add_option('-r', '--rate-limit',
-            dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
-    general.add_option('-R', '--retries',
-            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
-    general.add_option('--buffer-size',
-            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
-    general.add_option('--no-resize-buffer',
-            action='store_true', dest='noresizebuffer',
-            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
      general.add_option('--dump-user-agent',
              action='store_true', dest='dump_user_agent',
              help='display the current browser identification', default=False)
@@ -152,7 +144,7 @@ def parseOpts(overrideArguments=None):
              help='List all supported extractors and the URLs they would handle', default=False)
      general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
      general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
-    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+
  
      selection.add_option('--playlist-start',
              dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
@@ -211,6 +203,17 @@ def parseOpts(overrideArguments=None):
              action='store', dest='subtitleslang', metavar='LANG',
              help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
  
+    downloader.add_option('-r', '--rate-limit',
+            dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
+    downloader.add_option('-R', '--retries',
+            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+    downloader.add_option('--buffer-size',
+            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+    downloader.add_option('--no-resize-buffer',
+            action='store_true', dest='noresizebuffer',
+            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
+    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+
      verbosity.add_option('-q', '--quiet',
              action='store_true', dest='quiet', help='activates quiet mode', default=False)
      verbosity.add_option('-s', '--simulate',
@@ -317,6 +320,7 @@ def parseOpts(overrideArguments=None):
  
      parser.add_option_group(general)
      parser.add_option_group(selection)
+    parser.add_option_group(downloader)
      parser.add_option_group(filesystem)
      parser.add_option_group(verbosity)
      parser.add_option_group(video_format)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index ec800d9fbc42dd84acc13674b092b81a935d4831..ba0e867138952b4dad6542b5e6202635fec3f2a1 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -47,14 +47,17 @@ from .statigram import StatigramIE
  from .steam import SteamIE
  from .teamcoco import TeamcocoIE
  from .ted import TEDIE
+from .tf1 import TF1IE
  from .traileraddict import TrailerAddictIE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
+from .tutv import TutvIE
  from .ustream import UstreamIE
  from .vbox7 import Vbox7IE
  from .vevo import VevoIE
  from .vimeo import VimeoIE
  from .vine import VineIE
+from .wat import WatIE
  from .wimp import WimpIE
  from .worldstarhiphop import WorldStarHipHopIE
  from .xhamster import XHamsterIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index b061b9566168758465ad56f43b1f74b89b2cce10..183274eb75f0947d11434867f30ba3ef9f6fcc0c 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -11,11 +11,21 @@ from ..utils import (
  )
  
  class ArteTvIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    """
+    There are two sources of video in arte.tv: videos.arte.tv and
+    www.arte.tv/guide; the extraction process is different for each one.
+    The videos expire in 7 days, so we can't add tests.
+    """
+    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
      _LIVE_URL = r'index-[0-9]+\.html$'
  
      IE_NAME = u'arte.tv'
  
+    @classmethod
+    def suitable(cls, url):
+        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
+
      # TODO implement Live Stream
      # def extractLiveStream(self, url):
      #     video_lang = url.split('/')[-4]
@@ -44,17 +54,26 @@ class ArteTvIE(InfoExtractor):
      #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        # This is not a real id, it can be for example AJT for the news
-        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
-        video_id = mobj.group('id')
+        mobj = re.match(self._EMISSION_URL, url)
+        if mobj is not None:
+            name = mobj.group('name')
+            # This is not a real id, it can be for example AJT for the news
+            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+            video_id = mobj.group('id')
+            return self._extract_emission(url, video_id)
+
+        mobj = re.match(self._VIDEOS_URL, url)
+        if mobj is not None:
+            id = mobj.group('id')
+            return self._extract_video(url, id)
  
          if re.search(self._LIVE_URL, video_id) is not None:
              raise ExtractorError(u'Arte live streams are not yet supported, sorry')
              # self.extractLiveStream(url)
              # return
  
+    def _extract_emission(self, url, video_id):
+        """Extract from www.arte.tv/guide"""
          webpage = self._download_webpage(url, video_id)
          json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
  
@@ -68,6 +87,7 @@ class ArteTvIE(InfoExtractor):
                       'description': player_info['VDE'],
                       'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
                       'thumbnail': player_info['programImage'],
+                     'ext': 'flv',
                       }
  
          formats = player_info['VSR'].values()
@@ -78,9 +98,36 @@ class ArteTvIE(InfoExtractor):
          if format_info['mediaType'] == u'rtmp':
              info_dict['url'] = format_info['streamer']
              info_dict['play_path'] = 'mp4:' + format_info['url']
-            info_dict['ext'] = 'mp4'
          else:
              info_dict['url'] = format_info['url']
-            info_dict['ext'] = 'mp4'
  
          return info_dict
+
+    def _extract_video(self, url, video_id):
+        """Extract from videos.arte.tv"""
+        config_xml_url = url.replace('/videos/', '/do_delegate/videos/')
+        config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml')
+        config_xml = self._download_webpage(config_xml_url, video_id)
+        config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url')
+        config_xml = self._download_webpage(config_xml_url, video_id)
+
+        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
+        def _key(m):
+            quality = m.group('quality')
+            if quality == 'hd':
+                return 2
+            else:
+                return 1
+        # We pick the best quality
+        video_urls = sorted(video_urls, key=_key)
+        video_url = list(video_urls)[-1].group('url')
+        
+        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
+        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
+                                            config_xml, 'thumbnail')
+        return {'id': video_id,
+                'title': title,
+                'thumbnail': thumbnail,
+                'url': video_url,
+                'ext': 'flv',
+                }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 64d63e109d8b9382503d7e0585f4ed589ba72200..5c6fd7945c34ca04aa37633250fcdee5ef3c460a 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -44,6 +44,7 @@ class InfoExtractor(object):
      location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
      subtitles:      The subtitle file contents.
+    view_count:     How many users have watched the video on the platform.
      urlhandle:      [internal] The urlHandle to be used to download the file,
                      like returned by urllib.request.urlopen
  
diff --git a/youtube_dl/extractor/statigram.py b/youtube_dl/extractor/statigram.py

index 25bf5b85ce205fb936a534cc6b79a5233bb7d593..ae9a63e8b4e018c1cc3625aa8bc75fe37d62922a 100644 (file)
--- a/youtube_dl/extractor/statigram.py
+++ b/youtube_dl/extractor/statigram.py
@@ -10,7 +10,7 @@ class StatigramIE(InfoExtractor):
          u'md5': u'deda4ff333abe2e118740321e992605b',
          u'info_dict': {
              u"uploader_id": u"videoseconds", 
-            u"title": u"Instagram photo by @videoseconds (Videos)"
+            u"title": u"Instagram photo by @videoseconds"
          }
      }
  
@@ -27,7 +27,7 @@ class StatigramIE(InfoExtractor):
          html_title = self._html_search_regex(
              r'<title>(.+?)</title>',
              webpage, u'title')
-        title = html_title.rpartition(u' | Statigram')[0]
+        title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
          uploader_id = self._html_search_regex(
              r'@([^ ]+)', title, u'uploader name', fatal=False)
          ext = 'mp4'
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index 46b66582ced7edbb1cf4752f192b9df003031fc0..8b73b8340c40badad0023a53cc5b10b363e57b6a 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -17,7 +17,7 @@ class TEDIE(InfoExtractor):
      _TEST = {
          u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
          u'file': u'102.mp4',
-        u'md5': u'8cd9dfa41ee000ce658fd48fb5d89a61',
+        u'md5': u'2d76ee1576672e0bd8f187513267adf6',
          u'info_dict': {
              u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922", 
              u"title": u"Dan Dennett: The illusion of consciousness"
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py

new file mode 100644 (file)

index 0000000..e0ffece
--- /dev/null
+++ b/youtube_dl/extractor/tf1.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+
+class TF1IE(InfoExtractor):
+    """
+    TF1 uses the wat.tv player. Currently only videos with the html5 player
+    enabled can be downloaded; HD videos cannot.
+    """
+    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
+    _TEST = {
+        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        u'file': u'10635995.mp4',
+        u'md5': u'66789d3e91278d332f75e1feb7aea327',
+        u'info_dict': {
+            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group(1)
+        webpage = self._download_webpage(url, id)
+        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
+                                webpage, 'embed url')
+        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
+        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
+        wat_info = json.loads(wat_info)['media']
+        wat_url = wat_info['url']
+        return self.url_result(wat_url, 'Wat')
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py

index 243f04bff869298f0674bdb68554f8bd19191660..ad5840ca2f4b8f2fe82c46cca261af86716f25dc 100644 (file)
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -9,11 +9,11 @@ from ..utils import (
  class TumblrIE(InfoExtractor):
      _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
      _TEST = {
-        u'url': u'http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja',
-        u'file': u'53364321212.mp4',
-        u'md5': u'0716d3dd51baf68a28b40fdf1251494e',
+        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+        u'file': u'54196191430.mp4',
+        u'md5': u'479bb068e5b16462f5176a6828829767',
          u'info_dict': {
-            u"title": u"Rafael Lemos"
+            u"title": u"tatiana maslany news"
          }
      }
  
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py

new file mode 100644 (file)

index 0000000..fcaa6ac
--- /dev/null
+++ b/youtube_dl/extractor/tutv.py
@@ -0,0 +1,41 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+)
+
+class TutvIE(InfoExtractor):
+    _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
+    _TEST = {
+        u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
+        u'file': u'2742556.flv',
+        u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
+        u'info_dict': {
+            u"title": u"Noah en pabellon cuahutemoc"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<meta property="og:title" content="(.*?)">', webpage, u'title')
+        internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
+
+        data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
+        data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
+        data = compat_parse_qs(data_content)
+        video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
+        ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
+
+        info = {
+            'id': internal_id,
+            'url': video_url,
+            'ext': ext,
+            'title': title,
+        }
+        return [info]
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index 11741e27d0033f548b277bcd7cf3bba2f2c1a2b4..7c4562790d1452f8bdad25a09156607b56e552fa 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -16,7 +16,7 @@ class VimeoIE(InfoExtractor):
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
      IE_NAME = u'vimeo'
      _TEST = {
          u'url': u'http://vimeo.com/56015672',
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py

new file mode 100644 (file)

index 0000000..0d1302c
--- /dev/null
+++ b/youtube_dl/extractor/wat.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    compat_urllib_parse,
+    unified_strdate,
+)
+
+
+class WatIE(InfoExtractor):
+    _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
+    IE_NAME = 'wat.tv'
+    _TEST = {
+        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        u'file': u'10631273.mp4',
+        u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
+        u'info_dict': {
+            u'title': u'World War Z - Philadelphia VOST',
+            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        }
+    }
+    
+    def download_video_info(self, real_id):
+        # 'contentv4' is used on the website, but it also returns the related
+        # videos, which we don't need
+        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
+        info = json.loads(info)
+        return info['media']
+
+
+    def _real_extract(self, url):
+        def real_id_for_chapter(chapter):
+            return chapter['tc_start'].split('-')[0]
+        mobj = re.match(self._VALID_URL, url)
+        short_id = mobj.group('shortID')
+        webpage = self._download_webpage(url, short_id)
+        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
+
+        video_info = self.download_video_info(real_id)
+        chapters = video_info['chapters']
+        first_chapter = chapters[0]
+
+        if real_id_for_chapter(first_chapter) != real_id:
+            self.to_screen('Multipart video detected')
+            chapter_urls = []
+            for chapter in chapters:
+                chapter_id = real_id_for_chapter(chapter)
+                # Yes, when this chapter is processed by WatIE,
+                # it will download the info again
+                chapter_info = self.download_video_info(chapter_id)
+                chapter_urls.append(chapter_info['url'])
+            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
+            return self.playlist_result(entries, real_id, video_info['title'])
+
+        # Otherwise we can continue and extract just one part, we have to use
+        # the short id for getting the video url
+        player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
+                                                     'html5': '1'})
+        player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
+                                             real_id, u'Downloading player info')
+        player = json.loads(player_info)['player']
+        html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
+                                               'html5 player')
+        player_webpage = self._download_webpage(html5_player, real_id,
+                                                u'Downloading player webpage')
+
+        video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
+                                       'video url')
+        info = {'id': real_id,
+                'url': video_url,
+                'ext': 'mp4',
+                'title': first_chapter['title'],
+                'thumbnail': first_chapter['preview'],
+                'description': first_chapter['description'],
+                'view_count': video_info['views'],
+                }
+        if 'date_diffusion' in first_chapter:
+            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
+
+        return info
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 96d8257d9093fe0892faefe18fd789b6ef207e5a..109c8a93fa9ca7cb4caeca87462e1385a802bb69 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -168,7 +168,7 @@ class YoutubeIE(InfoExtractor):
          self.to_screen(u'RTMP download detected')
  
      def _decrypt_signature(self, s):
-        """Decrypt the key"""
+        """Turn the encrypted s field into a working signature"""
  
          if len(s) == 88:
              return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
@@ -402,6 +402,9 @@ class YoutubeIE(InfoExtractor):
          return video_id
  
      def _real_extract(self, url):
+        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
+            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
+
          # Extract original video URL from URL with redirection, like age verification, using next_url parameter
          mobj = re.search(self._NEXT_URL_RE, url)
          if mobj:
diff --git a/youtube_dl/update.py b/youtube_dl/update.py

index eab8417a50da957bf5da8c6f6d739a63599e79f6..ccab6f27f2a7c2faa7e2f1c047678ff763f62b22 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -44,7 +44,7 @@ def update_self(to_screen, verbose, filename):
  
  
      if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
-        to_screen(u'It looks like you installed youtube-dl with pip, setup.py or a tarball. Please use that to update.')
+        to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
          return
  
      # Check if there is a new version
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index af2af2d8b73f6f2720ef48c0261424fddde9ce50..7bba3a8837ce38b374990030421e058322bd8292 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2013.06.34.2'
+__version__ = '2013.06.34.4'
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 1 Jul 2013 13:19:45 +0000 (15:19 +0200)
README.md		patch \| blob \| history
devscripts/youtube_genalgo.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/arte.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/statigram.py		patch \| blob \| history
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/tf1.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tumblr.py		patch \| blob \| history
youtube_dl/extractor/tutv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/wat.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/update.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history