Merge remote-tracking branch 'sagittarian/vimeo-no-desc'
author Philipp Hagemeister <phihag@phihag.de>
Thu, 11 Apr 2013 08:56:01 +0000 (10:56 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 11 Apr 2013 08:56:01 +0000 (10:56 +0200)
test/tests.json
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py
youtube_dl/__main__.py

index 0c94c65bdee2d77cd3a2ffadb0b403444ba1d8e2..0c3b240549b92fbf49eb43cf43b67f63755081ed 100644 (file)
     "info_dict": {
         "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! "
     }
+  },
+  {
+    "name": "ARD",
+    "url": "http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640",
+    "file": "14077640.mp4",
+    "md5": "6ca8824255460c787376353f9e20bbd8",
+    "info_dict": {
+        "title": "11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
+    }
   }
+
 ]
index 7c5a52be15fa449396fb931bb5911f108ec38493..7731db17f4daf65d16f60effb88f633014ecbe2a 100644 (file)
@@ -388,7 +388,9 @@ class FileDownloader(object):
             template_dict = dict(info_dict)
 
             template_dict['epoch'] = int(time.time())
-            template_dict['autonumber'] = u'%05d' % self._num_downloads
+            autonumber_size = self.params.get('autonumber_size', 5)
+            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
+            template_dict['autonumber'] = autonumber_templ % self._num_downloads
 
             sanitize = lambda k,v: sanitize_filename(
                 u'NA' if v is None else compat_str(v),
@@ -629,7 +631,7 @@ class FileDownloader(object):
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
 
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
@@ -648,6 +650,8 @@ class FileDownloader(object):
             basic_args += ['-W', player_url]
         if page_url is not None:
             basic_args += ['--pageUrl', page_url]
+        if play_path is not None:
+            basic_args += ['-y', play_path]
         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -702,7 +706,8 @@ class FileDownloader(object):
         if url.startswith('rtmp'):
             return self._download_with_rtmpdump(filename, url,
                                                 info_dict.get('player_url', None),
-                                                info_dict.get('page_url', None))
+                                                info_dict.get('page_url', None),
+                                                info_dict.get('play_path', None))
 
         tmpfilename = self.temp_name(filename)
         stream = None
index 82ba634499660c8cacc0b4494596bbc99ecbb587..0807306609bcde085046b68b8486b5b8f40a1d11 100755 (executable)
@@ -4356,6 +4356,46 @@ class LiveLeakIE(InfoExtractor):
 
         return [info]
 
+class ARDIE(InfoExtractor):
+    """Information extractor for videos on ardmediathek.de and mediathek.daserste.de."""
+
+    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+    def _real_extract(self, url):
+        """Extract the video referenced by url.
+
+        Returns a one-element list holding the info dictionary, or None
+        after an error has been reported through the downloader.
+        """
+        # determine video id from url: prefer the numeric documentId query
+        # parameter, fall back to the last path component
+        numid = re.search(r'documentId=([0-9]+)', url)
+        if numid:
+            video_id = numid.group(1)
+        else:
+            video_id = re.match(self._VALID_URL, url).group('video_id')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+        title_match = re.search(self._TITLE, html)
+        if title_match is None:
+            # re.search() returns None when the headline is missing; report
+            # that instead of failing with an AttributeError
+            self._downloader.report_error(u'unable to extract title')
+            return
+        title = title_match.group('title')
+
+        streams = [s.groupdict() for s in re.finditer(self._MEDIA_STREAM, html)]
+        if not streams:
+            # explicit checks instead of assert: assertions vanish under -O
+            if '"fsk"' in html:
+                self._downloader.report_error(u'this video is only available after 8:00 pm')
+            else:
+                self._downloader.report_error(u'unable to find any media streams')
+            return
+
+        # choose default media type and highest quality for now
+        candidates = [s for s in streams if int(s['media_type']) == 0]
+        if not candidates:
+            # max() would raise ValueError on an empty sequence
+            self._downloader.report_error(u'no media stream of the default media type found')
+            return
+        stream = max(candidates, key=lambda s: int(s['quality']))
+
+        # there are two possibilities: RTMP stream or HTTP download
+        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+        video_url = stream['video_url']
+        if stream['rtmp_url']:
+            self._downloader.to_screen(u'[%s] RTMP download detected' % self.IE_NAME)
+            if not video_url.startswith('mp4:'):
+                self._downloader.report_error(u'unexpected RTMP play path: %s' % video_url)
+                return
+            info['url'] = stream['rtmp_url']
+            # the downloader forwards play_path to rtmpdump via -y
+            info['play_path'] = video_url
+        else:
+            if not video_url.endswith('.mp4'):
+                self._downloader.report_error(u'unexpected video URL: %s' % video_url)
+                return
+            info['url'] = video_url
+        return [info]
+
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4409,5 +4449,6 @@ def gen_extractors():
         MySpassIE(),
         SpiegelIE(),
         LiveLeakIE(),
+        ARDIE(),
         GenericIE()
     ]
index 807b735412a674bb4d2098a2a057c0e91266ad68..489f8948aedc3a66c2b933c0b34e00996cc0efb1 100644 (file)
@@ -24,6 +24,7 @@ __authors__  = (
     'Jaime Marquínez Ferrándiz',
     'Jeff Crouse',
     'Osama Khalid',
+    'Michael Walter',
     )
 
 __license__ = 'Public Domain'
@@ -235,6 +236,9 @@ def parseOpts():
             help='number downloaded files starting from 00000', default=False)
     filesystem.add_option('-o', '--output',
             dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
+    filesystem.add_option('--autonumber-size',
+            dest='autonumber_size', metavar='NUMBER',
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
     filesystem.add_option('--restrict-filenames',
             action='store_true', dest='restrictfilenames',
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -451,6 +455,7 @@ def _real_main():
         'format_limit': opts.format_limit,
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
+        'autonumber_size': opts.autonumber_size,
         'restrictfilenames': opts.restrictfilenames,
         'ignoreerrors': opts.ignoreerrors,
         'ratelimit': opts.ratelimit,
index 7022ea4bec75fb864cd58c3c3c9b5f2a15bc5d7a..3fe29c91f416e0d6c957ed750d3f0f69950dc9c0 100755 (executable)
@@ -9,7 +9,8 @@ import sys
 if __package__ is None and not hasattr(sys, "frozen"):
     # direct call of __main__.py
     import os.path
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    path = os.path.realpath(os.path.abspath(__file__))
+    sys.path.append(os.path.dirname(os.path.dirname(path)))
 
 import youtube_dl