Merge branch 'extract_info_rewrite'
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 19 Apr 2013 19:57:08 +0000 (21:57 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 19 Apr 2013 19:57:08 +0000 (21:57 +0200)
12 files changed:
README.md
devscripts/gh-pages/update-feed.py
devscripts/transition_helper_exe/youtube-dl.py
test/test_download.py
test/tests.json
youtube-dl
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py
youtube_dl/__main__.py
youtube_dl/update.py
youtube_dl/version.py

index e2958a9b06af767c2199cae1a289d631a85dc302..d42aab44a8ddbf9f13c5ffc3fa4a228fd850bff1 100644 (file)
--- a/README.md
+++ b/README.md
@@ -14,119 +14,125 @@ your Unix box, on Windows or on Mac OS X. It is released to the public domain,
 which means you can modify it, redistribute it or use it however you like.
 
 # OPTIONS
-    -h, --help               print this help text and exit
-    --version                print program version and exit
-    -U, --update             update this program to latest version
-    -i, --ignore-errors      continue on download errors
-    -r, --rate-limit LIMIT   maximum download rate (e.g. 50k or 44.6m)
-    -R, --retries RETRIES    number of retries (default is 10)
-    --buffer-size SIZE       size of download buffer (e.g. 1024 or 16k) (default
-                             is 1024)
-    --no-resize-buffer       do not automatically adjust the buffer size. By
-                             default, the buffer size is automatically resized
-                             from an initial value of SIZE.
-    --dump-user-agent        display the current browser identification
-    --user-agent UA          specify a custom user agent
-    --list-extractors        List all supported extractors and the URLs they
-                             would handle
+    -h, --help                 print this help text and exit
+    --version                  print program version and exit
+    -U, --update               update this program to latest version
+    -i, --ignore-errors        continue on download errors
+    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
+    -R, --retries RETRIES      number of retries (default is 10)
+    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k)
+                               (default is 1024)
+    --no-resize-buffer         do not automatically adjust the buffer size. By
+                               default, the buffer size is automatically resized
+                               from an initial value of SIZE.
+    --dump-user-agent          display the current browser identification
+    --user-agent UA            specify a custom user agent
+    --list-extractors          List all supported extractors and the URLs they
+                               would handle
 
 ## Video Selection:
-    --playlist-start NUMBER  playlist video to start at (default is 1)
-    --playlist-end NUMBER    playlist video to end at (default is last)
-    --match-title REGEX      download only matching titles (regex or caseless
-                             sub-string)
-    --reject-title REGEX     skip download for matching titles (regex or
-                             caseless sub-string)
-    --max-downloads NUMBER   Abort after downloading NUMBER files
-    --min-filesize SIZE      Do not download any videos smaller than SIZE (e.g.
-                             50k or 44.6m)
-    --max-filesize SIZE      Do not download any videos larger than SIZE (e.g.
-                             50k or 44.6m)
+    --playlist-start NUMBER    playlist video to start at (default is 1)
+    --playlist-end NUMBER      playlist video to end at (default is last)
+    --match-title REGEX        download only matching titles (regex or caseless
+                               sub-string)
+    --reject-title REGEX       skip download for matching titles (regex or
+                               caseless sub-string)
+    --max-downloads NUMBER     Abort after downloading NUMBER files
+    --min-filesize SIZE        Do not download any videos smaller than SIZE
+                               (e.g. 50k or 44.6m)
+    --max-filesize SIZE        Do not download any videos larger than SIZE (e.g.
+                               50k or 44.6m)
 
 ## Filesystem Options:
-    -t, --title              use title in file name
-    --id                     use video ID in file name
-    -l, --literal            [deprecated] alias of --title
-    -A, --auto-number        number downloaded files starting from 00000
-    -o, --output TEMPLATE    output filename template. Use %(title)s to get the
-                             title, %(uploader)s for the uploader name,
-                             %(uploader_id)s for the uploader nickname if
-                             different, %(autonumber)s to get an automatically
-                             incremented number, %(ext)s for the filename
-                             extension, %(upload_date)s for the upload date
-                             (YYYYMMDD), %(extractor)s for the provider
-                             (youtube, metacafe, etc), %(id)s for the video id
-                             and %% for a literal percent. Use - to output to
-                             stdout. Can also be used to download to a different
-                             directory, for example with -o '/my/downloads/%(upl
-                             oader)s/%(title)s-%(id)s.%(ext)s' .
-    --restrict-filenames     Restrict filenames to only ASCII characters, and
-                             avoid "&" and spaces in filenames
-    -a, --batch-file FILE    file containing URLs to download ('-' for stdin)
-    -w, --no-overwrites      do not overwrite files
-    -c, --continue           resume partially downloaded files
-    --no-continue            do not resume partially downloaded files (restart
-                             from beginning)
-    --cookies FILE           file to read cookies from and dump cookie jar in
-    --no-part                do not use .part files
-    --no-mtime               do not use the Last-modified header to set the file
-                             modification time
-    --write-description      write video description to a .description file
-    --write-info-json        write video metadata to a .info.json file
+    -t, --title                use title in file name
+    --id                       use video ID in file name
+    -l, --literal              [deprecated] alias of --title
+    -A, --auto-number          number downloaded files starting from 00000
+    -o, --output TEMPLATE      output filename template. Use %(title)s to get
+                               the title, %(uploader)s for the uploader name,
+                               %(uploader_id)s for the uploader nickname if
+                               different, %(autonumber)s to get an automatically
+                               incremented number, %(ext)s for the filename
+                               extension, %(upload_date)s for the upload date
+                               (YYYYMMDD), %(extractor)s for the provider
+                               (youtube, metacafe, etc), %(id)s for the video id
+                               and %% for a literal percent. Use - to output to
+                               stdout. Can also be used to download to a
+                               different directory, for example with -o '/my/dow
+                               nloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+    --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
+                               when it is present in output filename template or
+                               --autonumber option is given
+    --restrict-filenames       Restrict filenames to only ASCII characters, and
+                               avoid "&" and spaces in filenames
+    -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
+    -w, --no-overwrites        do not overwrite files
+    -c, --continue             resume partially downloaded files
+    --no-continue              do not resume partially downloaded files (restart
+                               from beginning)
+    --cookies FILE             file to read cookies from and dump cookie jar in
+    --no-part                  do not use .part files
+    --no-mtime                 do not use the Last-modified header to set the
+                               file modification time
+    --write-description        write video description to a .description file
+    --write-info-json          write video metadata to a .info.json file
 
 ## Verbosity / Simulation Options:
-    -q, --quiet              activates quiet mode
-    -s, --simulate           do not download the video and do not write anything
-                             to disk
-    --skip-download          do not download the video
-    -g, --get-url            simulate, quiet but print URL
-    -e, --get-title          simulate, quiet but print title
-    --get-thumbnail          simulate, quiet but print thumbnail URL
-    --get-description        simulate, quiet but print video description
-    --get-filename           simulate, quiet but print output filename
-    --get-format             simulate, quiet but print output format
-    --newline                output progress bar as new lines
-    --no-progress            do not print progress bar
-    --console-title          display progress in console titlebar
-    -v, --verbose            print various debugging information
+    -q, --quiet                activates quiet mode
+    -s, --simulate             do not download the video and do not write
+                               anything to disk
+    --skip-download            do not download the video
+    -g, --get-url              simulate, quiet but print URL
+    -e, --get-title            simulate, quiet but print title
+    --get-thumbnail            simulate, quiet but print thumbnail URL
+    --get-description          simulate, quiet but print video description
+    --get-filename             simulate, quiet but print output filename
+    --get-format               simulate, quiet but print output format
+    --newline                  output progress bar as new lines
+    --no-progress              do not print progress bar
+    --console-title            display progress in console titlebar
+    -v, --verbose              print various debugging information
+    --dump-intermediate-pages  print downloaded pages to debug problems(very
+                               verbose)
 
 ## Video Format Options:
-    -f, --format FORMAT      video format code
-    --all-formats            download all available video formats
-    --prefer-free-formats    prefer free video formats unless a specific one is
-                             requested
-    --max-quality FORMAT     highest quality format to download
-    -F, --list-formats       list all available formats (currently youtube only)
-    --write-sub              write subtitle file (currently youtube only)
-    --only-sub               downloads only the subtitles (no video)
-    --all-subs               downloads all the available subtitles of the video
-                             (currently youtube only)
-    --list-subs              lists all available subtitles for the video
-                             (currently youtube only)
-    --sub-format LANG        subtitle format [srt/sbv] (default=srt) (currently
-                             youtube only)
-    --sub-lang LANG          language of the subtitles to download (optional)
-                             use IETF language tags like 'en'
+    -f, --format FORMAT        video format code
+    --all-formats              download all available video formats
+    --prefer-free-formats      prefer free video formats unless a specific one
+                               is requested
+    --max-quality FORMAT       highest quality format to download
+    -F, --list-formats         list all available formats (currently youtube
+                               only)
+    --write-sub                write subtitle file (currently youtube only)
+    --only-sub                 downloads only the subtitles (no video)
+    --all-subs                 downloads all the available subtitles of the
+                               video (currently youtube only)
+    --list-subs                lists all available subtitles for the video
+                               (currently youtube only)
+    --sub-format LANG          subtitle format [srt/sbv] (default=srt)
+                               (currently youtube only)
+    --sub-lang LANG            language of the subtitles to download (optional)
+                               use IETF language tags like 'en'
 
 ## Authentication Options:
-    -u, --username USERNAME  account username
-    -p, --password PASSWORD  account password
-    -n, --netrc              use .netrc authentication data
+    -u, --username USERNAME    account username
+    -p, --password PASSWORD    account password
+    -n, --netrc                use .netrc authentication data
 
 ## Post-processing Options:
-    -x, --extract-audio      convert video files to audio-only files (requires
-                             ffmpeg or avconv and ffprobe or avprobe)
-    --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", "opus", or
-                             "wav"; best by default
-    --audio-quality QUALITY  ffmpeg/avconv audio quality specification, insert a
-                             value between 0 (better) and 9 (worse) for VBR or a
-                             specific bitrate like 128K (default 5)
-    --recode-video FORMAT    Encode the video to another format if necessary
-                             (currently supported: mp4|flv|ogg|webm)
-    -k, --keep-video         keeps the video file on disk after the post-
-                             processing; the video is erased by default
-    --no-post-overwrites     do not overwrite post-processed files; the post-
-                             processed files are overwritten by default
+    -x, --extract-audio        convert video files to audio-only files (requires
+                               ffmpeg or avconv and ffprobe or avprobe)
+    --audio-format FORMAT      "best", "aac", "vorbis", "mp3", "m4a", "opus", or
+                               "wav"; best by default
+    --audio-quality QUALITY    ffmpeg/avconv audio quality specification, insert
+                               a value between 0 (better) and 9 (worse) for VBR
+                               or a specific bitrate like 128K (default 5)
+    --recode-video FORMAT      Encode the video to another format if necessary
+                               (currently supported: mp4|flv|ogg|webm)
+    -k, --keep-video           keeps the video file on disk after the post-
+                               processing; the video is erased by default
+    --no-post-overwrites       do not overwrite post-processed files; the post-
+                               processed files are overwritten by default
 
 # CONFIGURATION
 
index e299429c1ff768d09a43322ee37a1c2d8f682af4..cfff05fc8f017cac11bc0293ca60734040127c39 100755 (executable)
@@ -19,7 +19,7 @@ entry_template=textwrap.dedent("""
                                                                <atom:entry>
                                                                        <atom:id>youtube-dl-@VERSION@</atom:id>
                                                                        <atom:title>New version @VERSION@</atom:title>
-                                                                       <atom:link href="http://rg3.github.com/youtube-dl" />
+                                                                       <atom:link href="http://rg3.github.io/youtube-dl" />
                                                                        <atom:content type="xhtml">
                                                                                <div xmlns="http://www.w3.org/1999/xhtml">
                                                                                        Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
index dbb4c99e1545fc2a0783c19e515bde720acbee60..6297dfd40f426cbbafbd2d362034c43cd464f8dc 100644 (file)
@@ -40,7 +40,7 @@ raw_input()
 
 filename = sys.argv[0]
 
-UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
+UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
 VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
 JSON_URL = UPDATE_URL + 'versions.json'
 UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
index 59a6e1498fd120bc6566f7256fcba4c20d313c7a..e3513efba19fc7ee00134226396ea451b6279cf9 100644 (file)
@@ -58,6 +58,7 @@ with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
 
 
 class TestDownload(unittest.TestCase):
+    maxDiff = None
     def setUp(self):
         self.parameters = parameters
         self.defs = defs
index 0c94c65bdee2d77cd3a2ffadb0b403444ba1d8e2..7808a07de24e7326ee7a690fb8d122e99fd6cfa1 100644 (file)
@@ -76,8 +76,7 @@
     "name": "StanfordOpenClassroom",
     "md5":  "544a9468546059d4e80d76265b0443b8",
     "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
-    "file":  "PracticalUnix_intro-environment.mp4",
-    "skip": "Currently offline"
+    "file":  "PracticalUnix_intro-environment.mp4"
   },
   {
     "name": "XNXX",
     "info_dict": {
         "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! "
     }
+  },
+  {
+    "name": "ARD",
+    "url": "http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640",
+    "file": "14077640.mp4",
+    "md5": "6ca8824255460c787376353f9e20bbd8",
+    "info_dict": {
+        "title": "11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
+    },
+    "skip": "Requires rtmpdump"
   }
+
 ]
index e6f05c17327ed58f8db66e6dc7d2a38380355d61..e3eb8774caa258ddbaa51323938fbd36c4fc170d 100755 (executable)
@@ -38,7 +38,7 @@ def rsa_verify(message, signature, key):
 
 sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
 sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
+sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n')
 
 try:
        raw_input()
@@ -47,7 +47,7 @@ except NameError: # Python 3
 
 filename = sys.argv[0]
 
-UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
+UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
 VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
 JSON_URL = UPDATE_URL + 'versions.json'
 UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
index 4dabbb440837e0a3bbbf41032ef9d6001609f041..03346ab04263a9e7c11f829053f8e6d96101781e 100644 (file)
@@ -388,7 +388,11 @@ class FileDownloader(object):
             template_dict = dict(info_dict)
 
             template_dict['epoch'] = int(time.time())
-            template_dict['autonumber'] = u'%05d' % self._num_downloads
+            autonumber_size = self.params.get('autonumber_size')
+            if autonumber_size is None:
+                autonumber_size = 5
+            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
+            template_dict['autonumber'] = autonumber_templ % self._num_downloads
             if template_dict['playlist_index'] is not None:
                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 
@@ -524,6 +528,10 @@ class FileDownloader(object):
         #We increment the download count here to match the previous behaviour.
         self.increment_downloads()
         
+        info_dict['fulltitle'] = info_dict['title']
+        if len(info_dict['title']) > 200:
+            info_dict['title'] = info_dict['title'][:197] + u'...'
+
         # Keep for backwards compatibility
         info_dict['stitle'] = info_dict['title']
 
@@ -692,7 +700,7 @@ class FileDownloader(object):
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
 
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
@@ -711,6 +719,8 @@ class FileDownloader(object):
             basic_args += ['-W', player_url]
         if page_url is not None:
             basic_args += ['--pageUrl', page_url]
+        if play_path is not None:
+            basic_args += ['-y', play_path]
         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -765,7 +775,8 @@ class FileDownloader(object):
         if url.startswith('rtmp'):
             return self._download_with_rtmpdump(filename, url,
                                                 info_dict.get('player_url', None),
-                                                info_dict.get('page_url', None))
+                                                info_dict.get('page_url', None),
+                                                info_dict.get('play_path', None))
 
         tmpfilename = self.temp_name(filename)
         stream = None
index a7fdf1607c4c73e4caef623a1598065e84b3a834..ae36558d75839f68facb72300efb5b4c22bcd809 100755 (executable)
@@ -115,7 +115,8 @@ class InfoExtractor(object):
         """ Returns the response handle """
         if note is None:
             note = u'Downloading video webpage'
-        self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
+        if note is not False:
+            self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
         try:
             return compat_urllib_request.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -133,6 +134,14 @@ class InfoExtractor(object):
         else:
             encoding = 'utf-8'
         webpage_bytes = urlh.read()
+        if self._downloader.params.get('dump_intermediate_pages', False):
+            try:
+                url = url_or_request.get_full_url()
+            except AttributeError:
+                url = url_or_request
+            self._downloader.to_screen(u'Dumping request to ' + url)
+            dump = base64.b64encode(webpage_bytes).decode('ascii')
+            self._downloader.to_screen(dump)
         return webpage_bytes.decode(encoding, 'replace')
         
     #Methods for following #608
@@ -485,18 +494,14 @@ class YoutubeIE(InfoExtractor):
         # Get video info
         self.report_video_info_webpage_download(video_id)
         for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                     % (video_id, el_type))
-            request = compat_urllib_request.Request(video_info_url)
-            try:
-                video_info_webpage_bytes = compat_urllib_request.urlopen(request).read()
-                video_info_webpage = video_info_webpage_bytes.decode('utf-8', 'ignore')
-                video_info = compat_parse_qs(video_info_webpage)
-                if 'token' in video_info:
-                    break
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.report_error(u'unable to download video info webpage: %s' % compat_str(err))
-                return
+            video_info_webpage = self._download_webpage(video_info_url, video_id,
+                                    note=False,
+                                    errnote='unable to download video info webpage')
+            video_info = compat_parse_qs(video_info_webpage)
+            if 'token' in video_info:
+                break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 self._downloader.report_error(u'YouTube said: %s' % video_info['reason'][0])
@@ -1151,7 +1156,7 @@ class VimeoIE(InfoExtractor):
         # Extract video description
         video_description = get_element_by_attribute("itemprop", "description", webpage)
         if video_description: video_description = clean_html(video_description)
-        else: video_description = ''
+        else: video_description = u''
 
         # Extract upload date
         video_upload_date = None
@@ -1794,9 +1799,13 @@ class YoutubePlaylistIE(InfoExtractor):
                 self._downloader.report_error(u'Invalid JSON in API response: ' + compat_str(err))
                 return
 
-            if not 'feed' in response or not 'entry' in response['feed']:
+            if 'feed' not in response:
                 self._downloader.report_error(u'Got a malformed response from YouTube API')
                 return
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break
+
             videos += [ (entry['yt$position']['$t'], entry['content']['src'])
                         for entry in response['feed']['entry']
                         if 'content' in entry ]
@@ -2144,7 +2153,7 @@ class FacebookIE(InfoExtractor):
         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
-        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
         m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
         if not m:
@@ -2152,12 +2161,14 @@ class FacebookIE(InfoExtractor):
         data = dict(json.loads(m.group(1)))
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
-        video_url = params['hd_src']
+        video_data = params['video_data'][0]
+        video_url = video_data.get('hd_src')
         if not video_url:
-            video_url = params['sd_src']
+            video_url = video_data['sd_src']
         if not video_url:
             raise ExtractorError(u'Cannot find video URL')
-        video_duration = int(params['video_duration'])
+        video_duration = int(video_data['video_duration'])
+        thumbnail = video_data['thumbnail_src']
 
         m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
         if not m:
@@ -2170,7 +2181,7 @@ class FacebookIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'duration': video_duration,
-            'thumbnail': params['thumbnail_src'],
+            'thumbnail': thumbnail,
         }
         return [info]
 
@@ -3685,7 +3696,9 @@ class FunnyOrDieIE(InfoExtractor):
 
         m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage, flags=re.DOTALL)
         if not m:
-            self._downloader.trouble(u'Cannot find video title')
+            m = re.search(r'<title>(?P<title>[^<]+?)</title>', webpage)
+            if not m:
+                self._downloader.trouble(u'Cannot find video title')
         title = clean_html(m.group('title'))
 
         m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
@@ -4119,7 +4132,7 @@ class KeekIE(InfoExtractor):
         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
+        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
         title = unescapeHTML(m.group('title'))
         m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
         uploader = clean_html(m.group('uploader'))
@@ -4344,6 +4357,46 @@ class LiveLeakIE(InfoExtractor):
 
         return [info]
 
+class ARDIE(InfoExtractor):
+    """Information extractor for ardmediathek.de / mediathek.daserste.de pages.
+
+    The video page registers its streams through JavaScript calls of the form
+    mediaCollection.addMediaStream(<media_type>, <quality>, "<rtmp_url>",
+    "<video_url>", "<...>"); those calls are parsed with _MEDIA_STREAM.
+    """
+    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+    def _real_extract(self, url):
+        """Return a one-element list with the info dict for the video at url.
+
+        Returns None (after reporting an error through the downloader) when
+        the page carries the "fsk" marker and exposes no streams.
+        Raises ExtractorError when the page cannot be parsed as expected.
+        """
+        # Prefer the numeric documentId query parameter as the video id and
+        # fall back to the last path component matched by _VALID_URL.
+        numid = re.search(r'documentId=([0-9]+)', url)
+        if numid:
+            video_id = numid.group(1)
+        else:
+            url_match = re.match(self._VALID_URL, url)
+            if url_match is None:
+                raise ExtractorError(u'Invalid URL: %s' % url)
+            video_id = url_match.group('video_id')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+        title_match = re.search(self._TITLE, html)
+        if title_match is None:
+            raise ExtractorError(u'Cannot find video title')
+        title = title_match.group('title')
+
+        streams = [sm.groupdict() for sm in re.finditer(self._MEDIA_STREAM, html)]
+        if not streams:
+            if '"fsk"' in html:
+                self._downloader.report_error(u'this video is only available after 8:00 pm')
+                return
+            # No streams and no "fsk" marker: explicit errors instead of
+            # asserts, which are stripped when Python runs with -O.
+            raise ExtractorError(u'Cannot find any media stream')
+
+        # choose default media type (0) and highest quality for now
+        default_streams = [s for s in streams if int(s["media_type"]) == 0]
+        if not default_streams:
+            raise ExtractorError(u'Cannot find a stream of the default media type')
+        stream = max(default_streams, key=lambda s: int(s["quality"]))
+
+        # there are two possibilities: RTMP stream or HTTP download
+        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+        if stream['rtmp_url']:
+            self._downloader.to_screen(u'[%s] RTMP download detected' % self.IE_NAME)
+            if not stream['video_url'].startswith('mp4:'):
+                raise ExtractorError(u'Unexpected RTMP play path: %s' % stream['video_url'])
+            info["url"] = stream["rtmp_url"]
+            # For RTMP the "video_url" field is the play path, not a URL.
+            info["play_path"] = stream['video_url']
+        else:
+            if not stream["video_url"].endswith('.mp4'):
+                raise ExtractorError(u'Unexpected video URL: %s' % stream['video_url'])
+            info["url"] = stream["video_url"]
+        return [info]
+
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4397,5 +4450,6 @@ def gen_extractors():
         MySpassIE(),
         SpiegelIE(),
         LiveLeakIE(),
+        ARDIE(),
         GenericIE()
     ]
index 807b735412a674bb4d2098a2a057c0e91266ad68..f46143e01b38c5af7cc2999402bbfa2f2ae51d7e 100644 (file)
@@ -24,6 +24,7 @@ __authors__  = (
     'Jaime Marquínez Ferrándiz',
     'Jeff Crouse',
     'Osama Khalid',
+    'Michael Walter',
     )
 
 __license__ = 'Public Domain'
@@ -223,6 +224,9 @@ def parseOpts():
             help='display progress in console titlebar', default=False)
     verbosity.add_option('-v', '--verbose',
             action='store_true', dest='verbose', help='print various debugging information', default=False)
+    verbosity.add_option('--dump-intermediate-pages',
+            action='store_true', dest='dump_intermediate_pages', default=False,
+            help='print downloaded pages to debug problems(very verbose)')
 
     filesystem.add_option('-t', '--title',
             action='store_true', dest='usetitle', help='use title in file name', default=False)
@@ -235,6 +239,9 @@ def parseOpts():
             help='number downloaded files starting from 00000', default=False)
     filesystem.add_option('-o', '--output',
             dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
+    filesystem.add_option('--autonumber-size',
+            dest='autonumber_size', metavar='NUMBER',
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
     filesystem.add_option('--restrict-filenames',
             action='store_true', dest='restrictfilenames',
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -451,6 +458,7 @@ def _real_main():
         'format_limit': opts.format_limit,
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
+        'autonumber_size': opts.autonumber_size,
         'restrictfilenames': opts.restrictfilenames,
         'ignoreerrors': opts.ignoreerrors,
         'ratelimit': opts.ratelimit,
@@ -480,6 +488,7 @@ def _real_main():
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
         'verbose': opts.verbose,
+        'dump_intermediate_pages': opts.dump_intermediate_pages,
         'test': opts.test,
         'keepvideo': opts.keepvideo,
         'min_filesize': opts.min_filesize,
index 7022ea4bec75fb864cd58c3c3c9b5f2a15bc5d7a..3fe29c91f416e0d6c957ed750d3f0f69950dc9c0 100755 (executable)
@@ -9,7 +9,8 @@ import sys
 if __package__ is None and not hasattr(sys, "frozen"):
     # direct call of __main__.py
     import os.path
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    path = os.path.realpath(os.path.abspath(__file__))
+    sys.path.append(os.path.dirname(os.path.dirname(path)))
 
 import youtube_dl
 
index b446dd94c9741b3f20524d1908771e1ecfa62bef..d6e29387586194be2e23126f2cd430bae7a4c37b 100644 (file)
@@ -37,7 +37,7 @@ def rsa_verify(message, signature, key):
 def update_self(to_screen, verbose, filename):
     """Update the program file with the latest version from the repository"""
 
-    UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
+    UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
     VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
     JSON_URL = UPDATE_URL + 'versions.json'
     UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
index c433e2eaa293c43a2bce6898a6dade24044422e6..2fd5f40c80c9cfab4964ad7d524aa488bbef57de 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.04.03'
+__version__ = '2013.04.18'