From: Philipp Hagemeister Date: Tue, 27 Nov 2012 17:48:43 +0000 (+0100) Subject: Merge remote-tracking branch 'Asido/master' X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=1a9c655e3b1569f315d4193e877cba0b4a863c63;hp=bae611f216ac7b1f1a24a506da6dffc518d09d5b Merge remote-tracking branch 'Asido/master' --- diff --git a/.gitignore b/.gitignore index e51512d4f..b2163f118 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *~ wine-py2exe/ py2exe.log +*.kate-swp diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..03947b1eb --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: python +#specify the python version +python: + - "2.6" + - "2.7" +#command to install the setup +install: +# command to run tests +script: nosetests test --nocapture diff --git a/LATEST_VERSION b/LATEST_VERSION index 0c8dc502a..d645a4c7a 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2012.02.27 +2012.11.29 diff --git a/Makefile b/Makefile index e2da02e96..aea967148 100644 --- a/Makefile +++ b/Makefile @@ -1,26 +1,57 @@ -default: update +all: youtube-dl README.md youtube-dl.1 youtube-dl.bash-completion LATEST_VERSION +# TODO: re-add youtube-dl.exe, and make sure it's 1. safe and 2. 
doesn't need sudo -update: compile update-readme update-latest +clean: + rm -f youtube-dl youtube-dl.exe youtube-dl.1 LATEST_VERSION -update-latest: - ./youtube-dl.dev --version > LATEST_VERSION +PREFIX=/usr/local +BINDIR=$(PREFIX)/bin +MANDIR=$(PREFIX)/man +SYSCONFDIR=/etc -update-readme: - @options=$$(COLUMNS=80 ./youtube-dl.dev --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ - header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ - footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \ +install: youtube-dl youtube-dl.1 youtube-dl.bash-completion + install -d $(DESTDIR)$(BINDIR) + install -m 755 youtube-dl $(DESTDIR)$(BINDIR) + install -d $(DESTDIR)$(MANDIR)/man1 + install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1 + install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d + install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl + +test: + nosetests2 --nocapture test + +.PHONY: all clean install test README.md youtube-dl.bash-completion +# TODO un-phony README.md and youtube-dl.bash_completion by reading from .in files and generating from them + +youtube-dl: youtube_dl/*.py + zip --quiet --junk-paths youtube-dl youtube_dl/*.py + echo '#!/usr/bin/env python' > youtube-dl + cat youtube-dl.zip >> youtube-dl + rm youtube-dl.zip + chmod a+x youtube-dl + +youtube-dl.exe: youtube_dl/*.py + bash devscripts/wine-py2exe.sh build_exe.py + +README.md: youtube_dl/*.py + @options=$$(COLUMNS=80 python -m youtube_dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/## \1/') && \ + header=$$(sed -e '/.*# OPTIONS/,$$ d' README.md) && \ + footer=$$(sed -e '1,/.*# CONFIGURATION/ d' README.md) && \ echo "$${header}" > README.md && \ echo >> README.md && \ - echo '## OPTIONS' >> README.md && \ + echo '# OPTIONS' >> README.md && \ echo "$${options}" >> README.md&& \ echo >> README.md && \ - echo '## FAQ' >> README.md && \ + echo '# CONFIGURATION' >> README.md && \ echo "$${footer}" >> README.md -compile: - 
zip --quiet --junk-paths youtube-dl youtube_dl/*.py - echo '#!/usr/bin/env python' > youtube-dl - cat youtube-dl.zip >> youtube-dl - rm youtube-dl.zip +youtube-dl.1: README.md + pandoc -s -w man README.md -o youtube-dl.1 + +youtube-dl.bash-completion: README.md + @options=`egrep -o '(--[a-z-]+) ' README.md | sort -u | xargs echo` && \ + content=`sed "s/opts=\"[^\"]*\"/opts=\"$${options}\"/g" youtube-dl.bash-completion` && \ + echo "$${content}" > youtube-dl.bash-completion -.PHONY: default compile update update-latest update-readme +LATEST_VERSION: youtube_dl/__init__.py + python -m youtube_dl --version > LATEST_VERSION diff --git a/README.md b/README.md index f04b96128..f2567e077 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,36 @@ -# youtube-dl +% YOUTUBE-DL(1) -## USAGE -youtube-dl [options] url [url...] +# NAME +youtube-dl -## DESCRIPTION +# SYNOPSIS +**youtube-dl** [OPTIONS] URL [URL...] + +# DESCRIPTION **youtube-dl** is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 6), and it is not platform specific. It should work in your Unix box, in Windows or in Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. -## OPTIONS +# OPTIONS -h, --help print this help text and exit --version print program version and exit -U, --update update this program to latest version -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default + is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. 
--dump-user-agent display the current browser identification + --user-agent UA specify a custom user agent --list-extractors List all supported extractors and the URLs they would handle -### Video Selection: +## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) --match-title REGEX download only matching titles (regex or caseless @@ -30,17 +39,21 @@ which means you can modify it, redistribute it or use it however you like. caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files -### Filesystem Options: +## Filesystem Options: -t, --title use title in file name - -l, --literal use literal title in file name + --id use video ID in file name + -l, --literal [deprecated] alias of --title -A, --auto-number number downloaded files starting from 00000 - -o, --output TEMPLATE output filename template. Use %(stitle)s to get the + -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, - %(upload_date)s for the upload date (YYYYMMDD), and - %% for a literal percent. Use - to output to - stdout. + %(upload_date)s for the upload date (YYYYMMDD), + %(extractor)s for the provider (youtube, metacafe, + etc), %(id)s for the video id and %% for a literal + percent. Use - to output to stdout. + --restrict-filenames Restrict filenames to only ASCII characters, and + avoid "&" and spaces in filenames -a, --batch-file FILE file containing URLs to download ('-' for stdin) -w, --no-overwrites do not overwrite files -c, --continue resume partially downloaded files @@ -53,7 +66,7 @@ which means you can modify it, redistribute it or use it however you like. 
--write-description write video description to a .description file --write-info-json write video metadata to a .info.json file -### Verbosity / Simulation Options: +## Verbosity / Simulation Options: -q, --quiet activates quiet mode -s, --simulate do not download the video and do not write anything to disk @@ -68,7 +81,7 @@ which means you can modify it, redistribute it or use it however you like. --console-title display progress in console titlebar -v, --verbose print various debugging information -### Video Format Options: +## Video Format Options: -f, --format FORMAT video format code --all-formats download all available video formats --prefer-free-formats prefer free video formats unless a specific one is @@ -80,22 +93,49 @@ which means you can modify it, redistribute it or use it however you like. --srt-lang LANG language of the closed captions to download (optional) use IETF language tags like 'en' -### Authentication Options: +## Authentication Options: -u, --username USERNAME account username -p, --password PASSWORD account password -n, --netrc use .netrc authentication data -### Post-processing Options: - --extract-audio convert video files to audio-only files (requires +## Post-processing Options: + -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default - --audio-quality QUALITY ffmpeg/avconv audio bitrate specification, 128k by - default + --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a + value between 0 (better) and 9 (worse) for VBR or a + specific bitrate like 128K (default 5) -k, --keep-video keeps the video file on disk after the post- processing; the video is erased by default -## FAQ +# CONFIGURATION + +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` 
and/or `~/.local/config/youtube-dl.conf`. + +# OUTPUT TEMPLATE + +The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are: + + - `id`: The sequence will be replaced by the video identifier. + - `url`: The sequence will be replaced by the video URL. + - `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video. + - `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format. + - `title`: The sequence will be replaced by the video title. + - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). + - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. + - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. + +The current default template is `%(id)s.%(ext)s`, but that will be switched to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). + +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: + + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc + youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames + youtube-dl_test_video_.mp4 # A simple file name + +# FAQ ### Can you please put the -b option back? 
@@ -117,13 +157,42 @@ The URLs youtube-dl outputs require the downloader to have the correct cookies. youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. -## COPYRIGHT +### ERROR: unable to download video ### + +youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. + +### SyntaxError: Non-ASCII character ### + +The error + + File "youtube-dl", line 2 + SyntaxError: Non-ASCII character '\x93' ... + +means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. + +To run youtube-dl under Python 2.5, you'll have to manually check it out like this: + + git clone git://github.com/rg3/youtube-dl.git + cd youtube-dl + python -m youtube_dl --help + +Please note that Python 2.5 is not supported anymore. + +### What is this binary file? Where has the code gone? + +Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. + +### The exe throws a *Runtime error from Visual C++* + +To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). + +# COPYRIGHT youtube-dl is released into the public domain by the copyright holders. This README file was originally written by Daniel Bolton () and is likewise released into the public domain. 
-## BUGS +# BUGS Bugs and suggestions should be reported at: diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh old mode 100644 new mode 100755 diff --git a/devscripts/release.sh b/devscripts/release.sh new file mode 100755 index 000000000..963a6c22b --- /dev/null +++ b/devscripts/release.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi +version="$1" +if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi +if [ ! -z "`git status --porcelain`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi +sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/__init__.py +make all +git add -A +git commit -m "release $version" +git tag -m "Release $version" "$version" \ No newline at end of file diff --git a/devscripts/wine-py2exe.sh b/devscripts/wine-py2exe.sh old mode 100644 new mode 100755 diff --git a/test/parameters.json b/test/parameters.json new file mode 100644 index 000000000..cc2b017eb --- /dev/null +++ b/test/parameters.json @@ -0,0 +1 @@ +{"username": null, "listformats": null, "skip_download": false, "usenetrc": false, "max_downloads": null, "noprogress": false, "forcethumbnail": false, "forceformat": false, "format_limit": null, "ratelimit": null, "nooverwrites": false, "forceurl": false, "writeinfojson": false, "simulate": false, "playliststart": 1, "continuedl": true, "password": null, "prefer_free_formats": false, "nopart": false, "retries": 10, "updatetime": true, "consoletitle": false, "verbose": true, "forcefilename": false, "ignoreerrors": false, "logtostderr": false, "format": null, "subtitleslang": null, "quiet": false, "outtmpl": "%(id)s.%(ext)s", "rejecttitle": null, "playlistend": -1, "writedescription": false, "forcetitle": false, "forcedescription": false, "writesubtitles": false, "matchtitle": null} \ No newline at end of file diff --git a/test/test_div.py 
b/test/test_div.py deleted file mode 100644 index 4d4819b3c..000000000 --- a/test/test_div.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- - -# Various small unit tests - -import os,sys -sys.path.append(os.path.dirname(os.path.dirname(__file__))) - -import youtube_dl - -def test_simplify_title(): - assert youtube_dl._simplify_title(u'abc') == u'abc' - assert youtube_dl._simplify_title(u'abc_d-e') == u'abc_d-e' - - assert youtube_dl._simplify_title(u'123') == u'123' - - assert u'/' not in youtube_dl._simplify_title(u'abc/de') - assert u'abc' in youtube_dl._simplify_title(u'abc/de') - assert u'de' in youtube_dl._simplify_title(u'abc/de') - assert u'/' not in youtube_dl._simplify_title(u'abc/de///') - - assert u'\\' not in youtube_dl._simplify_title(u'abc\\de') - assert u'abc' in youtube_dl._simplify_title(u'abc\\de') - assert u'de' in youtube_dl._simplify_title(u'abc\\de') - - assert youtube_dl._simplify_title(u'ä') == u'ä' - assert youtube_dl._simplify_title(u'кириллица') == u'кириллица' - - # Strip underlines - assert youtube_dl._simplify_title(u'\'a_') == u'a' diff --git a/test/test_download.py b/test/test_download.py new file mode 100644 index 000000000..d1d6b119b --- /dev/null +++ b/test/test_download.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python2 +import unittest +import hashlib +import os +import json + +from youtube_dl.FileDownloader import FileDownloader +from youtube_dl.InfoExtractors import YoutubeIE, DailymotionIE +from youtube_dl.InfoExtractors import MetacafeIE, BlipTVIE +from youtube_dl.InfoExtractors import XVideosIE, VimeoIE +from youtube_dl.InfoExtractors import SoundcloudIE, StanfordOpenClassroomIE +from youtube_dl.InfoExtractors import CollegeHumorIE, XNXXIE + + +class DownloadTest(unittest.TestCase): + PARAMETERS_FILE = "test/parameters.json" + #calculated with md5sum: + #md5sum (GNU coreutils) 8.19 + + YOUTUBE_SIZE = 1993883 + YOUTUBE_URL = "http://www.youtube.com/watch?v=BaW_jenozKc" + YOUTUBE_FILE = "BaW_jenozKc.mp4" + + 
DAILYMOTION_MD5 = "d363a50e9eb4f22ce90d08d15695bb47" + DAILYMOTION_URL = "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech" + DAILYMOTION_FILE = "x33vw9.mp4" + + METACAFE_SIZE = 5754305 + METACAFE_URL = "http://www.metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/" + METACAFE_FILE = "_aUehQsCQtM.flv" + + BLIP_MD5 = "93c24d2f4e0782af13b8a7606ea97ba7" + BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352" + BLIP_FILE = "5779306.m4v" + + XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf" + XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1" + XVIDEO_FILE = "939581.flv" + + VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf" + VIMEO_URL = "http://vimeo.com/14160053" + VIMEO_FILE = "" + + VIMEO2_MD5 = "" + VIMEO2_URL = "http://player.vimeo.com/video/47019590" + VIMEO2_FILE = "" + + SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed" + SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy" + SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3" + + STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4" + STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100" + STANDFORD_FILE = "PracticalUnix_intro-environment.mp4" + + COLLEGEHUMOR_MD5 = "" + COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody" + COLLEGEHUMOR_FILE = "" + + XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d" + XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_" + XNXX_FILE = "1135332.flv" + + def test_youtube(self): + #let's download a file from youtube + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(YoutubeIE()) + fd.download([DownloadTest.YOUTUBE_URL]) + self.assertTrue(os.path.exists(DownloadTest.YOUTUBE_FILE)) + self.assertEqual(os.path.getsize(DownloadTest.YOUTUBE_FILE), 
DownloadTest.YOUTUBE_SIZE) + + def test_dailymotion(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(DailymotionIE()) + fd.download([DownloadTest.DAILYMOTION_URL]) + self.assertTrue(os.path.exists(DownloadTest.DAILYMOTION_FILE)) + md5_down_file = md5_for_file(DownloadTest.DAILYMOTION_FILE) + self.assertEqual(md5_down_file, DownloadTest.DAILYMOTION_MD5) + + def test_metacafe(self): + #this emulate a skip,to be 2.6 compatible + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(MetacafeIE()) + fd.add_info_extractor(YoutubeIE()) + fd.download([DownloadTest.METACAFE_URL]) + self.assertTrue(os.path.exists(DownloadTest.METACAFE_FILE)) + self.assertEqual(os.path.getsize(DownloadTest.METACAFE_FILE), DownloadTest.METACAFE_SIZE) + + def test_blip(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(BlipTVIE()) + fd.download([DownloadTest.BLIP_URL]) + self.assertTrue(os.path.exists(DownloadTest.BLIP_FILE)) + md5_down_file = md5_for_file(DownloadTest.BLIP_FILE) + self.assertEqual(md5_down_file, DownloadTest.BLIP_MD5) + + def test_xvideo(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(XVideosIE()) + fd.download([DownloadTest.XVIDEO_URL]) + self.assertTrue(os.path.exists(DownloadTest.XVIDEO_FILE)) + md5_down_file = md5_for_file(DownloadTest.XVIDEO_FILE) + self.assertEqual(md5_down_file, DownloadTest.XVIDEO_MD5) + + def test_vimeo(self): + #skipped for the moment produce an error + return + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(VimeoIE()) + fd.download([DownloadTest.VIMEO_URL]) + self.assertTrue(os.path.exists(DownloadTest.VIMEO_FILE)) + md5_down_file = md5_for_file(DownloadTest.VIMEO_FILE) + self.assertEqual(md5_down_file, DownloadTest.VIMEO_MD5) + + def 
test_vimeo2(self): + #skipped for the moment produce an error + return + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(VimeoIE()) + fd.download([DownloadTest.VIMEO2_URL]) + self.assertTrue(os.path.exists(DownloadTest.VIMEO2_FILE)) + md5_down_file = md5_for_file(DownloadTest.VIMEO2_FILE) + self.assertEqual(md5_down_file, DownloadTest.VIMEO2_MD5) + + def test_soundcloud(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(SoundcloudIE()) + fd.download([DownloadTest.SOUNDCLOUD_URL]) + self.assertTrue(os.path.exists(DownloadTest.SOUNDCLOUD_FILE)) + md5_down_file = md5_for_file(DownloadTest.SOUNDCLOUD_FILE) + self.assertEqual(md5_down_file, DownloadTest.SOUNDCLOUD_MD5) + + def test_standford(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(StanfordOpenClassroomIE()) + fd.download([DownloadTest.STANDFORD_URL]) + self.assertTrue(os.path.exists(DownloadTest.STANDFORD_FILE)) + md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE) + self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5) + + def test_collegehumor(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(CollegeHumorIE()) + fd.download([DownloadTest.COLLEGEHUMOR_URL]) + self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE)) + md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE) + self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5) + + def test_xnxx(self): + with open(DownloadTest.PARAMETERS_FILE) as f: + fd = FileDownloader(json.load(f)) + fd.add_info_extractor(XNXXIE()) + fd.download([DownloadTest.XNXX_URL]) + self.assertTrue(os.path.exists(DownloadTest.XNXX_FILE)) + md5_down_file = md5_for_file(DownloadTest.XNXX_FILE) + self.assertEqual(md5_down_file, DownloadTest.XNXX_MD5) + + def tearDown(self): + if 
os.path.exists(DownloadTest.YOUTUBE_FILE): + os.remove(DownloadTest.YOUTUBE_FILE) + if os.path.exists(DownloadTest.DAILYMOTION_FILE): + os.remove(DownloadTest.DAILYMOTION_FILE) + if os.path.exists(DownloadTest.METACAFE_FILE): + os.remove(DownloadTest.METACAFE_FILE) + if os.path.exists(DownloadTest.BLIP_FILE): + os.remove(DownloadTest.BLIP_FILE) + if os.path.exists(DownloadTest.XVIDEO_FILE): + os.remove(DownloadTest.XVIDEO_FILE) + if os.path.exists(DownloadTest.VIMEO_FILE): + os.remove(DownloadTest.VIMEO_FILE) + if os.path.exists(DownloadTest.SOUNDCLOUD_FILE): + os.remove(DownloadTest.SOUNDCLOUD_FILE) + if os.path.exists(DownloadTest.STANDFORD_FILE): + os.remove(DownloadTest.STANDFORD_FILE) + if os.path.exists(DownloadTest.COLLEGEHUMOR_FILE): + os.remove(DownloadTest.COLLEGEHUMOR_FILE) + if os.path.exists(DownloadTest.XNXX_FILE): + os.remove(DownloadTest.XNXX_FILE) + +def md5_for_file(filename, block_size=2**20): + with open(filename) as f: + md5 = hashlib.md5() + while True: + data = f.read(block_size) + if not data: + break + md5.update(data) + return md5.hexdigest() diff --git a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 000000000..a3a23fbb4 --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +# Various small unit tests + +import unittest + +#from youtube_dl.utils import htmlentity_transform +from youtube_dl.utils import timeconvert +from youtube_dl.utils import sanitize_filename +from youtube_dl.utils import unescapeHTML +from youtube_dl.utils import orderedSet + + +class TestUtil(unittest.TestCase): + def test_timeconvert(self): + self.assertTrue(timeconvert('') is None) + self.assertTrue(timeconvert('bougrg') is None) + + def test_sanitize_filename(self): + self.assertEqual(sanitize_filename(u'abc'), u'abc') + self.assertEqual(sanitize_filename(u'abc_d-e'), u'abc_d-e') + + self.assertEqual(sanitize_filename(u'123'), u'123') + + self.assertEqual(u'abc_de', sanitize_filename(u'abc/de')) + 
self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) + + self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de')) + self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|')) + self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) + self.assertEqual(u'this - that', sanitize_filename(u'this: that')) + + self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T') + self.assertEqual(sanitize_filename(u'ä'), u'ä') + self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') + + forbidden = u'"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc') + self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e') + + self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') + + self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True)) + self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) + + self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) + self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) + self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) + self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) + + self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c') + self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename + + forbidden = u'"\0\\/&!: \'\t\n' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) + + # Handle a common case more neatly + self.assertEqual(sanitize_filename(u'大声带 - Song', restricted=True), u'Song') + self.assertEqual(sanitize_filename(u'总统: Speech', restricted=True), u'Speech') + # .. 
but make sure the file name is never empty + self.assertTrue(sanitize_filename(u'-', restricted=True) != u'') + self.assertTrue(sanitize_filename(u':', restricted=True) != u'') + + def test_ordered_set(self): + self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) + self.assertEqual(orderedSet([]), []) + self.assertEqual(orderedSet([1]), [1]) + #keep the list ordered + self.assertEqual(orderedSet([135,1,1,1]), [135,1]) + + def test_unescape_html(self): + self.assertEqual(unescapeHTML(u"%20;"), u"%20;") diff --git a/youtube-dl b/youtube-dl index d89b5a984..ca4e467ff 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.1 b/youtube-dl.1 new file mode 100644 index 000000000..4508622d2 --- /dev/null +++ b/youtube-dl.1 @@ -0,0 +1,306 @@ +.TH YOUTUBE-DL 1 "" +.SH NAME +.PP +youtube-dl +.SH SYNOPSIS +.PP +\f[B]youtube-dl\f[] [OPTIONS] URL [URL...] +.SH DESCRIPTION +.PP +\f[B]youtube-dl\f[] is a small command-line program to download videos +from YouTube.com and a few more sites. +It requires the Python interpreter, version 2.x (x being at least 6), +and it is not platform specific. +It should work in your Unix box, in Windows or in Mac OS X. +It is released to the public domain, which means you can modify it, +redistribute it or use it however you like. 
+.SH OPTIONS +.IP +.nf +\f[C] +-h,\ --help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ this\ help\ text\ and\ exit +--version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit +-U,\ --update\ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version +-i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors +-r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m) +-R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10) +--buffer-size\ SIZE\ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)\ (default +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ 1024) +--no-resize-buffer\ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE. +--dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification +--user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent +--list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ would\ handle +\f[] +.fi +.SS Video Selection: +.IP +.nf +\f[C] +--playlist-start\ NUMBER\ \ playlist\ video\ to\ start\ at\ (default\ is\ 1) +--playlist-end\ NUMBER\ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last) +--match-title\ REGEX\ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or\ caseless +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ sub-string) +--reject-title\ REGEX\ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub-string) +--max-downloads\ NUMBER\ \ \ Abort\ after\ downloading\ NUMBER\ files +\f[] +.fi +.SS Filesystem Options: +.IP +.nf +\f[C] +-t,\ --title\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name +--id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ video\ ID\ in\ file\ 
name +-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ --title +-A,\ --auto-number\ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000 +-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get\ the +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ title,\ %(uploader)s\ for\ the\ uploader\ name, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically\ incremented +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number,\ %(ext)s\ for\ the\ filename\ extension, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(upload_date)s\ for\ the\ upload\ date\ (YYYYMMDD), +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. +--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames +-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) +-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files +-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files +--no-continue\ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files\ (restart +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ beginning) +--cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in +--no-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ .part\ files +--no-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ the\ Last-modified\ header\ to\ set\ the\ file +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ modification\ time +--write-description\ \ \ \ \ \ write\ video\ description\ to\ a\ .description\ file +--write-info-json\ \ \ \ \ \ \ \ 
write\ video\ metadata\ to\ a\ .info.json\ file +\f[] +.fi +.SS Verbosity / Simulation Options: +.IP +.nf +\f[C] +-q,\ --quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ activates\ quiet\ mode +-s,\ --simulate\ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video\ and\ do\ not\ write\ anything +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ to\ disk +--skip-download\ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video +-g,\ --get-url\ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ URL +-e,\ --get-title\ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ title +--get-thumbnail\ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ thumbnail\ URL +--get-description\ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ description +--get-filename\ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ filename +--get-format\ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ format +--no-progress\ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar +--console-title\ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar +-v,\ --verbose\ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information +\f[] +.fi +.SS Video Format Options: +.IP +.nf +\f[C] +-f,\ --format\ FORMAT\ \ \ \ \ \ video\ format\ code +--all-formats\ \ \ \ \ \ \ \ \ \ \ \ download\ all\ available\ video\ formats +--prefer-free-formats\ \ \ \ prefer\ free\ video\ formats\ unless\ a\ specific\ one\ is +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ requested +--max-quality\ FORMAT\ \ \ \ \ highest\ quality\ format\ to\ download +-F,\ --list-formats\ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube\ only) +--write-srt\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ closed\ captions\ to\ a\ .srt\ file +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only) +--srt-lang\ LANG\ \ \ \ \ \ \ \ \ \ language\ of\ the\ closed\ captions\ to\ download +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (optional)\ use\ IETF\ language\ tags\ like\ \[aq]en\[aq] +\f[] +.fi +.SS Authentication 
Options: +.IP +.nf +\f[C] +-u,\ --username\ USERNAME\ \ account\ username +-p,\ --password\ PASSWORD\ \ account\ password +-n,\ --netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data +\f[] +.fi +.SS Post-processing Options: +.IP +.nf +\f[C] +-x,\ --extract-audio\ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe) +--audio-format\ FORMAT\ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ or\ "wav"; +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ best\ by\ default +--audio-quality\ QUALITY\ \ ffmpeg/avconv\ audio\ quality\ specification,\ insert\ a +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR\ or\ a +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specific\ bitrate\ like\ 128K\ (default\ 5) +-k,\ --keep-video\ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the\ post- +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default +\f[] +.fi +.SH CONFIGURATION +.PP +You can configure youtube-dl by placing default arguments (such as +\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not +copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or +\f[C]~/.local/config/youtube-dl.conf\f[]. +.SH OUTPUT TEMPLATE +.PP +The \f[C]-o\f[] option allows users to indicate a template for the +output file names. +The basic usage is not to set any template arguments when downloading a +single file, like in +\f[C]youtube-dl\ -o\ funny_video.flv\ "http://some/video"\f[]. +However, it may contain special sequences that will be replaced when +downloading each video. +The special sequences have the format \f[C]%(NAME)s\f[]. +To clarify, that is a percent symbol followed by a name in parenthesis, +followed by a lowercase S. +Allowed names are: +.IP \[bu] 2 +\f[C]id\f[]: The sequence will be replaced by the video identifier. 
+.IP \[bu] 2 +\f[C]url\f[]: The sequence will be replaced by the video URL. +.IP \[bu] 2 +\f[C]uploader\f[]: The sequence will be replaced by the nickname of the +person who uploaded the video. +.IP \[bu] 2 +\f[C]upload_date\f[]: The sequence will be replaced by the upload date +in YYYYMMDD format. +.IP \[bu] 2 +\f[C]title\f[]: The sequence will be replaced by the video title. +.IP \[bu] 2 +\f[C]ext\f[]: The sequence will be replaced by the appropriate extension +(like flv or mp4). +.IP \[bu] 2 +\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when +creating the file. +.IP \[bu] 2 +\f[C]autonumber\f[]: The sequence will be replaced by a five-digit +number that will be increased with each download, starting at zero. +.PP +The current default template is \f[C]%(id)s.%(ext)s\f[], but that will +be switched to \f[C]%(title)s-%(id)s.%(ext)s\f[] (which can be +requested with \f[C]-t\f[] at the moment). +.PP +In some cases, you don\[aq]t want special characters such as 中, spaces, +or &, such as when transferring the downloaded filename to a Windows +system or the filename through an 8bit-unsafe channel. +In these cases, add the \f[C]--restrict-filenames\f[] flag to get a +shorter title: +.IP +.nf +\f[C] +$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc +youtube-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters +$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ --restrict-filenames +youtube-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name +\f[] +.fi +.SH FAQ +.SS Can you please put the -b option back? +.PP +Most people asking this question are not aware that youtube-dl now +defaults to downloading the highest available quality as reported by +YouTube, which will be 1080p or 720p in some cases, so you no longer +need the -b option. +For some specific videos, maybe YouTube does not report them to be +available in a specific high quality format you\[aq]re interested +in. 
+In that case, simply request it with the -f option and youtube-dl will +try to download it. +.SS I get HTTP error 402 when trying to download a video. What\[aq]s +this? +.PP +Apparently YouTube requires you to pass a CAPTCHA test if you download +too much. +We\[aq]re considering providing a way to let you solve the +CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the +moment, your best course of action is pointing a web browser to the +YouTube URL, solving the CAPTCHA, and restarting youtube-dl. +.SS I have downloaded a video but how can I play it? +.PP +Once the video is fully downloaded, use any video player, such as +vlc (http://www.videolan.org) or mplayer (http://www.mplayerhq.hu/). +.SS The links provided by youtube-dl -g are not working anymore +.PP +The URLs youtube-dl outputs require the downloader to have the correct +cookies. +Use the \f[C]--cookies\f[] option to write the required cookies into a +file, and advise your downloader to read cookies from that file. +Some sites also require a common user agent to be used, use +\f[C]--dump-user-agent\f[] to see the one in use by youtube-dl. +.SS ERROR: no fmt_url_map or conn information found in video info +.PP +YouTube has switched to a new video info format in July 2011 which is +not supported by old versions of youtube-dl. +You can update youtube-dl with \f[C]sudo\ youtube-dl\ --update\f[]. +.SS ERROR: unable to download video +.PP +YouTube requires an additional signature since September 2012 which is +not supported by old versions of youtube-dl. +You can update youtube-dl with \f[C]sudo\ youtube-dl\ --update\f[]. +.SS SyntaxError: Non-ASCII character +.PP +The error +.IP +.nf +\f[C] +File\ "youtube-dl",\ line\ 2 +SyntaxError:\ Non-ASCII\ character\ \[aq]\\x93\[aq]\ ... +\f[] +.fi +.PP +means you\[aq]re using an outdated version of Python. +Please update to Python 2.6 or 2.7. 
+.PP +To run youtube-dl under Python 2.5, you\[aq]ll have to manually check it +out like this: +.IP +.nf +\f[C] +git\ clone\ git://github.com/rg3/youtube-dl.git +cd\ youtube-dl +python\ -m\ youtube_dl\ --help +\f[] +.fi +.PP +Please note that Python 2.5 is not supported anymore. +.SS What is this binary file? Where has the code gone? +.PP +Since June 2012 (#342) youtube-dl is packed as an executable zipfile, +simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on +some systems) or clone the git repository, as laid out above. +If you modify the code, you can run it by executing the +\f[C]__main__.py\f[] file. +To recompile the executable, run \f[C]make\ youtube-dl\f[]. +.SS The exe throws a \f[I]Runtime error from Visual C++\f[] +.PP +To run the exe you first need to install the Microsoft Visual C++ 2008 +Redistributable +Package (http://www.microsoft.com/en-us/download/details.aspx?id=29). +.SH COPYRIGHT +.PP +youtube-dl is released into the public domain by the copyright holders. +.PP +This README file was originally written by Daniel Bolton +(<https://github.com/dbbolton>) and is likewise released into the public +domain. +.SH BUGS +.PP +Bugs and suggestions should be reported at: +<https://github.com/rg3/youtube-dl/issues> +.PP +Please include: +.IP \[bu] 2 +Your exact command line, like +\f[C]youtube-dl\ -t\ "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"\f[]. +A common mistake is not to escape the \f[C]&\f[]. +Putting URLs in quotes should solve this problem. +.IP \[bu] 2 +The output of \f[C]youtube-dl\ --version\f[] +.IP \[bu] 2 +The output of \f[C]python\ --version\f[] +.IP \[bu] 2 +The name and version of your Operating System ("Ubuntu 11.04 x64" or +"Windows 7 x64" is usually enough). 
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion new file mode 100644 index 000000000..3a2f62efb --- /dev/null +++ b/youtube-dl.bash-completion @@ -0,0 +1,14 @@ +__youtube-dl() +{ + local cur prev opts + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --buffer-size --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --no-resize-buffer --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" + + if [[ ${cur} == * ]] ; then + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi +} + +complete -F __youtube-dl youtube-dl diff --git a/youtube-dl.dev b/youtube-dl.dev deleted file mode 100755 index 473b7f42d..000000000 --- a/youtube-dl.dev +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import youtube_dl - -youtube_dl.main() diff --git a/youtube-dl.exe b/youtube-dl.exe old mode 100755 new mode 100644 index c55f5fa8d..2ee57c593 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 14e872a98..d7d5b1521 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -13,7 +13,7 @@ import urllib2 if os.name == 'nt': import ctypes - + from utils import * @@ -44,37 +44,40 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. 
- password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. 
+ format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -93,6 +96,9 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params + if '%(stitle)s' in self.params['outtmpl']: + self.to_stderr(u'WARNING: %(stitle)s is deprecated. 
Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + @staticmethod def format_bytes(bytes): if bytes is None: @@ -139,23 +145,23 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return long(new_max) + return int(new_max) rate = bytes / elapsed_time if rate > new_max: - return long(new_max) + return int(new_max) if rate < new_min: - return long(new_min) - return long(rate) + return int(new_min) + return int(rate) @staticmethod def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into a long integer.""" + """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return long(round(number * multiplier)) + return int(round(number * multiplier)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -173,7 +179,6 @@ class FileDownloader(object): if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr output = output.encode(preferredencoding(), 'ignore') self._screen_file.write(output) @@ -181,7 +186,8 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(preferredencoding()) + assert type(message) == type(u'') + sys.stderr.write((message + u'\n').encode(preferredencoding())) def to_cons_title(self, message): """Set console/terminal window title to message.""" @@ -321,8 +327,10 @@ class FileDownloader(object): """Generate the output filename.""" try: template_dict = dict(info_dict) - 
template_dict['epoch'] = unicode(long(time.time())) - template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + template_dict['epoch'] = int(time.time()) + template_dict['autonumber'] = u'%05d' % self._num_downloads + + template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items()) filename = self.params['outtmpl'] % template_dict return filename except (ValueError, KeyError), err: @@ -334,17 +342,22 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + if matchtitle: + matchtitle = matchtitle.decode('utf8') + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if rejecttitle: + rejecttitle = rejecttitle.decode('utf8') + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + # Keep for backwards compatibility + info_dict['stitle'] = info_dict['title'] reason = self._match_entry(info_dict) if reason is not None: @@ -357,20 +370,20 @@ class FileDownloader(object): raise MaxDownloadsReached() filename = self.prepare_filename(info_dict) - + # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')) if 
self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') + print(filename.encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceformat', False): - print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -399,10 +412,10 @@ class FileDownloader(object): except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file ' + descfn) return - + if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE + # that way it will silently go on when used with unsupporting IE try: srtfn = filename.rsplit('.', 1)[0] + u'.srt' self.report_writesubtitles(srtfn) @@ -448,7 +461,7 @@ class FileDownloader(object): except (ContentTooShortError, ), err: self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - + if success: try: self.post_process(filename, info_dict) @@ -474,6 +487,7 @@ class 
FileDownloader(object): # Extract information from URL and process it videos = ie.extract(url) for video in videos or []: + video['extractor'] = ie.IE_NAME try: self.increment_downloads() self.process_info(video) @@ -633,7 +647,7 @@ class FileDownloader(object): data_len = long(data_len) + resume_len data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len - block_size = 1024 + block_size = self.params.get('buffersize', 1024) start = time.time() while True: # Download and write @@ -659,7 +673,8 @@ class FileDownloader(object): except (IOError, OSError), err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, len(data_block)) + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) # Progress message speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index baf859ea8..cea30dad8 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -13,6 +13,8 @@ import urllib import urllib2 import email.utils import xml.etree.ElementTree +import random +import math from urlparse import parse_qs try: @@ -95,7 +97,26 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r"""^ + ( + (?:https?://)? # http(s):// (optional) + (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/| + tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains + (?:.*?\#/)? 
# handle anchor (#/) redirect urls + (?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs + (?: # the various things that can precede the ID: + (?:(?:v|embed|e)/) # v/ or embed/ or e/ + |(?: # or the v= param in all its forms + (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) + (?:\?|\#!?) # the params delimiter ? or # or #! + (?:.+&)? # any other preceding param (like /?s=tuff&v=xxxx) + v= + ) + )? # optional -> youtube.com/xxxx is OK + )? # all until now is optional -> you can pass the naked ID + ([0-9A-Za-z_-]+) # here is it! the YouTube video ID + (?(1).+)? # if we found the ID, everything can follow + $""" _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -134,6 +155,10 @@ class YoutubeIE(InfoExtractor): } IE_NAME = u'youtube' + def suitable(self, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(self._VALID_URL, url, re.VERBOSE) is not None + def report_lang(self): """Report attempt to set language.""" self._downloader.to_screen(u'[youtube] Setting language') @@ -188,9 +213,9 @@ class YoutubeIE(InfoExtractor): return srt def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for x in formats: - print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) def _real_initialize(self): if self._downloader is None: @@ -213,7 +238,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: 
parsing .netrc: %s' % compat_str(err)) return # Set language @@ -222,7 +247,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return # No authentication to be performed @@ -245,7 +270,7 @@ class YoutubeIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return # Confirm age @@ -258,7 +283,7 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -268,7 +293,7 @@ class YoutubeIE(InfoExtractor): url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/') # Extract video id from URL - mobj = re.match(self._VALID_URL, url) + mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return @@ -280,7 +305,7 @@ class YoutubeIE(InfoExtractor): try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Attempt to extract SWF player URL @@ -302,7 +327,7 @@ class YoutubeIE(InfoExtractor): 
if 'token' in video_info: break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: if 'reason' in video_info: @@ -365,8 +390,9 @@ class YoutubeIE(InfoExtractor): try: srt_list = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) - srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) + srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) + srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: raise Trouble(u'WARNING: video has no closed captions') if self._downloader.params.get('subtitleslang', False): @@ -374,18 +400,26 @@ class YoutubeIE(InfoExtractor): elif 'en' in srt_lang_list: srt_lang = 'en' else: - srt_lang = srt_lang_list[0] + srt_lang = srt_lang_list.keys()[0] if not srt_lang in srt_lang_list: raise Trouble(u'WARNING: no closed captions found in the specified language') - request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id)) + request = urllib2.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) try: srt_xml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) + if not srt_xml: + raise Trouble(u'WARNING: unable to download video subtitles') video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) 
except Trouble as trouble: self._downloader.trouble(trouble[0]) + if 'length_seconds' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video duration') + video_duration = '' + else: + video_duration = urllib.unquote_plus(video_info['length_seconds'][0]) + # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -399,7 +433,7 @@ class YoutubeIE(InfoExtractor): url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_data = [parse_qs(uds) for uds in url_data_strs] url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) - url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) + url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats @@ -452,7 +486,8 @@ class YoutubeIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'player_url': player_url, - 'subtitles': video_subtitles + 'subtitles': video_subtitles, + 'duration': video_duration }) return results @@ -491,7 +526,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) return # Confirm age @@ -504,7 +539,7 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def 
_real_extract(self, url): @@ -528,7 +563,7 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -568,7 +603,7 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) + mobj = re.search(r'submitter=(.*?);', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -589,7 +624,7 @@ class MetacafeIE(InfoExtractor): class DailymotionIE(InfoExtractor): """Information Extractor for Dailymotion""" - _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' IE_NAME = u'dailymotion' def __init__(self, downloader=None): @@ -610,9 +645,9 @@ class DailymotionIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - video_id = mobj.group(1) + video_id = mobj.group(1).split('_')[0].split('?')[0] - video_extension = 'flv' + video_extension = 'mp4' # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -621,25 +656,34 @@ class DailymotionIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = 
re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) + mobj = re.search(r'\s*var flashvars = (.*)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - sequence = urllib.unquote(mobj.group(1)) - mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) + flashvars = urllib.unquote(mobj.group(1)) + + for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: + if key in flashvars: + max_quality = key + self._downloader.to_screen(u'[dailymotion] Using %s' % key) + break + else: + self._downloader.trouble(u'ERROR: unable to extract video URL') + return + + mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.trouble(u'ERROR: unable to extract video URL') return - mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '') - # if needed add http://www.dailymotion.com/ if relative URL + video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/') - video_url = mediaURL + # TODO: support choosing qualities mobj = re.search(r'', webpage) if mobj is None: @@ -647,17 +691,28 @@ class DailymotionIE(InfoExtractor): return video_title = unescapeHTML(mobj.group('title').decode('utf-8')) - mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) + video_uploader = u'NA' + mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = mobj.group(1) + # lookin for official user + mobj_official = re.search(r'', webpage) + if mobj_official is None: + self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + else: + video_uploader = mobj_official.group(1) + else: + video_uploader = mobj.group(1) + + video_upload_date = u'NA' + mobj = re.search(r'
([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) + if mobj is not None: + video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) return [{ 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'upload_date': u'NA', + 'upload_date': video_upload_date, 'title': video_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', @@ -699,7 +754,7 @@ class GoogleIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -738,7 +793,7 @@ class GoogleIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'', webpage) if mobj is None: @@ -794,7 +849,7 @@ class PhotobucketIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -864,7 +919,7 @@ class YahooIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = 
re.search(r'\("id", "([0-9]+)"\);', webpage) @@ -888,7 +943,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract uploader and title from webpage @@ -946,7 +1001,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract media URL from playlist XML @@ -975,7 +1030,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -1004,7 +1059,7 @@ class VimeoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Now we begin extracting as much information as we can from what we @@ -1045,21 +1100,32 @@ class VimeoIE(InfoExtractor): timestamp = config['request']['timestamp'] # Vimeo specific: extract video codec and quality information + # First consider quality, then codecs, then take everything # TODO bind to format 
param codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] - for codec in codecs: - if codec[0] in config["video"]["files"]: - video_codec = codec[0] - video_extension = codec[1] - if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd' - else: quality = 'sd' + files = { 'hd': [], 'sd': [], 'other': []} + for codec_name, codec_extension in codecs: + if codec_name in config["video"]["files"]: + if 'hd' in config["video"]["files"][codec_name]: + files['hd'].append((codec_name, codec_extension, 'hd')) + elif 'sd' in config["video"]["files"][codec_name]: + files['sd'].append((codec_name, codec_extension, 'sd')) + else: + files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) + + for quality in ('hd', 'sd', 'other'): + if len(files[quality]) > 0: + video_quality = files[quality][0][2] + video_codec = files[quality][0][0] + video_extension = files[quality][0][1] + self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) break else: self._downloader.trouble(u'ERROR: no known codec found') return video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, quality, video_codec.upper()) + %(video_id, sig, timestamp, video_quality, video_codec.upper()) return [{ 'id': video_id, @@ -1074,6 +1140,143 @@ class VimeoIE(InfoExtractor): }] +class ArteTvIE(InfoExtractor): + """arte.tv information extractor.""" + + _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' + _LIVE_URL = r'index-[0-9]+\.html$' + + IE_NAME = u'arte.tv' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" 
+ self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id) + + def fetch_webpage(self, url): + self._downloader.increment_downloads() + request = urllib2.Request(url) + try: + self.report_download_webpage(url) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + except ValueError, err: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + return webpage + + def grep_webpage(self, url, regex, regexFlags, matchTuples): + page = self.fetch_webpage(url) + mobj = re.search(regex, page, regexFlags) + info = {} + + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + for (i, key, err) in matchTuples: + if mobj.group(i) is None: + self._downloader.trouble(err) + return + else: + info[key] = mobj.group(i) + + return info + + def extractLiveStream(self, url): + video_lang = url.split('/')[-4] + info = self.grep_webpage( + url, + r'src="(.*?/videothek_js.*?\.js)', + 0, + [ + (1, 'url', u'ERROR: Invalid URL: %s' % url) + ] + ) + http_host = url.split('/')[2] + next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url'))) + info = self.grep_webpage( + next_url, + r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + + '(http://.*?\.swf).*?' 
+ + '(rtmp://.*?)\'', + re.DOTALL, + [ + (1, 'path', u'ERROR: could not extract video path: %s' % url), + (2, 'player', u'ERROR: could not extract video player: %s' % url), + (3, 'url', u'ERROR: could not extract video url: %s' % url) + ] + ) + video_url = u'%s/%s' % (info.get('url'), info.get('path')) + + def extractPlus7Stream(self, url): + video_lang = url.split('/')[-3] + info = self.grep_webpage( + url, + r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', + 0, + [ + (1, 'url', u'ERROR: Invalid URL: %s' % url) + ] + ) + next_url = urllib.unquote(info.get('url')) + info = self.grep_webpage( + next_url, + r'