Merge branch 'crooksandliars' of https://github.com/fstirlitz/youtube-dl into fstirli...
authorSergey M․ <dstftw@gmail.com>
Sat, 11 Apr 2015 13:34:06 +0000 (19:34 +0600)
committerSergey M․ <dstftw@gmail.com>
Sat, 11 Apr 2015 13:34:06 +0000 (19:34 +0600)
80 files changed:
AUTHORS
Makefile
README.md
devscripts/check-porn.py
devscripts/generate_aes_testdata.py [new file with mode: 0644]
docs/supportedsites.md
test/test_aes.py [new file with mode: 0644]
test/test_all_urls.py
test/test_execution.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/common.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/aftonbladet.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/dhm.py [new file with mode: 0644]
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/dump.py
youtube_dl/extractor/dumpert.py [new file with mode: 0644]
youtube_dl/extractor/eagleplatform.py
youtube_dl/extractor/ellentv.py
youtube_dl/extractor/eroprofile.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamersyde.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/hitbox.py
youtube_dl/extractor/libsyn.py [new file with mode: 0644]
youtube_dl/extractor/livestream.py
youtube_dl/extractor/miomio.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mlb.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/phoenix.py
youtube_dl/extractor/playfm.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornovoisines.py [new file with mode: 0644]
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/radiojavan.py [new file with mode: 0644]
youtube_dl/extractor/rai.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/safari.py [new file with mode: 0644]
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankbang.py [new file with mode: 0644]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/twentytwotracks.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udn.py [new file with mode: 0644]
youtube_dl/extractor/ultimedia.py
youtube_dl/extractor/varzesh3.py [new file with mode: 0644]
youtube_dl/extractor/vessel.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/xuite.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/common.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index 512469f4c9c9e50ce945372ca1dd5e704efc177b..9c65dc1d436f6415ef928baad5dbabf133ad0616 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -117,3 +117,8 @@ Alexander Mamay
 Devin J. Pohly
 Eduardo Ferro Aldama
 Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
index c6c76274f995a85185290d35868b974c13240aa2..fdb1abb60cacfe49295a7438e3d0f4f51c248359 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 
 clean:
        rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
-       find -name "*.pyc" -delete
+       find -name "*.pyc" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
index 4f9fc8174db8c4ced337bc8f33798f1cf8922b3d..caa1478d9951f3a8799eb05f979dade6c96fab52 100644 (file)
--- a/README.md
+++ b/README.md
@@ -45,21 +45,21 @@ which means you can modify it, redistribute it or use it however you like.
     youtube-dl [OPTIONS] URL [URL...]
 
 # OPTIONS
-    -h, --help                       print this help text and exit
-    --version                        print program version and exit
-    -U, --update                     update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
-    -i, --ignore-errors              continue on download errors, for example to skip unavailable videos in a playlist
+    -h, --help                       Print this help text and exit
+    --version                        Print program version and exit
+    -U, --update                     Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+    -i, --ignore-errors              Continue on download errors, for example to skip unavailable videos in a playlist
     --abort-on-error                 Abort downloading of further videos (in the playlist or the command line) if an error occurs
-    --dump-user-agent                display the current browser identification
+    --dump-user-agent                Display the current browser identification
     --list-extractors                List all supported extractors and the URLs they would handle
     --extractor-descriptions         Output descriptions of all supported extractors
-    --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple".
+    --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
                                      Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
                                      default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
                                      in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
     --flat-playlist                  Do not extract the videos of a playlist, only list them.
-    --no-color                       Do not emit color codes in output.
+    --no-color                       Do not emit color codes in output
 
 ## Network Options:
     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
@@ -71,70 +71,70 @@ which means you can modify it, redistribute it or use it however you like.
                                      not present) is used for the actual downloading. (experimental)
 
 ## Video Selection:
-    --playlist-start NUMBER          playlist video to start at (default is 1)
-    --playlist-end NUMBER            playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+    --playlist-start NUMBER          Playlist video to start at (default is 1)
+    --playlist-end NUMBER            Playlist video to end at (default is last)
+    --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
                                      if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
                                      download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX              download only matching titles (regex or caseless sub-string)
-    --reject-title REGEX             skip download for matching titles (regex or caseless sub-string)
+    --match-title REGEX              Download only matching titles (regex or caseless sub-string)
+    --reject-title REGEX             Skip download for matching titles (regex or caseless sub-string)
     --max-downloads NUMBER           Abort after downloading NUMBER files
     --min-filesize SIZE              Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
     --max-filesize SIZE              Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
-    --date DATE                      download only videos uploaded in this date
-    --datebefore DATE                download only videos uploaded on or before this date (i.e. inclusive)
-    --dateafter DATE                 download only videos uploaded on or after this date (i.e. inclusive)
+    --date DATE                      Download only videos uploaded in this date
+    --datebefore DATE                Download only videos uploaded on or before this date (i.e. inclusive)
+    --dateafter DATE                 Download only videos uploaded on or after this date (i.e. inclusive)
     --min-views COUNT                Do not download any videos with less than COUNT views
     --max-views COUNT                Do not download any videos with more than COUNT views
-    --match-filter FILTER            (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
+    --match-filter FILTER            Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
                                      !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
                                      a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
                                      operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
                                      functionality is not available at the given service), but who also have a description, use  --match-filter "like_count > 100 &
                                      dislike_count <? 50 & description" .
-    --no-playlist                    If the URL refers to a video and a playlist, download only the video.
-    --yes-playlist                   If the URL refers to a video and a playlist, download the playlist.
-    --age-limit YEARS                download only videos suitable for the given age
+    --no-playlist                    Download only the video, if the URL refers to a video and a playlist.
+    --yes-playlist                   Download the playlist, if the URL refers to a video and a playlist.
+    --age-limit YEARS                Download only videos suitable for the given age
     --download-archive FILE          Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
     --include-ads                    Download advertisements as well (experimental)
 
 ## Download Options:
-    -r, --rate-limit LIMIT           maximum download rate in bytes per second (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            number of retries (default is 10), or "infinite".
-    --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K) (default is 1024)
-    --no-resize-buffer               do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
+    -r, --rate-limit LIMIT           Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+    -R, --retries RETRIES            Number of retries (default is 10), or "infinite".
+    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+    --no-resize-buffer               Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
-    --xattr-set-filesize             (experimental) set file xattribute ytdl.filesize with expected filesize
-    --hls-prefer-native              (experimental) Use the native HLS downloader instead of ffmpeg.
+    --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental)
+    --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental)
     --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,curl,wget
-    --external-downloader-args ARGS  Give these arguments to the external downloader.
+    --external-downloader-args ARGS  Give these arguments to the external downloader
 
 ## Filesystem Options:
-    -a, --batch-file FILE            file containing URLs to download ('-' for stdin)
-    --id                             use only video ID in file name
-    -o, --output TEMPLATE            output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+    -a, --batch-file FILE            File containing URLs to download ('-' for stdin)
+    --id                             Use only video ID in file name
+    -o, --output TEMPLATE            Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
                                      nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
-                                     the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
+                                     the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
                                      %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
                                      %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
                                      %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
                                      %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
                                      Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
-    --autonumber-size NUMBER         Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+    --autonumber-size NUMBER         Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
     --restrict-filenames             Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
-    -A, --auto-number                [deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
-    -t, --title                      [deprecated] use title in file name (default)
-    -l, --literal                    [deprecated] alias of --title
-    -w, --no-overwrites              do not overwrite files
-    -c, --continue                   force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
-    --no-continue                    do not resume partially downloaded files (restart from beginning)
-    --no-part                        do not use .part files - write directly into output file
-    --no-mtime                       do not use the Last-modified header to set the file modification time
-    --write-description              write video description to a .description file
-    --write-info-json                write video metadata to a .info.json file
-    --write-annotations              write video annotations to a .annotation file
-    --load-info FILE                 json file containing the video information (created with the "--write-json" option)
-    --cookies FILE                   file to read cookies from and dump cookie jar in
+    -A, --auto-number                [deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+    -t, --title                      [deprecated] Use title in file name (default)
+    -l, --literal                    [deprecated] Alias of --title
+    -w, --no-overwrites              Do not overwrite files
+    -c, --continue                   Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+    --no-continue                    Do not resume partially downloaded files (restart from beginning)
+    --no-part                        Do not use .part files - write directly into output file
+    --no-mtime                       Do not use the Last-modified header to set the file modification time
+    --write-description              Write video description to a .description file
+    --write-info-json                Write video metadata to a .info.json file
+    --write-annotations              Write video annotations to a .annotation file
+    --load-info FILE                 JSON file containing the video information (created with the "--write-info-json" option)
+    --cookies FILE                   File to read cookies from and dump cookie jar in
     --cache-dir DIR                  Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
                                      or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
                                      change.
@@ -142,49 +142,49 @@ which means you can modify it, redistribute it or use it however you like.
     --rm-cache-dir                   Delete all filesystem cache files
 
 ## Thumbnail images:
-    --write-thumbnail                write thumbnail image to disk
-    --write-all-thumbnails           write all thumbnail image formats to disk
+    --write-thumbnail                Write thumbnail image to disk
+    --write-all-thumbnails           Write all thumbnail image formats to disk
     --list-thumbnails                Simulate and list all available thumbnail formats
 
 ## Verbosity / Simulation Options:
-    -q, --quiet                      activates quiet mode
+    -q, --quiet                      Activate quiet mode
     --no-warnings                    Ignore warnings
-    -s, --simulate                   do not download the video and do not write anything to disk
-    --skip-download                  do not download the video
-    -g, --get-url                    simulate, quiet but print URL
-    -e, --get-title                  simulate, quiet but print title
-    --get-id                         simulate, quiet but print id
-    --get-thumbnail                  simulate, quiet but print thumbnail URL
-    --get-description                simulate, quiet but print video description
-    --get-duration                   simulate, quiet but print video length
-    --get-filename                   simulate, quiet but print output filename
-    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information. See --output for a description of available keys.
-    -J, --dump-single-json           simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+    -s, --simulate                   Do not download the video and do not write anything to disk
+    --skip-download                  Do not download the video
+    -g, --get-url                    Simulate, quiet but print URL
+    -e, --get-title                  Simulate, quiet but print title
+    --get-id                         Simulate, quiet but print id
+    --get-thumbnail                  Simulate, quiet but print thumbnail URL
+    --get-description                Simulate, quiet but print video description
+    --get-duration                   Simulate, quiet but print video length
+    --get-filename                   Simulate, quiet but print output filename
+    --get-format                     Simulate, quiet but print output format
+    -j, --dump-json                  Simulate, quiet but print JSON information. See --output for a description of available keys.
+    -J, --dump-single-json           Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
                                      information in a single line.
     --print-json                     Be quiet and print the video information as JSON (video is still being downloaded).
-    --newline                        output progress bar as new lines
-    --no-progress                    do not print progress bar
-    --console-title                  display progress in console titlebar
-    -v, --verbose                    print various debugging information
-    --dump-pages                     print downloaded pages to debug problems (very verbose)
+    --newline                        Output progress bar as new lines
+    --no-progress                    Do not print progress bar
+    --console-title                  Display progress in console titlebar
+    -v, --verbose                    Print various debugging information
+    --dump-pages                     Print downloaded pages to debug problems (very verbose)
     --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems
     --print-traffic                  Display sent and read HTTP traffic
-    -C, --call-home                  Contact the youtube-dl server for debugging.
-    --no-call-home                   Do NOT contact the youtube-dl server for debugging.
+    -C, --call-home                  Contact the youtube-dl server for debugging
+    --no-call-home                   Do NOT contact the youtube-dl server for debugging
 
 ## Workarounds:
     --encoding ENCODING              Force the specified encoding (experimental)
-    --no-check-certificate           Suppress HTTPS certificate validation.
+    --no-check-certificate           Suppress HTTPS certificate validation
     --prefer-insecure                Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
-    --user-agent UA                  specify a custom user agent
-    --referer URL                    specify a custom referer, use if the video access is restricted to one domain
-    --add-header FIELD:VALUE         specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+    --user-agent UA                  Specify a custom user agent
+    --referer URL                    Specify a custom referer, use if the video access is restricted to one domain
+    --add-header FIELD:VALUE         Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
     --bidi-workaround                Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
     --sleep-interval SECONDS         Number of seconds to sleep before each download.
 
 ## Video Format Options:
-    -f, --format FORMAT              video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by
+    -f, --format FORMAT              Video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by
                                      extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
                                      "worst".  You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
                                      This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
@@ -194,44 +194,44 @@ which means you can modify it, redistribute it or use it however you like.
                                      Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
                                      of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
                                      bestvideo+bestaudio.
-    --all-formats                    download all available video formats
-    --prefer-free-formats            prefer free video formats unless a specific one is requested
-    --max-quality FORMAT             highest quality format to download
-    -F, --list-formats               list all available formats
+    --all-formats                    Download all available video formats
+    --prefer-free-formats            Prefer free video formats unless a specific one is requested
+    --max-quality FORMAT             Highest quality format to download
+    -F, --list-formats               List all available formats
     --youtube-skip-dash-manifest     Do not download the DASH manifest on YouTube videos
     --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
                                      merge is required
 
 ## Subtitle Options:
-    --write-sub                      write subtitle file
-    --write-auto-sub                 write automatic subtitle file (youtube only)
-    --all-subs                       downloads all the available subtitles of the video
-    --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format, accepts formats preference, for example: "ass/srt/best"
-    --sub-lang LANGS                 languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
+    --write-sub                      Write subtitle file
+    --write-auto-sub                 Write automatic subtitle file (YouTube only)
+    --all-subs                       Download all the available subtitles of the video
+    --list-subs                      List all available subtitles for the video
+    --sub-format FORMAT              Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+    --sub-lang LANGS                 Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
 
 ## Authentication Options:
-    -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password. If this option is left out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR        two-factor auth code
-    -n, --netrc                      use .netrc authentication data
-    --video-password PASSWORD        video password (vimeo, smotri)
+    -u, --username USERNAME          Login with this account ID
+    -p, --password PASSWORD          Account password. If this option is left out, youtube-dl will ask interactively.
+    -2, --twofactor TWOFACTOR        Two-factor auth code
+    -n, --netrc                      Use .netrc authentication data
+    --video-password PASSWORD        Video password (vimeo, smotri)
 
 ## Post-processing Options:
-    -x, --extract-audio              convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
-    --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
-    --audio-quality QUALITY          ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
-                                     (default 5)
+    -x, --extract-audio              Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+    --audio-format FORMAT            Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+    --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+                                     5)
     --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
-    -k, --keep-video                 keeps the video file on disk after the post-processing; the video is erased by default
-    --no-post-overwrites             do not overwrite post-processed files; the post-processed files are overwritten by default
-    --embed-subs                     embed subtitles in the video (only for mp4 videos)
-    --embed-thumbnail                embed thumbnail in the audio as cover art
-    --add-metadata                   write metadata to the video file
-    --metadata-from-title FORMAT     parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+    -k, --keep-video                 Keep the video file on disk after the post-processing; the video is erased by default
+    --no-post-overwrites             Do not overwrite post-processed files; the post-processed files are overwritten by default
+    --embed-subs                     Embed subtitles in the video (only for mp4 videos)
+    --embed-thumbnail                Embed thumbnail in the audio as cover art
+    --add-metadata                   Write metadata to the video file
+    --metadata-from-title FORMAT     Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
                                      parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
                                      %(title)s" matches a title like "Coldplay - Paradise"
-    --xattrs                         write metadata to the video file's xattrs (using dublin core and xdg standards)
+    --xattrs                         Write metadata to the video file's xattrs (using dublin core and xdg standards)
     --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
                                      fix file if we can, warn otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the postprocessors (default)
index 6a5bd9eda333246c47064bf84cfc03da09de4caf..7a219ebe97c555be79a55cf4dc30bf2cb823ca28 100644 (file)
@@ -28,7 +28,7 @@ for test in get_testcases():
     if METHOD == 'EURISTIC':
         try:
             webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
-        except:
+        except Exception:
             print('\nFail: {0}'.format(test['name']))
             continue
 
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
new file mode 100644 (file)
index 0000000..2e389fc
--- /dev/null
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+    return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    out, _ = prog.communicate(secret_msg)
+    return out
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
index 72b365305a1b4ec40eb7d0043750d39a16ad66d7..c85a3991800e5c86d19bee8ed8fb8d1a9c6d96b9 100644 (file)
@@ -2,6 +2,8 @@
  - **1tv**: Первый канал
  - **1up.com**
  - **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
  - **24video**
  - **3sat**
  - **4tube**
  - **DctpTv**
  - **DeezerPlaylist**
  - **defense.gouv.fr**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
  - **Discovery**
  - **divxstage**: DivxStage
  - **Dotsub**
  - **DrTuber**
  - **DRTV**
  - **Dump**
+ - **Dumpert**
  - **dvtv**: http://video.aktualne.cz/
  - **EaglePlatform**
  - **EbaumsWorld**
  - **Gamekings**
  - **GameOne**
  - **gameone:playlist**
+ - **Gamersyde**
  - **GameSpot**
  - **GameStar**
  - **Gametrailers**
  - **Letv**
  - **LetvPlaylist**
  - **LetvTv**
+ - **Libsyn**
  - **lifenews**: LIFE | NEWS
  - **LiveLeak**
  - **livestream**
  - **Mgoon**
  - **Minhateca**
  - **MinistryGrid**
+ - **miomio.tv**
  - **mitele.es**
  - **mixcloud**
  - **MLB**
  - **NBA**
  - **NBC**
  - **NBCNews**
+ - **NBCSports**
+ - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Mediathek
  - **NDTV**
  - **NerdCubedFeed**
  - **npo.nl:radio**
  - **npo.nl:radio:fragment**
  - **NRK**
+ - **NRKPlaylist**
  - **NRKTV**
  - **ntv.ru**
  - **Nuvid**
  - **PornHub**
  - **PornHubPlaylist**
  - **Pornotube**
+ - **PornoVoisines**
  - **PornoXO**
  - **PrimeShareTV**
  - **PromptFile**
  - **radio.de**
  - **radiobremen**
  - **radiofrance**
+ - **RadioJavan**
  - **Rai**
  - **RBMARadio**
  - **RedTube**
  - **rutube:movie**: Rutube movies
  - **rutube:person**: Rutube person videos
  - **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
  - **Sandia**: Sandia National Laboratories
  - **Sapo**: SAPO Vídeos
  - **savefrom.net**
  - **southpark.cc.com**
  - **southpark.de**
  - **Space**
+ - **SpankBang**
  - **Spankwire**
  - **Spiegel**
  - **Spiegel:Article**: Articles on spiegel.de
  - **Ubu**
  - **udemy**
  - **udemy:course**
+ - **UDNEmbed**
  - **Ultimedia**
  - **Unistra**
  - **Urort**: NRK P3 Urørt
  - **ustream**
  - **ustream:channel**
+ - **Varzesh3**
  - **Vbox7**
  - **VeeHD**
  - **Veoh**
+ - **Vessel**
  - **Vesti**: Вести.Ru
  - **Vevo**
  - **VGTV**
  - **youtube:show**: YouTube.com (multi-season) shows
  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
  - **Zapiks**
  - **ZDF**
  - **ZDFChannel**
diff --git a/test/test_aes.py b/test/test_aes.py
new file mode 100644 (file)
index 0000000..4dc7de7
--- /dev/null
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+    def setUp(self):
+        self.key = self.iv = [0x20, 0x15] + 14 * [0]
+        self.secret_msg = b'Secret message goes here'
+
+    def test_encrypt(self):
+        msg = b'message'
+        key = list(range(16))
+        encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+        decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+        self.assertEqual(decrypted, msg)
+
+    def test_cbc_decrypt(self):
+        data = bytes_to_intlist(
+            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+        )
+        decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+    def test_decrypt_text(self):
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 16))
+        self.assertEqual(decrypted, self.secret_msg)
+
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 32))
+        self.assertEqual(decrypted, self.secret_msg)
+
+if __name__ == '__main__':
+    unittest.main()
index 6ae168b7f472938a3a69344fac123e047478ee73..a9db42b300864180c10dca730f772f7f5a26aad8 100644 (file)
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
 
     def test_youtube_feeds(self):
-        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
index 60df187de4921dfa7df808302f55f1ccd66bcb13..620db080e9bd836c7239a93e86e0944b95f793e0 100644 (file)
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+# coding: utf-8
+
 from __future__ import unicode_literals
 
 import unittest
@@ -6,6 +8,9 @@ import unittest
 import sys
 import os
 import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import encodeArgument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -27,5 +32,12 @@ class TestExecution(unittest.TestCase):
     def test_main_exec(self):
         subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
+    def test_cmdline_umlauts(self):
+        p = subprocess.Popen(
+            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+        _, stderr = p.communicate()
+        self.assertFalse(stderr)
+
 if __name__ == '__main__':
     unittest.main()
index 3431ad24e72d4076990b2e400f6a7322875819ea..2e3a6480cb15e3c762fa68533d6dad988740a6c4 100644 (file)
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
     InAdvancePagedList,
@@ -54,6 +55,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    xpath_text,
     render_table,
     match_str,
 )
@@ -198,6 +200,8 @@ class TestUtil(unittest.TestCase):
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(
             unescapeHTML('&eacute;'), 'é')
 
@@ -223,6 +227,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
             '20150202')
+        self.assertEqual(unified_strdate('25-09-2014'), '20140925')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -250,6 +255,17 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/media:author').text, 'The Author')
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
+    def test_xpath_text(self):
+        testxml = '''<root>
+            <div>
+                <p>Foo</p>
+            </div>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+        self.assertTrue(xpath_text(doc, 'div/bar') is None)
+        self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
     def test_smuggle_url(self):
         data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
@@ -455,6 +471,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(d['x'], 1)
         self.assertEqual(d['y'], 'a')
 
+        on = js_to_json('["abc", "def",]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
+        on = js_to_json('{"abc": "def",}')
+        self.assertEqual(json.loads(on), {'abc': 'def'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
index 5a83bc95662b92e786ed1088155321645eadea98..640b8c99d75d6215b232c0d2d678dd903f41e8bb 100755 (executable)
@@ -328,9 +328,6 @@ class YoutubeDL(object):
                 'Parameter outtmpl is bytes, but should be a unicode string. '
                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 
-        if '%(stitle)s' in self.params.get('outtmpl', ''):
-            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
-
         self._setup_opener()
 
         if auto_init:
@@ -1218,9 +1215,6 @@ class YoutubeDL(object):
         if len(info_dict['title']) > 200:
             info_dict['title'] = info_dict['title'][:197] + '...'
 
-        # Keep for backwards compatibility
-        info_dict['stitle'] = info_dict['title']
-
         if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
@@ -1707,10 +1701,10 @@ class YoutubeDL(object):
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
-        except:
+        except Exception:
             try:
                 sys.exc_clear()
-            except:
+            except Exception:
                 pass
         self._write_string('[debug] Python version %s - %s\n' % (
             platform.python_version(), platform_name()))
index 852b2fc3db24b85138b44e7a5cf8f4338d787ce7..1c8b411b7f037d4bce2face086f0743f4f23003a 100644 (file)
@@ -189,10 +189,6 @@ def _real_main(argv=None):
     if opts.allsubtitles and not opts.writeautomaticsub:
         opts.writesubtitles = True
 
-    if sys.version_info < (3,):
-        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
-        if opts.outtmpl is not None:
-            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
index b2bf149ef63ffb1c31bfb02f976f5cce2dbadad3..973bcd32074107f70c1b781e95b97ef34501b88f 100644 (file)
@@ -389,7 +389,7 @@ else:
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             out, err = sp.communicate()
             lines, columns = map(int, out.split())
-        except:
+        except Exception:
             pass
         return _terminal_size(columns, lines)
 
index 8ed5c19a6b8079c9807faa4fa7a505a398974975..a0fc5ead06a4e8adba7886d2a4087d28ca9cdbcb 100644 (file)
@@ -204,7 +204,7 @@ class FileDownloader(object):
             return
         try:
             os.utime(filename, (time.time(), filetime))
-        except:
+        except Exception:
             pass
         return filetime
 
@@ -318,7 +318,7 @@ class FileDownloader(object):
         )
 
         continuedl_and_exists = (
-            self.params.get('continuedl', False) and
+            self.params.get('continuedl', True) and
             os.path.isfile(encodeFilename(filename)) and
             not self.params.get('nopart', False)
         )
index 4047d7167478b34c9cacc79dcbbfeee1bb31d317..d136bebd1fe45761312bd90c31a95ddaf1754271 100644 (file)
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
 
         open_mode = 'wb'
         if resume_len != 0:
-            if self.params.get('continuedl', False):
+            if self.params.get('continuedl', True):
                 self.report_resuming_byte(resume_len)
                 request.add_header('Range', 'bytes=%d-' % resume_len)
                 open_mode = 'ab'
index 89e98ae61e128c80eab5b0e04109b1baa2ecff7e..ddf5724ae6f05259194c67473bfd212ddeb896ff 100644 (file)
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
         protocol = info_dict.get('rtmp_protocol', None)
         real_time = info_dict.get('rtmp_real_time', False)
         no_resume = info_dict.get('no_resume', False)
-        continue_dl = info_dict.get('continuedl', False)
+        continue_dl = info_dict.get('continuedl', True)
 
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
index dc272af8268cf8c5f86d050e3ea4fba5ad979e28..894aa5b43f60ab481787f17038c1723dda4322e5 100644 (file)
@@ -107,6 +107,7 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
+from .dhm import DHMIE
 from .dotsub import DotsubIE
 from .douyutv import DouyuTVIE
 from .dreisat import DreiSatIE
@@ -115,6 +116,7 @@ from .drtuber import DrTuberIE
 from .drtv import DRTVIE
 from .dvtv import DVTVIE
 from .dump import DumpIE
+from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
@@ -176,6 +178,7 @@ from .gameone import (
     GameOneIE,
     GameOnePlaylistIE,
 )
+from .gamersyde import GamersydeIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
@@ -251,6 +254,7 @@ from .letv import (
     LetvTvIE,
     LetvPlaylistIE
 )
+from .libsyn import LibsynIE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import (
@@ -274,6 +278,7 @@ from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
@@ -309,6 +314,8 @@ from .nba import NBAIE
 from .nbc import (
     NBCIE,
     NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
 )
 from .ndr import NDRIE
 from .ndtv import NDTVIE
@@ -347,6 +354,7 @@ from .npo import (
 )
 from .nrk import (
     NRKIE,
+    NRKPlaylistIE,
     NRKTVIE,
 )
 from .ntvde import NTVDeIE
@@ -381,6 +389,7 @@ from .pornhub import (
     PornHubPlaylistIE,
 )
 from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
 from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
@@ -390,6 +399,7 @@ from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
@@ -419,6 +429,10 @@ from .rutube import (
 )
 from .rutv import RUTVIE
 from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariCourseIE,
+)
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
@@ -458,6 +472,7 @@ from .southpark import (
     SouthparkDeIE,
 )
 from .space import SpaceIE
+from .spankbang import SpankBangIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
@@ -525,6 +540,10 @@ from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+    TwentyTwoTracksIE,
+    TwentyTwoTracksGenreIE
+)
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -539,13 +558,16 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
+from .udn import UDNEmbedIE
 from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
+from .vessel import VesselIE
 from .vesti import VestiIE
 from .vevo import VevoIE
 from .vgtv import VGTVIE
index 203936e54a3797ae37535022ad02757a925f24d7..e3e6d21137994593d593fbc51313bf38032ce7f8 100644 (file)
@@ -11,12 +11,13 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    qualities,
 )
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
-    _TEST = {
+    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+    _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
         'info_dict': {
@@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor):
             'description': 'One Piece 606',
             'title': 'One Piece 606',
         }
-    }
+    }, {
+        'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor):
                 note='Confirming after redirect')
             webpage = self._download_webpage(url, video_id)
 
+        FORMATS = ('normal', 'hq')
+        quality = qualities(FORMATS)
         formats = []
-        for format_id in ('normal', 'hq'):
+        for format_id in FORMATS:
             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
             video_url = self._search_regex(rex, webpage, 'video file URLx',
                                            fatal=False)
@@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor):
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
+                'quality': quality(format_id),
             })
         self._sort_formats(formats)
         video_title = self._og_search_title(webpage)
index 8442019eac3eaa0a373140d494ffa2ca420f4606..a117502bc0ad7bfec11592ec57da575898cacc3d 100644 (file)
@@ -2,10 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+    _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
         'info_dict': {
@@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor):
             formats.append({
                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                 'ext': 'mp4',
-                'width': fmt['width'],
-                'height': fmt['height'],
-                'tbr': fmt['bitrate'],
+                'width': int_or_none(fmt.get('width')),
+                'height': int_or_none(fmt.get('height')),
+                'tbr': int_or_none(fmt.get('bitrate')),
                 'protocol': 'http',
             })
         self._sort_formats(formats)
@@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor):
             'id': video_id,
             'title': internal_meta_json['title'],
             'formats': formats,
-            'thumbnail': internal_meta_json['imageUrl'],
-            'description': internal_meta_json['shortPreamble'],
-            'timestamp': internal_meta_json['timePublished'],
-            'duration': internal_meta_json['duration'],
-            'view_count': internal_meta_json['views'],
+            'thumbnail': internal_meta_json.get('imageUrl'),
+            'description': internal_meta_json.get('shortPreamble'),
+            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
+            'duration': int_or_none(internal_meta_json.get('duration')),
+            'view_count': int_or_none(internal_meta_json.get('views')),
         }
index 8c7ba4b910bcc78e5e3fa02d7168a9e4f443bf65..b632ce967d515e8c819d1361afcaeddae99f8e9e 100644 (file)
@@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor):
                     'width': int_or_none(media_content.get('width')),
                     'height': int_or_none(media_content.get('height')),
                 })
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         subtitles = self.extract_subtitles(video_id, subtitles_urls)
index 4a88ccd13caf604f3ea892c6784d603434fb06ee..0dca29b712c79a27fb621f094a6f64ab503ba3df 100644 (file)
@@ -6,32 +6,39 @@ from .common import InfoExtractor
 
 
 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
 
     _TEST = {
-        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
         # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
             'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+            'description': 'md5:a8ba0302912d03d246979735c17d2761',
         },
     }
 
     def _real_extract(self, url):
         name = self._match_id(url)
         webpage = self._download_webpage(url, name)
-
-        f4m_url = self._search_regex(
-            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'f4m url')
+        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
         title = re.sub(': Video$', '', self._og_search_title(webpage))
 
+        embed_info = self._download_json(
+            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+        formats = []
+        for stream in embed_info['streams']:
+            if stream["muxing_format"] == "TS":
+                formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+            else:
+                formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+        self._sort_formats(formats)
+
         return {
-            'id': name.split('-')[-1],
+            'id': video_id,
             'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, name),
+            'formats': formats,
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
index 90ea074387ef6afe4aaa87a41c13ec6cf5a1aa7b..5efc5f4fe556a4424542b441a83f2d6dbd5bc8e7 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,12 @@ class CNNIE(InfoExtractor):
             'description': 'md5:e7223a503315c9f150acac52e76de086',
             'upload_date': '20141222',
         }
+    }, {
+        'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+        'only_matching': True,
+    }, {
+        'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index e5245ec3f29eb914b4a311bdf5f3c735e32d2f61..530c449c1b48e48d0f8fcf4e7223bff9d60138b1 100644 (file)
@@ -822,7 +822,7 @@ class InfoExtractor(object):
                                 (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
-                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+                'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
index e64b88fbc4f796accae6816c5fcec2f2a1aa3ecd..6ded723c96ddad70ebf95b9fb6b73c811bcc6746 100644 (file)
@@ -23,7 +23,6 @@ from ..utils import (
 )
 from ..aes import (
     aes_cbc_decrypt,
-    inc,
 )
 
 
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
 
         key = obfuscate_key(id)
 
-        class Counter:
-            __value = iv
-
-            def next_value(self):
-                temp = self.__value
-                self.__value = inc(self.__value)
-                return temp
         decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
         return zlib.decompress(decrypted_data)
 
index 4f67c3aacc5dec5410b76d08728eecaf429f222b..7615ecd4ba3a9d55720f697cb792ca01e92bd1df 100644 (file)
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
     def _build_request(url):
         """Build a request with the family filter disabled"""
         request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
-        request.add_header('Cookie', 'ff=off')
+        request.add_header('Cookie', 'family_filter=off; ff=off')
         return request
 
 
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
 
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
+        embed_request = self._build_request(embed_url)
+        embed_page = self._download_webpage(
+            embed_request, video_id, 'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                   'video info', flags=re.MULTILINE)
         info = json.loads(info)
@@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 class DailymotionUserIE(DailymotionPlaylistIE):
     IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
new file mode 100644 (file)
index 0000000..3ed1f16
--- /dev/null
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+        'md5': '11c475f670209bf6acca0b2b7ef51827',
+        'info_dict': {
+            'id': 'the-marshallplan-at-work-in-west-germany',
+            'ext': 'flv',
+            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+            'description': 'md5:1fabd480c153f97b07add61c44407c82',
+            'duration': 660,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+        'md5': '09890226332476a3e3f6f2cb74734aa5',
+        'info_dict': {
+            'id': 'rolle-1',
+            'ext': 'flv',
+            'title': 'ROLLE 1',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist_url = self._search_regex(
+            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+        playlist = self._download_xml(playlist_url, video_id)
+
+        track = playlist.find(
+            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+        video_url = xpath_text(
+            track, './{http://xspf.org/ns/0/}location',
+            'video url', fatal=True)
+        thumbnail = xpath_text(
+            track, './{http://xspf.org/ns/0/}image',
+            'thumbnail')
+
+        title = self._search_regex(
+            [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+            webpage, 'title').strip()
+        description = self._html_search_regex(
+            r'<p><strong>Description:</strong>(.+?)</p>',
+            webpage, 'description', default=None)
+        duration = parse_duration(self._search_regex(
+            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+            webpage, 'duration', default=None))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
index d7956e6e4a20d733947f70a2933e06c2f0d144de..479430c51072ab91e976df4d459af372c5608cdd 100644 (file)
@@ -1,19 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import hashlib
+import time
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
 
 
 class DouyuTVIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.douyutv.com/iseven',
         'info_dict': {
-            'id': 'iseven',
+            'id': '17732',
+            'display_id': 'iseven',
             'ext': 'flv',
             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:9e525642c25a0a24302869937cf69d17',
+            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
             'uploader_id': '431925',
@@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         }
-    }
+    }, {
+        'url': 'http://www.douyutv.com/85982',
+        'info_dict': {
+            'id': '85982',
+            'display_id': '85982',
+            'ext': 'flv',
+            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'douyu小漠',
+            'uploader_id': '3769985',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        if video_id.isdigit():
+            room_id = video_id
+        else:
+            page = self._download_webpage(url, video_id)
+            room_id = self._html_search_regex(
+                r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+            room_id, int(time.time()))
+
+        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
         config = self._download_json(
-            'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+            video_id)
 
         data = config['data']
 
         error_code = config.get('error', 0)
-        show_status = data.get('show_status')
         if error_code is not 0:
-            raise ExtractorError(
-                'Server reported error %i' % error_code, expected=True)
+            error_desc = 'Server reported error %i' % error_code
+            if isinstance(data, (compat_str, compat_basestring)):
+                error_desc += ': ' + data
+            raise ExtractorError(error_desc, expected=True)
 
+        show_status = data.get('show_status')
         # 1 = live, 2 = offline
         if show_status == '2':
             raise ExtractorError(
@@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor):
         base_url = data['rtmp_url']
         live_path = data['rtmp_live']
 
-        title = self._live_title(data['room_name'])
+        title = self._live_title(unescapeHTML(data['room_name']))
         description = data.get('show_details')
         thumbnail = data.get('room_src')
 
@@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_id,
+            'id': room_id,
+            'display_id': video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
index 69ca75423cb1d4692f1958829dc61bcf5c2bac73..05bb22ddf3f0a3b095e1108b88d47f6766484d09 100644 (file)
@@ -3,22 +3,25 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
 
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
     _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TEST = {
-        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
-        'md5': '9dcfe344732808dbfcc901537973c922',
+        'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
+        'md5': 'be37228896d30a88f315b638900a026e',
         'info_dict': {
-            'id': '36983',
+            'id': '45918',
             'ext': 'mp4',
-            'title': 'Kaffeeland Schweiz',
-            'description': 'md5:cc4424b18b75ae9948b13929a0814033',
+            'title': 'Waidmannsheil',
+            'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
             'uploader': '3sat',
-            'upload_date': '20130622'
+            'upload_date': '20140913'
         }
     }
 
@@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor):
         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
 
+        status_code = details_doc.find('./status/statuscode')
+        if status_code is not None and status_code.text != 'ok':
+            code = status_code.text
+            if code == 'notVisibleAnymore':
+                message = 'Video %s is not available' % video_id
+            else:
+                message = '%s returned error: %s' % (self.IE_NAME, code)
+            raise ExtractorError(message, expected=True)
+
         thumbnail_els = details_doc.findall('.//teaserimage')
         thumbnails = [{
             'width': int(te.attrib['key'].partition('x')[0]),
index 8257e35a437b075461114fbaf1b4dd2d578f56d8..f25ab319e66d4d5b151cd9a9d4509807b6a88617 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor, ExtractorError
@@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
-        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
-        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+        'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
+        'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
         'info_dict': {
-            'id': 'partiets-mand-7-8',
+            'id': 'panisk-paske-5',
             'ext': 'mp4',
-            'title': 'Partiets mand (7:8)',
-            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
-            'timestamp': 1403047940,
-            'upload_date': '20140617',
-            'duration': 1299.040,
+            'title': 'Panisk Påske (5)',
+            'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
+            'timestamp': 1426984612,
+            'upload_date': '20150322',
+            'duration': 1455,
         },
     }
 
@@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        if '>Programmet er ikke længere tilgængeligt' in webpage:
+            raise ExtractorError(
+                'Video %s is not available' % video_id, expected=True)
+
         video_id = self._search_regex(
             r'data-(?:material-identifier|episode-slug)="([^"]+)"',
             webpage, 'video id')
index 6b651778afa2faa57237314cde7a25f3ebb06278..ff78d4fd2e84c390a929f29a052b1dd87abe03d1 100644 (file)
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor):
         video_url = self._search_regex(
             r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
 
-        thumb = self._og_search_thumbnail(webpage)
-        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
-            'thumbnail': thumb,
+            'thumbnail': thumbnail,
         }
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
new file mode 100644 (file)
index 0000000..9c594b7
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _TEST = {
+        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+        'info_dict': {
+            'id': '6646981/951bc60f',
+            'ext': 'mp4',
+            'title': 'Ik heb nieuws voor je',
+            'description': 'Niet schrikken hoor',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'nsfw=1')
+        webpage = self._download_webpage(req, video_id)
+
+        files_base64 = self._search_regex(
+            r'data-files="([^"]+)"', webpage, 'data files')
+
+        files = self._parse_json(
+            base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+            video_id)
+
+        quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+        formats = [{
+            'url': video_url,
+            'format_id': format_id,
+            'quality': quality(format_id),
+        } for format_id, video_url in files.items() if format_id != 'still']
+        self._sort_formats(formats)
+
+        title = self._html_search_meta(
+            'title', webpage) or self._og_search_title(webpage)
+        description = self._html_search_meta(
+            'description', webpage) or self._og_search_description(webpage)
+        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats
+        }
index 7173371eef85da1be53a5476312b36ffdedb0c8d..688dfc2f7f34d15e712481351a6073987325add5 100644 (file)
@@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor):
             'duration': 216,
             'view_count': int,
         },
+        'skip': 'Georestricted',
     }]
 
     def _handle_error(self, response):
index fc92ff8253734f151fa973ea4979adbe5c6063bf..5154bbd7f8e5a8447a24d8274780648a7eae0ca4 100644 (file)
@@ -13,15 +13,15 @@ from ..utils import (
 class EllenTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
     _TESTS = [{
-        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
-        'md5': 'e4af06f3bf0d5f471921a18db5764642',
+        'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+        'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
         'info_dict': {
-            'id': '0-7jqrsr18',
+            'id': '0-ipq1gsai',
             'ext': 'mp4',
-            'title': 'What\'s Wrong with These Photos? A Whole Lot',
-            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
-            'timestamp': 1406876400,
-            'upload_date': '20140801',
+            'title': 'Fast Fingers of Fate',
+            'description': 'md5:686114ced0a032926935e9015ee794ac',
+            'timestamp': 1428033600,
+            'upload_date': '20150403',
         }
     }, {
         'url': 'http://ellentube.com/videos/0-dvzmabd5/',
@@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_meta('VideoURL', webpage, 'url')
+
+        video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True)
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
         description = self._html_search_meta(
             'description', webpage, 'description') or self._og_search_description(webpage)
         timestamp = parse_iso8601(self._search_regex(
             r'<span class="publish-date"><time datetime="([^"]+)">',
-            webpage, 'timestamp'))
+            webpage, 'timestamp', fatal=False))
 
         return {
             'id': video_id,
index 79e2fbd394681283e07a7146bc51f39a7499324d..0cbca90b061cf2358600146f37f6da5b61d71709 100644 (file)
@@ -1,11 +1,17 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
 
 
 class EroProfileIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
-    _TEST = {
+    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+    _NETRC_MACHINE = 'eroprofile'
+    _TESTS = [{
         'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
         'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
         'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
             'thumbnail': 're:https?://.*\.jpg',
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+        'md5': '1baa9602ede46ce904c431f5418d8916',
+        'info_dict': {
+            'id': '1133519',
+            'ext': 'm4v',
+            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        query = compat_urllib_parse.urlencode({
+            'username': username,
+            'password': password,
+            'url': 'http://www.eroprofile.com/',
+        })
+        login_url = self._LOGIN_URL + query
+        login_page = self._download_webpage(login_url, None, False)
+
+        m = re.search(r'Your username or password was incorrect\.', login_page)
+        if m:
+            raise ExtractorError(
+                'Wrong username and/or password.', expected=True)
+
+        self.report_login()
+        redirect_url = self._search_regex(
+            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+        self._download_webpage(redirect_url, None, False)
+
+    def _real_initialize(self):
+        self._login()
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
+        m = re.search(r'You must be logged in to view this video\.', webpage)
+        if m:
+            raise ExtractorError(
+                'This video requires login. Please specify a username and password and try again.', expected=True)
+
         video_id = self._search_regex(
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
index 170d6807529ac9b121187786cf9329b3b3525dc3..edf555b2987520618b70bf8bd423c5fc1f60e5a9 100644 (file)
@@ -14,7 +14,9 @@ from ..utils import (
     clean_html,
     ExtractorError,
     int_or_none,
+    float_or_none,
     parse_duration,
+    determine_ext,
 )
 
 
@@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             if not video_url:
                 continue
             format_id = video['format']
-            if video_url.endswith('.f4m'):
+            ext = determine_ext(video_url)
+            if ext == 'f4m':
                 if georestricted:
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
@@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
-                    f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
-                    for f4m_format in f4m_formats:
-                        f4m_format['preference'] = 1
-                    formats.extend(f4m_formats)
-            elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+                    formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             'title': info['titre'],
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
-            'duration': parse_duration(info['duree']),
+            'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'formats': formats,
         }
@@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
 
     _TEST = {
-        'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
-        'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
         'info_dict': {
-            'id': 'EV_22853',
+            'id': 'EV_50111',
             'ext': 'flv',
-            'title': 'Dans les jardins de William Christie - Le Camus',
-            'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
-            'upload_date': '20140829',
-            'timestamp': 1409317200,
+            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+            'upload_date': '20150320',
+            'timestamp': 1426892400,
+            'duration': 2760.9,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
+
         webpage = self._download_webpage(url, name)
+
+        if ">Ce live n'est plus disponible en replay<" in webpage:
+            raise ExtractorError('Video %s is not available' % name, expected=True)
+
         video_id, catalogue = self._search_regex(
             r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
 
diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py
new file mode 100644 (file)
index 0000000..d545e01
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    parse_duration,
+    remove_start,
+)
+
+
+class GamersydeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
+    _TEST = {
+        'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
+        'md5': 'f38d400d32f19724570040d5ce3a505f',
+        'info_dict': {
+            'id': '34371',
+            'ext': 'mp4',
+            'duration': 372,
+            'title': 'Bloodborne - Birth of a hero',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
+            display_id, transform_source=js_to_json)
+
+        formats = []
+        for source in playlist['sources']:
+            video_url = source.get('file')
+            if not video_url:
+                continue
+            format_id = source.get('label')
+            f = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
+            if m:
+                f.update({
+                    'height': int(m.group('height')),
+                    'fps': int(m.group('fps')),
+                })
+            formats.append(f)
+        self._sort_formats(formats)
+
+        title = remove_start(playlist['title'], '%s - ' % video_id)
+        thumbnail = playlist.get('image')
+        duration = parse_duration(self._search_regex(
+            r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
index 8716e4503ad4ae61a57745ec1eafe96fe5e25ddc..6c212efac4b0f5869f93e37a5c34b42dbe2ca88a 100644 (file)
@@ -29,10 +29,12 @@ from ..utils import (
     xpath_text,
 )
 from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .smotri import SmotriIE
 from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
 
 
 class GenericIE(InfoExtractor):
@@ -527,6 +529,17 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Viddler'],
         },
+        # Libsyn embed
+        {
+            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+            'info_dict': {
+                'id': '3377616',
+                'ext': 'mp3',
+                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+                'description': 'md5:601cb790edd05908957dae8aaa866465',
+                'upload_date': '20150220',
+            },
+        },
         # jwplayer YouTube
         {
             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
@@ -609,6 +622,16 @@ class GenericIE(InfoExtractor):
                 'age_limit': 0,
             },
         },
+        # 5min embed
+        {
+            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+            'info_dict': {
+                'id': '518726732',
+                'ext': 'mp4',
+                'title': 'Facebook Creates "On This Day" | Crunch Report',
+            },
+        },
         # RSS feed with enclosure
         {
             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -618,6 +641,27 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20150228',
                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
             }
+        },
+        # NBC Sports vplayer embed
+        {
+            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+            'info_dict': {
+                'id': 'ln7x1qSThw4k',
+                'ext': 'flv',
+                'title': "PFT Live: New leader in the 'new-look' defense",
+                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+            },
+        },
+        # UDN embed
+        {
+            'url': 'http://www.udn.com/news/story/7314/822787',
+            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'info_dict': {
+                'id': '300040',
+                'ext': 'mp4',
+                'title': '生物老師男變女 全校挺"做自己"',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
         }
     ]
 
@@ -1013,6 +1057,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'))
 
+        # Look for Libsyn player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
@@ -1219,6 +1269,24 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Pladform')
 
+        # Look for 5min embeds
+        mobj = re.search(
+            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+        if mobj is not None:
+            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+        # Look for NBC Sports VPlayer embeds
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+        # Look for UDN embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+        if mobj is not None:
+            return self.url_result(
+                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
index 84bd7c0804eb2966c7062288a1a190c8509f62fc..d606429ca88a51138f3108594e7933f74638de34 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     float_or_none,
     int_or_none,
     compat_str,
+    determine_ext,
 )
 
 
@@ -147,12 +148,27 @@ class HitboxLiveIE(HitboxIE):
                 servers.append(base_url)
                 for stream in cdn.get('bitrates'):
                     label = stream.get('label')
-                    if label != 'Auto':
+                    if label == 'Auto':
+                        continue
+                    stream_url = stream.get('url')
+                    if not stream_url:
+                        continue
+                    bitrate = int_or_none(stream.get('bitrate'))
+                    if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+                        if not stream_url.startswith('http'):
+                            continue
                         formats.append({
-                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'url': stream_url,
                             'ext': 'mp4',
-                            'vbr': stream.get('bitrate'),
-                            'resolution': label,
+                            'tbr': bitrate,
+                            'format_note': label,
+                            'rtmp_live': True,
+                        })
+                    else:
+                        formats.append({
+                            'url': '%s/%s' % (base_url, stream_url),
+                            'ext': 'mp4',
+                            'tbr': bitrate,
                             'rtmp_live': True,
                             'format_note': host,
                             'page_url': url,
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
new file mode 100644 (file)
index 0000000..9ab1416
--- /dev/null
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LibsynIE(InfoExtractor):
+    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
+        'md5': '443360ee1b58007bc3dcf09b41d093bb',
+        'info_dict': {
+            'id': '3377616',
+            'ext': 'mp3',
+            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+            'description': 'md5:601cb790edd05908957dae8aaa866465',
+            'upload_date': '20150220',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = [{
+            'url': media_url,
+        } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+
+        podcast_title = self._search_regex(
+            r'<h2>([^<]+)</h2>', webpage, 'title')
+        episode_title = self._search_regex(
+            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+
+        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+        description = self._html_search_regex(
+            r'<div id="info_text_body">(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        thumbnail = self._search_regex(
+            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        release_date = unified_strdate(self._search_regex(
+            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': release_date,
+            'formats': formats,
+        }
index 2467f8bdd35304a57cc6bd951fa14fd32a25cd4c..ec309dadd848f7c1ae46b28c3be329c32cef48c9 100644 (file)
@@ -21,7 +21,7 @@ from ..utils import (
 
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor):
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
+    }, {
+        'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+        'only_matching': True,
     }]
 
     def _parse_smil(self, video_id, smil_url):
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
new file mode 100644 (file)
index 0000000..cc3f271
--- /dev/null
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    int_or_none,
+)
+
+
+class MioMioIE(InfoExtractor):
+    IE_NAME = 'miomio.tv'
+    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.miomio.tv/watch/cc179734/',
+        'md5': '48de02137d0739c15b440a224ad364b9',
+        'info_dict': {
+            'id': '179734',
+            'ext': 'flv',
+            'title': '手绘动漫鬼泣但丁全程画法',
+            'duration': 354,
+        },
+    }, {
+        'url': 'http://www.miomio.tv/watch/cc184024/',
+        'info_dict': {
+            'id': '43729',
+            'title': '《动漫同人插画绘制》',
+        },
+        'playlist_mincount': 86,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            'description', webpage, 'title', fatal=True)
+
+        mioplayer_path = self._search_regex(
+            r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
+
+        xml_config = self._search_regex(
+            r'flashvars="type=sina&amp;(.+?)&amp;',
+            webpage, 'xml config')
+
+        # skipping the following page causes lags and eventually connection drop-outs
+        self._request_webpage(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
+            video_id)
+
+        # the following xml contains the actual configuration information on the video file(s)
+        vid_config = self._download_xml(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+            video_id)
+
+        http_headers = {
+            'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
+        }
+
+        entries = []
+        for f in vid_config.findall('./durl'):
+            segment_url = xpath_text(f, 'url', 'video url')
+            if not segment_url:
+                continue
+            order = xpath_text(f, 'order', 'order')
+            segment_id = video_id
+            segment_title = title
+            if order:
+                segment_id += '-%s' % order
+                segment_title += ' part %s' % order
+            entries.append({
+                'id': segment_id,
+                'url': segment_url,
+                'title': segment_title,
+                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+                'http_headers': http_headers,
+            })
+
+        if len(entries) == 1:
+            segment = entries[0]
+            segment['id'] = video_id
+            segment['title'] = title
+            return segment
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'entries': entries,
+            'title': title,
+            'http_headers': http_headers,
+        }
index 21aea0c5513ab30cbe27384351229c42e864fab3..84f29155841007f3088a86470040407073726067 100644 (file)
@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
         like_count = str_to_int(self._search_regex(
-            r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
+            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
             webpage, 'like count', fatal=False))
         view_count = str_to_int(self._search_regex(
             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
index 1a241aca77983ac9626a53e59bf85ad4394cc8fd..e369551c2fb5730377d863cf3bc2bdde31eb5f60 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+    _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
     _TESTS = [
         {
             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
             'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
             'only_matching': True,
         },
+        {
+            'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
index 3645d3033f74ae174e3eaa85ad55bbe677d9daba..ecd0ac8b1b501d9ad97261f57a5b0fee1cd68ce7 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class NBCIE(InfoExtractor):
-    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
 
     _TESTS = [
         {
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
         return self.url_result(theplatform_url)
 
 
+class NBCSportsVPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+    _TESTS = [{
+        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+        'info_dict': {
+            'id': '9CsDKds0kvHI',
+            'ext': 'flv',
+            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+        }
+    }, {
+        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        iframe_m = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+        if iframe_m:
+            return iframe_m.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        theplatform_url = self._og_search_video_url(webpage)
+        return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+    # Does not include https becuase its certificate is invalid
+    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+        'info_dict': {
+            'id': 'PHJSaFWbrTY9',
+            'ext': 'flv',
+            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(
+            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
 class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
         (?:video/.+?/(?P<id>\d+)|
index 557dffa46846ff8a4c94f6bb102a186fa1ce5eb8..5d84485714b9f360d47c8676710e9c3e6d9578c7 100644 (file)
@@ -231,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
                 stream_url = self._download_json(
                     stream_info['stream'], display_id,
                     'Downloading %s URL' % stream_type,
-                    transform_source=strip_jsonp)
+                    'Unable to download %s URL' % stream_type,
+                    transform_source=strip_jsonp, fatal=False)
+                if not stream_url:
+                    continue
                 if stream_type == 'hds':
                     f4m_formats = self._extract_f4m_formats(stream_url, display_id)
                     # f4m downloader downloads only piece of live stream
index bff36f9d3f24cad293144d8e216faf5eeefed92c..e91d3a248ec3367af8b7e9a8f60be0d503877481 100644 (file)
@@ -14,46 +14,48 @@ from ..utils import (
 
 
 class NRKIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+    _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
 
     _TESTS = [
         {
-            'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
-            'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+            'url': 'http://www.nrk.no/video/PS*150533',
+            'md5': 'bccd850baebefe23b56d708a113229c2',
             'info_dict': {
                 'id': '150533',
                 'ext': 'flv',
                 'title': 'Dompap og andre fugler i Piip-Show',
-                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+                'duration': 263,
             }
         },
         {
-            'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
-            'md5': '3471f2a51718195164e88f46bf427668',
+            'url': 'http://www.nrk.no/video/PS*154915',
+            'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
             'info_dict': {
                 'id': '154915',
                 'ext': 'flv',
                 'title': 'Slik høres internett ut når du er blind',
                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+                'duration': 20,
             }
         },
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+        video_id = self._match_id(url)
 
         data = self._download_json(
-            'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+            'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+            video_id, 'Downloading media JSON')
 
         if data['usageRights']['isGeoBlocked']:
-            raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+            raise ExtractorError(
+                'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
+                expected=True)
+
+        video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
 
-        video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+        duration = parse_duration(data.get('duration'))
 
         images = data.get('images')
         if images:
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
             'ext': 'flv',
             'title': data['title'],
             'description': data['description'],
+            'duration': duration,
             'thumbnail': thumbnail,
         }
 
 
+class NRKPlaylistIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+        'info_dict': {
+            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+            'title': 'Gjenopplev den historiske solformørkelsen',
+            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+        'info_dict': {
+            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+            'title': 'Rivertonprisen til Karin Fossum',
+            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('nrk:%s' % video_id, 'NRK')
+            for video_id in re.findall(
+                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+                webpage)
+        ]
+
+        playlist_title = self._og_search_title(webpage)
+        playlist_description = self._og_search_description(webpage)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
+
+
 class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
index a20672c0cc7fea5309e77bb193b887ced2b8d7d5..46cebc0d7b05080491d5f1d32ee8a709b549debc 100644 (file)
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
 
 
 class PhoenixIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.phoenix.de/content/884301',
-        'md5': 'ed249f045256150c92e72dbb70eadec6',
-        'info_dict': {
-            'id': '884301',
-            'ext': 'mp4',
-            'title': 'Michael Krons mit Hans-Werner Sinn',
-            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
-            'upload_date': '20141025',
-            'uploader': 'Im Dialog',
-        }
-    }
+    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+        (?:
+            phoenix/die_sendungen/(?:[^/]+/)?
+        )?
+        (?P<id>[0-9]+)'''
+    _TESTS = [
+        {
+            'url': 'http://www.phoenix.de/content/884301',
+            'md5': 'ed249f045256150c92e72dbb70eadec6',
+            'info_dict': {
+                'id': '884301',
+                'ext': 'mp4',
+                'title': 'Michael Krons mit Hans-Werner Sinn',
+                'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+                'upload_date': '20141025',
+                'uploader': 'Im Dialog',
+            }
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 9576aed0e6668189c1959df3166b1e550facc7b0..e766ccca322da0e17389e949cd11fed3a5cb1910 100644 (file)
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    float_or_none,
     int_or_none,
-    str_to_int,
+    parse_iso8601,
 )
 
 
 class PlayFMIE(InfoExtractor):
     IE_NAME = 'play.fm'
-    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
 
     _TEST = {
-        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+        'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
         'md5': 'c505f8307825a245d0c7ad1850001f22',
         'info_dict': {
-            'id': '137220',
+            'id': '71276',
             'ext': 'mp3',
-            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
-            'uploader': 'Sven Tasnadi',
-            'uploader_id': 'sventasnadi',
-            'duration': 5627.428,
-            'upload_date': '20140712',
+            'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+            'description': '',
+            'duration': 5627,
+            'timestamp': 1406033781,
+            'upload_date': '20140722',
+            'uploader': 'Dan Drastic',
+            'uploader_id': '71170',
             'view_count': int,
             'comment_count': int,
-            'thumbnail': 're:^https?://.*\.jpg$',
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        upload_date = mobj.group('upload_date')
-
-        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
-        req = compat_urllib_request.Request(
-            'http://www.play.fm/flexRead/recording', data=rec_data)
-        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        rec_doc = self._download_xml(req, video_id)
+        slug = mobj.group('slug')
 
-        error_node = rec_doc.find('./error')
-        if error_node is not None:
-            raise ExtractorError('An error occured: %s (code %s)' % (
-                error_node.text, rec_doc.find('./status').text))
+        recordings = self._download_json(
+            'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
 
-        recording = rec_doc.find('./recording')
-        title = recording.find('./title').text
-        view_count = str_to_int(recording.find('./stats/playcount').text)
-        comment_count = str_to_int(recording.find('./stats/comments').text)
-        duration = float_or_none(recording.find('./duration').text, scale=1000)
-        thumbnail = recording.find('./image').text
+        error = recordings.get('error')
+        if isinstance(error, dict):
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+                expected=True)
 
-        artist = recording.find('./artists/artist')
-        uploader = artist.find('./name').text
-        uploader_id = artist.find('./slug').text
-
-        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
-            'http:', recording.find('./url').text,
-            recording.find('./_class').text, recording.find('./file_id').text,
-            rec_doc.find('./uuid').text, video_id,
-            rec_doc.find('./jingle/file_id').text,
-            'http%3A%2F%2Fwww.play.fm%2Fplayer',
-        )
+        audio_url = recordings['audio']
+        video_id = compat_str(recordings.get('id') or video_id)
+        title = recordings['title']
+        description = recordings.get('description')
+        duration = int_or_none(recordings.get('recordingDuration'))
+        timestamp = parse_iso8601(recordings.get('created_at'))
+        uploader = recordings.get('page', {}).get('title')
+        uploader_id = compat_str(recordings.get('page', {}).get('id'))
+        view_count = int_or_none(recordings.get('playCount'))
+        comment_count = int_or_none(recordings.get('commentCount'))
+        categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
 
         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp3',
-            'filesize': int_or_none(recording.find('./size').text),
+            'url': audio_url,
             'title': title,
-            'upload_date': upload_date,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'description': description,
             'duration': duration,
-            'thumbnail': thumbnail,
+            'timestamp': timestamp,
             'uploader': uploader,
             'uploader_id': uploader_id,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
         }
index 3a27e37890dc78b26af866c9884807c97c56ccb9..0c8b731cf47267568e43ccd09ff21f1683b4d992 100644 (file)
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
     }
 
     def _extract_count(self, pattern, webpage, name):
-        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
-        if count:
-            count = str_to_int(count)
-        return count
+        return str_to_int(self._search_regex(
+            pattern, webpage, '%s count' % name, fatal=False))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
         if thumbnail:
             thumbnail = compat_urllib_parse.unquote(thumbnail)
 
-        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
-        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(
+            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(
+            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
         comment_count = self._extract_count(
-            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
         video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
new file mode 100644 (file)
index 0000000..9688ed9
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+
+    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
+        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
+
+    _SERVER_NUMBERS = (1, 2)
+
+    _TEST = {
+        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
+        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+        'info_dict': {
+            'id': '1285',
+            'display_id': 'recherche-appartement',
+            'ext': 'mp4',
+            'title': 'Recherche appartement',
+            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20140925',
+            'duration': 120,
+            'view_count': int,
+            'average_rating': float,
+            'categories': ['Débutante', 'Scénario', 'Sodomie'],
+            'age_limit': 18,
+        }
+    }
+
+    @classmethod
+    def build_video_url(cls, num):
+        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self.build_video_url(video_id)
+
+        title = self._html_search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
+        description = self._html_search_regex(
+            r'<article id="descriptif">(.+?)</article>',
+            webpage, "description", fatal=False, flags=re.DOTALL)
+
+        thumbnail = self._search_regex(
+            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
+            webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
+        duration = int_or_none(self._search_regex(
+            'Durée (\d+)', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'(\d+) vues', webpage, 'view count', fatal=False))
+        average_rating = self._search_regex(
+            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+        if average_rating:
+            average_rating = float_or_none(average_rating.replace(',', '.'))
+
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', fatal=False)
+        if categories:
+            categories = [category.strip() for category in categories.split(',')]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'average_rating': average_rating,
+            'categories': categories,
+            'age_limit': 18,
+        }
index 385681d06e3dda356193d9f89c7ccbdd4cbde453..7cc7996642cae1de1ca2a585391d167025b92162 100644 (file)
@@ -10,6 +10,7 @@ from ..compat import (
 )
 from ..utils import (
     unified_strdate,
+    int_or_none,
 )
 
 
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
             'info_dict': {
                 'id': '2104602',
                 'ext': 'mp4',
-                'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+                'title': 'Episode 18 - Staffel 2',
                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                 'upload_date': '20131231',
                 'duration': 5845.04,
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
             urls_sources = urls_sources.values()
 
         def fix_bitrate(bitrate):
+            bitrate = int_or_none(bitrate)
+            if not bitrate:
+                return None
             return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
 
         for source in urls_sources:
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
new file mode 100644 (file)
index 0000000..884c284
--- /dev/null
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import(
+    unified_strdate,
+    str_to_int,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+        'info_dict': {
+            'id': 'chaartaar-ashoobam',
+            'ext': 'mp4',
+            'title': 'Chaartaar - Ashoobam',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = [{
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+            'format_id': '%sp' % height,
+            'height': int(height),
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        upload_date = unified_strdate(self._search_regex(
+            r'class="date_added">Date added: ([^<]+)<',
+            webpage, 'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class="views">Plays: ([\d,]+)',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) likes',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) dislikes',
+            webpage, 'dislike count', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats': formats,
+        }
index 144e3398259179e396206d0ea059e334953dcfd7..1631faf29f61c9cc15bca99394966c1917ca1a08 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -62,34 +62,78 @@ class RaiIE(InfoExtractor):
                 'description': 'Edizione delle ore 20:30 ',
             }
         },
+        {
+            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+            'md5': '02b64456f7cc09f96ff14e7dd489017e',
+            'info_dict': {
+                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+                'ext': 'flv',
+                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+                'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+                'uploader': 'RaiTre',
+            }
+        }
     ]
 
+    def _extract_relinker_url(self, webpage):
+        return self._proto_relative_url(self._search_regex(
+            [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+            webpage, 'relinker url', default=None))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        host = mobj.group('host')
 
-        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
 
-        title = media.get('name')
-        description = media.get('desc')
-        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
-        duration = parse_duration(media.get('length'))
-        uploader = media.get('author')
-        upload_date = unified_strdate(media.get('date'))
+        relinker_url = self._extract_relinker_url(webpage)
 
-        formats = []
+        if not relinker_url:
+            iframe_path = self._search_regex(
+                r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+                webpage, 'iframe')
+            webpage = self._download_webpage(
+                '%s/%s' % (host, iframe_path), video_id)
+            relinker_url = self._extract_relinker_url(webpage)
 
-        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
-            media_url = media.get(format_id)
-            if not media_url:
-                continue
-            formats.append({
+        relinker = self._download_json(
+            '%s&output=47' % relinker_url, video_id)
+
+        media_url = relinker['video'][0]
+        ct = relinker.get('ct')
+        if ct == 'f4m':
+            formats = self._extract_f4m_formats(
+                media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+        else:
+            formats = [{
                 'url': media_url,
-                'format_id': format_id,
-                'ext': 'mp4',
-            })
+                'format_id': ct,
+            }]
 
-        subtitles = self.extract_subtitles(video_id, url)
+        json_link = self._html_search_meta(
+            'jsonlink', webpage, 'JSON link', default=None)
+        if json_link:
+            media = self._download_json(
+                host + json_link, video_id, 'Downloading video JSON')
+            title = media.get('name')
+            description = media.get('desc')
+            thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+            duration = parse_duration(media.get('length'))
+            uploader = media.get('author')
+            upload_date = unified_strdate(media.get('date'))
+        else:
+            title = (self._search_regex(
+                r'var\s+videoTitolo\s*=\s*"(.+?)";',
+                webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+            description = self._og_search_description(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
+            duration = None
+            uploader = self._html_search_meta('Editore', webpage, 'uploader')
+            upload_date = unified_strdate(self._html_search_meta(
+                'item-date', webpage, 'upload date', default=None))
+
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
@@ -103,8 +147,7 @@ class RaiIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_subtitles(self, video_id, url):
-        webpage = self._download_webpage(url, video_id)
+    def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
index 846b76c81528431c0faf8ea3fc9bbd6b017db099..d6054d7175fd49a22117dd357bea7905f6e739be 100644 (file)
@@ -1,17 +1,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
+        'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
             'id': '66418',
             'ext': 'mp4',
-            "title": "Sucked on a toilet",
-            "age_limit": 18,
+            'title': 'Sucked on a toilet',
+            'age_limit': 18,
         }
     }
 
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
         video_url = self._html_search_regex(
             r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
         video_title = self._html_search_regex(
index 13f07107753e0945e4d9e203a2bc6dd8814d4868..849300140ecbf598874d22b090262eabec1e7ea5 100644 (file)
@@ -8,8 +8,10 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    ExtractorError,
     float_or_none,
     remove_end,
+    std_headers,
     struct_unpack,
 )
 
@@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _real_initialize(self):
+        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        manager_info = self._download_json(
+            'http://www.rtve.es/odin/loki/' + user_agent_b64,
+            None, 'Fetching manager info')
+        self._manager = manager_info['manager']
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         info = self._download_json(
             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
             video_id)['page']['items'][0]
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+        if info['state'] == 'DESPU':
+            raise ExtractorError('The video is no longer available', expected=True)
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
         if not video_url.endswith('.f4m'):
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
new file mode 100644 (file)
index 0000000..10251f2
--- /dev/null
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+    std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_FORMAT = 'json'
+
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        # We only need to log in once for courses or individual videos
+        if not self.LOGGED_IN:
+            self._login()
+            SafariBaseIE.LOGGED_IN = True
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(
+                self._ACCOUNT_CREDENTIALS_HINT,
+                expected=True)
+
+        headers = std_headers
+        if 'Referer' not in headers:
+            headers['Referer'] = self._LOGIN_URL
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            'Downloading login form')
+
+        csrf = self._html_search_regex(
+            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+            login_page, 'csrf token')
+
+        login_form = {
+            'csrfmiddlewaretoken': csrf,
+            'email': username,
+            'password1': password,
+            'login': 'Sign In',
+            'next': '',
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError(
+                'Login failed; make sure your credentials are correct and try again.',
+                expected=True)
+
+        self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+    IE_NAME = 'safari'
+    IE_DESC = 'safaribooksonline.com online video'
+    _VALID_URL = r'''(?x)https?://
+                            (?:www\.)?safaribooksonline\.com/
+                                (?:
+                                    library/view/[^/]+|
+                                    api/v1/book
+                                )/
+                                (?P<course_id>\d+)/
+                                    (?:chapter(?:-content)?/)?
+                                (?P<part>part\d+)\.html
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'info_dict': {
+            'id': '2842601850001',
+            'ext': 'mp4',
+            'title': 'Introduction',
+        },
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        part = mobj.group('part')
+
+        webpage = self._download_webpage(
+            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+            part)
+
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if not bc_url:
+            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+    IE_NAME = 'safari:course'
+    IE_DESC = 'safaribooksonline.com online courses'
+
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'info_dict': {
+            'id': '9780133392838',
+            'title': 'Hadoop Fundamentals LiveLessons',
+        },
+        'playlist_count': 22,
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        course_json = self._download_json(
+            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            course_id, 'Downloading course JSON')
+
+        if 'chapters' not in course_json:
+            raise ExtractorError(
+                'No chapters found for course %s' % course_id, expected=True)
+
+        entries = [
+            self.url_result(chapter, 'Safari')
+            for chapter in course_json['chapters']]
+
+        course_title = course_json['title']
+
+        return self.playlist_result(entries, course_id, course_title)
index 9f79ff5c1b66d2bf37369a6009a914043493b407..0b717a1e42b8dd2c3d8a88d602f001876cf99e03 100644 (file)
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
         slideshare_obj = self._search_regex(
-            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
             webpage, 'slideshare object')
         info = json.loads(slideshare_obj)
         if info['slideshow']['type'] != 'video':
index 9d45059723c3450a0774275ad708eed88b6e0fcb..316b2c90f110770299084889552b8137e072a617 100644 (file)
@@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor):
 
 
 class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
     IE_NAME = 'soundcloud:set'
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):
 
 
 class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band',
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
new file mode 100644 (file)
index 0000000..7f060b1
--- /dev/null
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SpankBangIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _TEST = {
+        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+        'md5': '1cc433e1d6aa14bc376535b8679302f7',
+        'info_dict': {
+            'id': '3vvn',
+            'ext': 'mp4',
+            'title': 'fantasy solo',
+            'description': 'dillion harper masturbates on a bed',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'silly2587',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        stream_key = self._html_search_regex(
+            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
+            webpage, 'stream key')
+
+        formats = [{
+            'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
+            'ext': 'mp4',
+            'format_id': '%sp' % height,
+            'height': int(height),
+        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+        description = self._search_regex(
+            r'class="desc"[^>]*>([^<]+)',
+            webpage, 'description', default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._search_regex(
+            r'class="user"[^>]*>([^<]+)',
+            webpage, 'uploader', fatal=False)
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+            'age_limit': age_limit,
+        }
index 7cb06f351e5b388142b00b51aeba69a1ecfef250..1caf08cb752d201066f0a1d4679efffc26598d1e 100644 (file)
@@ -4,7 +4,10 @@ import base64
 import re
 
 from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+    ExtractorError,
+    qualities,
+)
 
 
 class TeamcocoIE(InfoExtractor):
@@ -18,6 +21,7 @@ class TeamcocoIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+                'duration': 504,
                 'age_limit': 0,
             }
         }, {
@@ -28,6 +32,7 @@ class TeamcocoIE(InfoExtractor):
                 'ext': 'mp4',
                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+                'duration': 288,
                 'age_limit': 0,
             }
         }
@@ -49,35 +54,37 @@ class TeamcocoIE(InfoExtractor):
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')
 
-        embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
-        embed = self._download_webpage(
-            embed_url, video_id, 'Downloading embed page')
-
-        player_data = self._parse_json(self._search_regex(
-            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
+        preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
+        if not preloads:
+            raise ExtractorError('Preload information could not be extracted')
+        preload = max([(len(p), p) for p in preloads])[1]
         data = self._parse_json(
-            base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
+            base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
 
         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
         for filed in data['files']:
-            m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
-            if m_format is not None:
-                format_id = m_format.group(1)
+            if filed['type'] == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    filed['url'], video_id, ext='mp4'))
             else:
-                format_id = filed['bitrate']
-            tbr = (
-                int(filed['bitrate'])
-                if filed['bitrate'].isdigit()
-                else None)
+                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+                if m_format is not None:
+                    format_id = m_format.group(1)
+                else:
+                    format_id = filed['bitrate']
+                tbr = (
+                    int(filed['bitrate'])
+                    if filed['bitrate'].isdigit()
+                    else None)
 
-            formats.append({
-                'url': filed['url'],
-                'ext': 'mp4',
-                'tbr': tbr,
-                'format_id': format_id,
-                'quality': get_quality(format_id),
-            })
+                formats.append({
+                    'url': filed['url'],
+                    'ext': 'mp4',
+                    'tbr': tbr,
+                    'format_id': format_id,
+                    'quality': get_quality(format_id),
+                })
 
         self._sort_formats(formats)
 
@@ -88,5 +95,6 @@ class TeamcocoIE(InfoExtractor):
             'title': data['title'],
             'thumbnail': data.get('thumb', {}).get('href'),
             'description': data.get('teaser'),
+            'duration': data.get('duration'),
             'age_limit': self._family_friendly_search(webpage),
         }
index 4cec06f8bd6e2a18ac3062e916225746f5153c93..a2dc14c2b652527930af81bd845466b795e8866f 100644 (file)
@@ -5,9 +5,8 @@ import re
 
 from .common import InfoExtractor
 
-from ..compat import (
-    compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
 
 
 class TEDIE(InfoExtractor):
@@ -170,17 +169,41 @@ class TEDIE(InfoExtractor):
                 finfo = self._NATIVE_FORMATS.get(f['format_id'])
                 if finfo:
                     f.update(finfo)
-        else:
-            # Use rtmp downloads
-            formats = [{
-                'format_id': f['name'],
-                'url': talk_info['streamer'],
-                'play_path': f['file'],
-                'ext': 'flv',
-                'width': f['width'],
-                'height': f['height'],
-                'tbr': f['bitrate'],
-            } for f in talk_info['resources']['rtmp']]
+
+        for format_id, resources in talk_info['resources'].items():
+            if format_id == 'h264':
+                for resource in resources:
+                    bitrate = int_or_none(resource.get('bitrate'))
+                    formats.append({
+                        'url': resource['file'],
+                        'format_id': '%s-%sk' % (format_id, bitrate),
+                        'tbr': bitrate,
+                    })
+            elif format_id == 'rtmp':
+                streamer = talk_info.get('streamer')
+                if not streamer:
+                    continue
+                for resource in resources:
+                    formats.append({
+                        'format_id': '%s-%s' % (format_id, resource.get('name')),
+                        'url': streamer,
+                        'play_path': resource['file'],
+                        'ext': 'flv',
+                        'width': int_or_none(resource.get('width')),
+                        'height': int_or_none(resource.get('height')),
+                        'tbr': int_or_none(resource.get('bitrate')),
+                    })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id))
+
+        audio_download = talk_info.get('audioDownload')
+        if audio_download:
+            formats.append({
+                'url': audio_download,
+                'format_id': 'audio',
+            })
+
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
index feac666f78baff49f4fb312a147acad67d320bc2..6a006b2d201365eab62c5b090633a1c5e345a48f 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     xpath_with_ns,
     unsmuggle_url,
+    int_or_none,
 )
 
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@@ -28,7 +29,7 @@ class ThePlatformIE(InfoExtractor):
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
-    _TEST = {
+    _TESTS = [{
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
         'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
         'info_dict': {
@@ -42,7 +43,20 @@ class ThePlatformIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+        'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+        'info_dict': {
+            'id': '22d_qsQ6MIRT',
+            'ext': 'flv',
+            'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+            'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }]
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -92,7 +106,7 @@ class ThePlatformIE(InfoExtractor):
             error_msg = next(
                 n.attrib['abstract']
                 for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
         except StopIteration:
             pass
         else:
@@ -115,7 +129,7 @@ class ThePlatformIE(InfoExtractor):
         head = meta.find(_x('smil:head'))
         body = meta.find(_x('smil:body'))
 
-        f4m_node = body.find(_x('smil:seq//smil:video'))
+        f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video'))
         if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
             f4m_url = f4m_node.attrib['src']
             if 'manifest.f4m?' not in f4m_url:
@@ -127,13 +141,17 @@ class ThePlatformIE(InfoExtractor):
         else:
             formats = []
             switch = body.find(_x('smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par'))
             if switch is not None:
                 base_url = head.find(_x('smil:meta')).attrib['base']
                 for f in switch.findall(_x('smil:video')):
                     attr = f.attrib
-                    width = int(attr['width'])
-                    height = int(attr['height'])
-                    vbr = int(attr['system-bitrate']) // 1000
+                    width = int_or_none(attr.get('width'))
+                    height = int_or_none(attr.get('height'))
+                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                     format_id = '%dx%d_%dk' % (width, height, vbr)
                     formats.append({
                         'format_id': format_id,
@@ -145,10 +163,10 @@ class ThePlatformIE(InfoExtractor):
                         'vbr': vbr,
                     })
             else:
-                switch = body.find(_x('smil:seq//smil:switch'))
+                switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch'))
                 for f in switch.findall(_x('smil:video')):
                     attr = f.attrib
-                    vbr = int(attr['system-bitrate']) // 1000
+                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                     ext = determine_ext(attr['src'])
                     if ext == 'once':
                         ext = 'mp4'
@@ -167,5 +185,5 @@ class ThePlatformIE(InfoExtractor):
             'formats': formats,
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],
-            'duration': info['duration'] // 1000,
+            'duration': int_or_none(info.get('duration'), 1000),
         }
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
new file mode 100644 (file)
index 0000000..d6c0ab1
--- /dev/null
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replace the audio tracks that can be streamed on their
+# site. The tracks usually expire after 1 months, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+    IE_NAME = '22tracks:track'
+
+    _API_BASE = 'http://22tracks.com/api'
+
+    def _extract_info(self, city, genre_name, track_id=None):
+        item_id = track_id if track_id else genre_name
+
+        cities = self._download_json(
+            '%s/cities' % self._API_BASE, item_id,
+            'Downloading cities info',
+            'Unable to download cities info')
+        city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+        genres = self._download_json(
+            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+            'Downloading %s genres info' % city,
+            'Unable to download %s genres info' % city)
+        genre = [x for x in genres if x['slug'] == genre_name][0]
+        genre_id = genre['id']
+
+        tracks = self._download_json(
+            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+            'Downloading %s genre tracks info' % genre_name,
+            'Unable to download track info')
+
+        return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+    def _get_track_url(self, filename, track_id):
+        token = self._download_json(
+            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+            track_id, 'Downloading token', 'Unable to download token')
+        return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+    def _extract_track_info(self, track_info, track_id):
+        download_url = self._get_track_url(track_info['filename'], track_id)
+        title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+        return {
+            'id': track_id,
+            'url': download_url,
+            'ext': 'mp3',
+            'title': title,
+            'duration': int_or_none(track_info.get('duration')),
+            'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+        track_id = mobj.group('id')
+
+        track_info = self._extract_info(city, genre, track_id)
+        return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+    IE_NAME = '22tracks:genre'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+
+        genre_title, tracks = self._extract_info(city, genre)
+
+        entries = [
+            self._extract_track_info(track_info, track_info['id'])
+            for track_info in tracks]
+
+        return self.playlist_result(entries, genre, genre_title)
index aad2bf222269e5c3adf530b7332eed6eaabcee1a..94bd6345da18815a50b72502a8b91ae4e30ae2b5 100644 (file)
@@ -149,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
 
 class TwitchVideoIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:video'
-    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'video'
     _ITEM_SHORTCUT = 'a'
 
@@ -165,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
 
 class TwitchChapterIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:chapter'
-    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'chapter'
     _ITEM_SHORTCUT = 'c'
 
@@ -184,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
 
 class TwitchVodIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:vod'
-    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'vod'
     _ITEM_SHORTCUT = 'v'
 
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
new file mode 100644 (file)
index 0000000..bba25bb
--- /dev/null
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import js_to_json
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://video.udn.com/embed/news/300040',
+        'md5': 'de06b4c90b042c128395a88f0384817e',
+        'info_dict': {
+            'id': '300040',
+            'ext': 'mp4',
+            'title': '生物老師男變女 全校挺"做自己"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': '//video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        page = self._download_webpage(url, video_id)
+
+        options = json.loads(js_to_json(self._html_search_regex(
+            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+        video_urls = options['video']
+
+        if video_urls.get('youtube'):
+            return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+        try:
+            del video_urls['youtube']
+        except KeyError:
+            pass
+
+        formats = [{
+            'url': self._download_webpage(
+                compat_urlparse.urljoin(url, api_url), video_id,
+                'retrieve url for %s video' % video_type),
+            'format_id': video_type,
+            'preference': 0 if video_type == 'mp4' else -1,
+        } for video_type, api_url in video_urls.items()]
+
+        self._sort_formats(formats)
+
+        thumbnail = None
+
+        if options.get('gallery') and len(options['gallery']):
+            thumbnail = options['gallery'][0].get('original')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': options['title'],
+            'thumbnail': thumbnail
+        }
index 06554a1befefb938796f106b5ac45c75d942dc14..96c809eaf7155290210e0f8b18d3a2c7c948ba97 100644 (file)
@@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
 
         deliver_url = self._search_regex(
@@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor):
         title = clean_html((
             self._html_search_regex(
                 r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
-                webpage, 'title', default=None)
-            or self._search_regex(
+                webpage, 'title', default=None) or
+            self._search_regex(
                 r"var\s+nameVideo\s*=\s*'([^']+)'",
                 deliver_page, 'title')))
 
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
new file mode 100644 (file)
index 0000000..9369aba
--- /dev/null
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+        'md5': '2a933874cb7dce4366075281eb49e855',
+        'info_dict': {
+            'id': '76337',
+            'ext': 'mp4',
+            'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+            'description': 'فصل ۲۰۱۵-۲۰۱۴',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="matn">(.+?)</div>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        video_id = self._search_regex(
+            r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+            webpage, display_id, default=display_id)
+
+        return {
+            'url': video_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
new file mode 100644 (file)
index 0000000..6215f06
--- /dev/null
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+    _NETRC_MACHINE = 'vessel'
+    _TEST = {
+        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+        'info_dict': {
+            'id': 'HDN7G5UMs',
+            'ext': 'mp4',
+            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150317',
+            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+            'timestamp': int,
+        },
+    }
+
+    @staticmethod
+    def make_json_request(url, data):
+        payload = json.dumps(data).encode('utf-8')
+        req = compat_urllib_request.Request(url, payload)
+        req.add_header('Content-Type', 'application/json; charset=utf-8')
+        return req
+
+    @staticmethod
+    def find_assets(data, asset_type):
+        for asset in data.get('assets', []):
+            if asset.get('type') == asset_type:
+                yield asset
+
+    def _check_access_rights(self, data):
+        access_info = data.get('__view', {})
+        if not access_info.get('allow_access', True):
+            err_code = access_info.get('error_code') or ''
+            if err_code == 'ITEM_PAID_ONLY':
+                raise ExtractorError(
+                    'This video requires subscription.', expected=True)
+            else:
+                raise ExtractorError(
+                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        data = {
+            'client_id': 'web',
+            'type': 'password',
+            'user_key': username,
+            'password': password,
+        }
+        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
+        self._login()
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        data = self._parse_json(self._search_regex(
+            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+        asset_id = data['model']['data']['id']
+
+        req = VesselIE.make_json_request(
+            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+        data = self._download_json(req, video_id)
+
+        self._check_access_rights(data)
+
+        try:
+            video_asset = next(VesselIE.find_assets(data, 'video'))
+        except StopIteration:
+            raise ExtractorError('No video assets found')
+
+        formats = []
+        for f in video_asset.get('sources', []):
+            if f['name'] == 'hls-index':
+                formats.extend(self._extract_m3u8_formats(
+                    f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+            else:
+                formats.append({
+                    'format_id': f['name'],
+                    'tbr': f.get('bitrate'),
+                    'height': f.get('height'),
+                    'width': f.get('width'),
+                    'url': f['location'],
+                })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for im_asset in VesselIE.find_assets(data, 'image'):
+            thumbnails.append({
+                'url': im_asset['location'],
+                'width': im_asset.get('width', 0),
+                'height': im_asset.get('height', 0),
+            })
+
+        return {
+            'id': video_id,
+            'title': data['title'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': data.get('short_description'),
+            'duration': data.get('duration'),
+            'comment_count': data.get('comment_count'),
+            'like_count': data.get('like_count'),
+            'view_count': data.get('view_count'),
+            'timestamp': parse_iso8601(data.get('released_at')),
+        }
index bd09652cd96340155cc084f6814df4fbecd6f707..28bcc89cd7423dafa40032076d1bd3ad12f4bdcf 100644 (file)
@@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # and latter we extract those that are Vimeo specific.
         self.report_extraction(video_id)
 
+        vimeo_config = self._search_regex(
+            r'vimeo\.config\s*=\s*({.+?});', webpage,
+            'vimeo config', default=None)
+        if vimeo_config:
+            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+            if seed_status.get('state') == 'failed':
+                raise ExtractorError(
+                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+                    expected=True)
+
         # Extract the config JSON
         try:
             try:
index c3187cfeb7c7f04dd040ca4cb3e89f0653ccbd03..d4f5a991e8d69f8c7d233cb90de987b962c43326 100644 (file)
@@ -9,8 +9,8 @@ from ..utils import unified_strdate
 
 
 class VineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+    _TESTS = [{
         'url': 'https://vine.co/v/b9KOOWX7HUx',
         'md5': '2f36fed6235b16da96ce9b4dc890940d',
         'info_dict': {
@@ -23,21 +23,53 @@ class VineIE(InfoExtractor):
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
         },
-    }
+    }, {
+        'url': 'https://vine.co/v/MYxVapFvz2z',
+        'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+        'info_dict': {
+            'id': 'MYxVapFvz2z',
+            'ext': 'mp4',
+            'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'alt_title': 'Vine by Luna',
+            'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'upload_date': '20140815',
+            'uploader': 'Luna',
+            'uploader_id': '1102363502380728320',
+        },
+    }, {
+        'url': 'https://vine.co/v/bxVjBbZlPUH',
+        'md5': 'ea27decea3fa670625aac92771a96b73',
+        'info_dict': {
+            'id': 'bxVjBbZlPUH',
+            'ext': 'mp4',
+            'title': '#mw3 #ac130 #killcam #angelofdeath',
+            'alt_title': 'Vine by Z3k3',
+            'description': '#mw3 #ac130 #killcam #angelofdeath',
+            'upload_date': '20130430',
+            'uploader': 'Z3k3',
+            'uploader_id': '936470460173008896',
+        },
+    }, {
+        'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
-        data = json.loads(self._html_search_regex(
-            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+        data = self._parse_json(
+            self._html_search_regex(
+                r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+                webpage, 'vine data'),
+            video_id)
 
         formats = [{
             'format_id': '%(format)s-%(rate)s' % f,
             'vcodec': f['format'],
             'quality': f['rate'],
             'url': f['videoUrl'],
-        } for f in data['videoUrls'] if f.get('rate')]
+        } for f in data['videoUrls']]
 
         self._sort_formats(formats)
 
index 4971965f9d090cce61a2e8b6d1486fadc873b4dc..81d885fdcee1cf788c217e862629df58f386d73c 100644 (file)
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def base64_decode_utf8(data):
+        return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+    @staticmethod
+    def base64_encode_utf8(data):
+        return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
     def _extract_flv_config(self, media_id):
-        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+        base64_media_id = self.base64_encode_utf8(media_id)
         flv_config = self._download_xml(
             'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
             'flv config')
         prop_dict = {}
         for prop in flv_config.findall('./property'):
-            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+            prop_id = self.base64_decode_utf8(prop.attrib['id'])
             # CDATA may be empty in flv config
             if not prop.text:
                 continue
-            encoded_content = base64.b64decode(prop.text).decode('utf-8')
+            encoded_content = self.base64_decode_utf8(prop.text)
             prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
         return prop_dict
 
index 97dbac4cce53d7fe956b074fddbe40993fd5681f..b777159c5639304edf1433857f626c29299e4bcb 100644 (file)
@@ -17,6 +17,8 @@ from ..utils import (
     int_or_none,
 )
 
+from .nbc import NBCSportsVPlayerIE
+
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
@@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
         }, {
             'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
             'only_matching': True,
+        }, {
+            'note': 'NBC Sports embeds',
+            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+            'info_dict': {
+                'id': '9CsDKds0kvHI',
+                'ext': 'flv',
+                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+            }
         }
     ]
 
@@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):
                 items = json.loads(items_json)
                 video_id = items[0]['id']
                 return self._get_info(video_id, display_id, webpage)
+        # Look for NBCSports iframes
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
         items_json = self._search_regex(
             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
index e4c855ee0e3616981c77d41b441f15adf6ce0453..6abe72f739b63d8b39d8cdfc5bfccf70dc545715 100644 (file)
@@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor):
             webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
-        except:
+        except ValueError:
             raise ExtractorError('Invalid JSON')
 
         self.report_extraction(video_id)
index 27c8c4453773974f52d02ecba58f092221c6e118..2774ec30b26d408817a8f4328747a99e791ec8f5 100644 (file)
@@ -495,7 +495,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': '孫艾倫',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
-        }
+        },
+        # url_encoded_fmt_stream_map is empty string
+        {
+            'url': 'qEJwOuvDf7I',
+            'info_dict': {
+                'id': 'qEJwOuvDf7I',
+                'ext': 'mp4',
+                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+                'description': '',
+                'upload_date': '20150404',
+                'uploader_id': 'spbelect',
+                'uploader': 'Наблюдатели Петербурга',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -772,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             errnote='Could not download DASH manifest')
 
         formats = []
-        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-            if url_el is None:
-                continue
-            format_id = r.attrib['id']
-            video_url = url_el.text
-            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-            f = {
-                'format_id': format_id,
-                'url': video_url,
-                'width': int_or_none(r.attrib.get('width')),
-                'height': int_or_none(r.attrib.get('height')),
-                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                'filesize': filesize,
-                'fps': int_or_none(r.attrib.get('frameRate')),
-            }
-            try:
-                existing_format = next(
-                    fo for fo in formats
-                    if fo['format_id'] == format_id)
-            except StopIteration:
-                full_info = self._formats.get(format_id, {}).copy()
-                full_info.update(f)
-                formats.append(full_info)
-            else:
-                existing_format.update(f)
+        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+            mime_type = a.attrib.get('mimeType')
+            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+                if url_el is None:
+                    continue
+                if mime_type == 'text/vtt':
+                    # TODO implement WebVTT downloading
+                    pass
+                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    format_id = r.attrib['id']
+                    video_url = url_el.text
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    f = {
+                        'format_id': format_id,
+                        'url': video_url,
+                        'width': int_or_none(r.attrib.get('width')),
+                        'height': int_or_none(r.attrib.get('height')),
+                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                        'filesize': filesize,
+                        'fps': int_or_none(r.attrib.get('frameRate')),
+                    }
+                    try:
+                        existing_format = next(
+                            fo for fo in formats
+                            if fo['format_id'] == format_id)
+                    except StopIteration:
+                        full_info = self._formats.get(format_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
+                    else:
+                        existing_format.update(f)
+                else:
+                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
     def _real_extract(self, url):
@@ -855,7 +879,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 args = ytplayer_config['args']
                 # Convert to the same format returned by compat_parse_qs
                 video_info = dict((k, [v]) for k, v in args.items())
-                if 'url_encoded_fmt_stream_map' not in args:
+                if not args.get('url_encoded_fmt_stream_map'):
                     raise ValueError('No stream_map present')  # caught below
             except ValueError:
                 # We fallback to the get_video_info pages (used by the embed page)
@@ -1263,27 +1287,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
         return self.playlist_result(url_results, playlist_id, title)
 
-    def _real_extract(self, url):
-        # Extract playlist id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        playlist_id = mobj.group(1) or mobj.group(2)
-
-        # Check if it's a video-specific URL
-        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        if 'v' in query_dict:
-            video_id = query_dict['v'][0]
-            if self._downloader.params.get('noplaylist'):
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(video_id, 'Youtube', video_id=video_id)
-            else:
-                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
-            # Mixes require a custom extraction process
-            return self._extract_mix(playlist_id)
-
+    def _extract_playlist(self, playlist_id):
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
         more_widget_html = content_html = page
@@ -1327,6 +1331,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
+    def _real_extract(self, url):
+        # Extract playlist id
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError('Invalid URL: %s' % url)
+        playlist_id = mobj.group(1) or mobj.group(2)
+
+        # Check if it's a video-specific URL
+        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        if 'v' in query_dict:
+            video_id = query_dict['v'][0]
+            if self._downloader.params.get('noplaylist'):
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
+            else:
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
+        return self._extract_playlist(playlist_id)
+
 
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
@@ -1643,21 +1670,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_NAME = 'youtube:recommended'
     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
     _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
 
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+    IE_NAME = 'youtube:watchlater'
     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
-    _FEED_NAME = 'watch_later'
-    _PLAYLIST_TITLE = 'Youtube Watch Later'
-    _PERSONAL_FEED = True
+    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+    _TESTS = []  # override PlaylistIE tests
+
+    def _real_extract(self, url):
+        return self._extract_playlist('WL')
 
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    IE_NAME = 'youtube:history'
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
     _FEED_NAME = 'history'
index 4e6e47d6fdc430f4071c1470f25a1b7377ad7845..5720fb424ff44ac3e659a797ee2bef8a5a1529b1 100644 (file)
@@ -13,6 +13,7 @@ from .compat import (
     compat_kwargs,
 )
 from .utils import (
+    preferredencoding,
     write_string,
 )
 from .version import __version__
@@ -120,19 +121,19 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '-h', '--help',
         action='help',
-        help='print this help text and exit')
+        help='Print this help text and exit')
     general.add_option(
         '-v', '--version',
         action='version',
-        help='print program version and exit')
+        help='Print program version and exit')
     general.add_option(
         '-U', '--update',
         action='store_true', dest='update_self',
-        help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+        help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option(
         '-i', '--ignore-errors',
         action='store_true', dest='ignoreerrors', default=False,
-        help='continue on download errors, for example to skip unavailable videos in a playlist')
+        help='Continue on download errors, for example to skip unavailable videos in a playlist')
     general.add_option(
         '--abort-on-error',
         action='store_false', dest='ignoreerrors',
@@ -140,7 +141,7 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--dump-user-agent',
         action='store_true', dest='dump_user_agent', default=False,
-        help='display the current browser identification')
+        help='Display the current browser identification')
     general.add_option(
         '--list-extractors',
         action='store_true', dest='list_extractors', default=False,
@@ -152,7 +153,7 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
     general.add_option(
         '--ignore-config',
         action='store_true',
@@ -169,7 +170,7 @@ def parseOpts(overrideArguments=None):
         '--no-color', '--no-colors',
         action='store_true', dest='no_color',
         default=False,
-        help='Do not emit color codes in output.')
+        help='Do not emit color codes in output')
 
     network = optparse.OptionGroup(parser, 'Network Options')
     network.add_option(
@@ -206,23 +207,23 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--playlist-start',
         dest='playliststart', metavar='NUMBER', default=1, type=int,
-        help='playlist video to start at (default is %default)')
+        help='Playlist video to start at (default is %default)')
     selection.add_option(
         '--playlist-end',
         dest='playlistend', metavar='NUMBER', default=None, type=int,
-        help='playlist video to end at (default is last)')
+        help='Playlist video to end at (default is last)')
     selection.add_option(
         '--playlist-items',
         dest='playlist_items', metavar='ITEM_SPEC', default=None,
-        help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+        help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
-        help='download only matching titles (regex or caseless sub-string)')
+        help='Download only matching titles (regex or caseless sub-string)')
     selection.add_option(
         '--reject-title',
         dest='rejecttitle', metavar='REGEX',
-        help='skip download for matching titles (regex or caseless sub-string)')
+        help='Skip download for matching titles (regex or caseless sub-string)')
     selection.add_option(
         '--max-downloads',
         dest='max_downloads', metavar='NUMBER', type=int, default=None,
@@ -238,19 +239,19 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--date',
         metavar='DATE', dest='date', default=None,
-        help='download only videos uploaded in this date')
+        help='Download only videos uploaded in this date')
     selection.add_option(
         '--datebefore',
         metavar='DATE', dest='datebefore', default=None,
-        help='download only videos uploaded on or before this date (i.e. inclusive)')
+        help='Download only videos uploaded on or before this date (i.e. inclusive)')
     selection.add_option(
         '--dateafter',
         metavar='DATE', dest='dateafter', default=None,
-        help='download only videos uploaded on or after this date (i.e. inclusive)')
+        help='Download only videos uploaded on or after this date (i.e. inclusive)')
     selection.add_option(
         '--min-views',
         metavar='COUNT', dest='min_views', default=None, type=int,
-        help='Do not download any videos with less than COUNT views',)
+        help='Do not download any videos with less than COUNT views')
     selection.add_option(
         '--max-views',
         metavar='COUNT', dest='max_views', default=None, type=int,
@@ -259,7 +260,7 @@ def parseOpts(overrideArguments=None):
         '--match-filter',
         metavar='FILTER', dest='match_filter', default=None,
         help=(
-            '(Experimental) Generic video filter. '
+            'Generic video filter (experimental). '
             'Specify any key (see help for -o for a list of available keys) to'
             ' match if the key is present, '
             '!key to check if the key is not present,'
@@ -277,15 +278,15 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
-        help='If the URL refers to a video and a playlist, download only the video.')
+        help='Download only the video, if the URL refers to a video and a playlist.')
     selection.add_option(
         '--yes-playlist',
         action='store_false', dest='noplaylist', default=False,
-        help='If the URL refers to a video and a playlist, download the playlist.')
+        help='Download the playlist, if the URL refers to a video and a playlist.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
-        help='download only videos suitable for the given age')
+        help='Download only videos suitable for the given age')
     selection.add_option(
         '--download-archive', metavar='FILE',
         dest='download_archive',
@@ -299,30 +300,30 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '-u', '--username',
         dest='username', metavar='USERNAME',
-        help='login with this account ID')
+        help='Login with this account ID')
     authentication.add_option(
         '-p', '--password',
         dest='password', metavar='PASSWORD',
-        help='account password. If this option is left out, youtube-dl will ask interactively.')
+        help='Account password. If this option is left out, youtube-dl will ask interactively.')
     authentication.add_option(
         '-2', '--twofactor',
         dest='twofactor', metavar='TWOFACTOR',
-        help='two-factor auth code')
+        help='Two-factor auth code')
     authentication.add_option(
         '-n', '--netrc',
         action='store_true', dest='usenetrc', default=False,
-        help='use .netrc authentication data')
+        help='Use .netrc authentication data')
     authentication.add_option(
         '--video-password',
         dest='videopassword', metavar='PASSWORD',
-        help='video password (vimeo, smotri)')
+        help='Video password (vimeo, smotri)')
 
     video_format = optparse.OptionGroup(parser, 'Video Format Options')
     video_format.add_option(
         '-f', '--format',
         action='store', dest='format', metavar='FORMAT', default=None,
         help=(
-            'video format code, specify the order of preference using'
+            'Video format code, specify the order of preference using'
             ' slashes, as in -f 22/17/18 . '
             ' Instead of format codes, you can select by extension for the '
             'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
@@ -350,19 +351,19 @@ def parseOpts(overrideArguments=None):
     video_format.add_option(
         '--all-formats',
         action='store_const', dest='format', const='all',
-        help='download all available video formats')
+        help='Download all available video formats')
     video_format.add_option(
         '--prefer-free-formats',
         action='store_true', dest='prefer_free_formats', default=False,
-        help='prefer free video formats unless a specific one is requested')
+        help='Prefer free video formats unless a specific one is requested')
     video_format.add_option(
         '--max-quality',
         action='store', dest='format_limit', metavar='FORMAT',
-        help='highest quality format to download')
+        help='Highest quality format to download')
     video_format.add_option(
         '-F', '--list-formats',
         action='store_true', dest='listformats',
-        help='list all available formats')
+        help='List all available formats')
     video_format.add_option(
         '--youtube-include-dash-manifest',
         action='store_true', dest='youtube_include_dash_manifest', default=True,
@@ -382,46 +383,46 @@ def parseOpts(overrideArguments=None):
     subtitles.add_option(
         '--write-sub', '--write-srt',
         action='store_true', dest='writesubtitles', default=False,
-        help='write subtitle file')
+        help='Write subtitle file')
     subtitles.add_option(
         '--write-auto-sub', '--write-automatic-sub',
         action='store_true', dest='writeautomaticsub', default=False,
-        help='write automatic subtitle file (youtube only)')
+        help='Write automatic subtitle file (YouTube only)')
     subtitles.add_option(
         '--all-subs',
         action='store_true', dest='allsubtitles', default=False,
-        help='downloads all the available subtitles of the video')
+        help='Download all the available subtitles of the video')
     subtitles.add_option(
         '--list-subs',
         action='store_true', dest='listsubtitles', default=False,
-        help='lists all available subtitles for the video')
+        help='List all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
         action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
-        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
+        help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
         default=[], callback=_comma_separated_values_options_callback,
-        help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+        help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
     downloader = optparse.OptionGroup(parser, 'Download Options')
     downloader.add_option(
         '-r', '--rate-limit',
         dest='ratelimit', metavar='LIMIT',
-        help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+        help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
     downloader.add_option(
         '-R', '--retries',
         dest='retries', metavar='RETRIES', default=10,
-        help='number of retries (default is %default), or "infinite".')
+        help='Number of retries (default is %default), or "infinite".')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
-        help='size of download buffer (e.g. 1024 or 16K) (default is %default)')
+        help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
     downloader.add_option(
         '--no-resize-buffer',
         action='store_true', dest='noresizebuffer', default=False,
-        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+        help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
     downloader.add_option(
         '--test',
         action='store_true', dest='test', default=False,
@@ -433,11 +434,11 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '--xattr-set-filesize',
         dest='xattr_set_filesize', action='store_true',
-        help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+        help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
     downloader.add_option(
         '--hls-prefer-native',
         dest='hls_prefer_native', action='store_true',
-        help='(experimental) Use the native HLS downloader instead of ffmpeg.')
+        help='Use the native HLS downloader instead of ffmpeg (experimental)')
     downloader.add_option(
         '--external-downloader',
         dest='external_downloader', metavar='COMMAND',
@@ -446,7 +447,7 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '--external-downloader-args',
         dest='external_downloader_args', metavar='ARGS',
-        help='Give these arguments to the external downloader.')
+        help='Give these arguments to the external downloader')
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(
@@ -456,7 +457,7 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--no-check-certificate',
         action='store_true', dest='no_check_certificate', default=False,
-        help='Suppress HTTPS certificate validation.')
+        help='Suppress HTTPS certificate validation')
     workarounds.add_option(
         '--prefer-insecure',
         '--prefer-unsecure', action='store_true', dest='prefer_insecure',
@@ -464,16 +465,16 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--user-agent',
         metavar='UA', dest='user_agent',
-        help='specify a custom user agent')
+        help='Specify a custom user agent')
     workarounds.add_option(
         '--referer',
         metavar='URL', dest='referer', default=None,
-        help='specify a custom referer, use if the video access is restricted to one domain',
+        help='Specify a custom referer, use if the video access is restricted to one domain',
     )
     workarounds.add_option(
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', action='append',
-        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+        help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
     )
     workarounds.add_option(
         '--bidi-workaround',
@@ -488,7 +489,7 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-q', '--quiet',
         action='store_true', dest='quiet', default=False,
-        help='activates quiet mode')
+        help='Activate quiet mode')
     verbosity.add_option(
         '--no-warnings',
         dest='no_warnings', action='store_true', default=False,
@@ -496,51 +497,51 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-s', '--simulate',
         action='store_true', dest='simulate', default=False,
-        help='do not download the video and do not write anything to disk',)
+        help='Do not download the video and do not write anything to disk')
     verbosity.add_option(
         '--skip-download',
         action='store_true', dest='skip_download', default=False,
-        help='do not download the video',)
+        help='Do not download the video')
     verbosity.add_option(
         '-g', '--get-url',
         action='store_true', dest='geturl', default=False,
-        help='simulate, quiet but print URL')
+        help='Simulate, quiet but print URL')
     verbosity.add_option(
         '-e', '--get-title',
         action='store_true', dest='gettitle', default=False,
-        help='simulate, quiet but print title')
+        help='Simulate, quiet but print title')
     verbosity.add_option(
         '--get-id',
         action='store_true', dest='getid', default=False,
-        help='simulate, quiet but print id')
+        help='Simulate, quiet but print id')
     verbosity.add_option(
         '--get-thumbnail',
         action='store_true', dest='getthumbnail', default=False,
-        help='simulate, quiet but print thumbnail URL')
+        help='Simulate, quiet but print thumbnail URL')
     verbosity.add_option(
         '--get-description',
         action='store_true', dest='getdescription', default=False,
-        help='simulate, quiet but print video description')
+        help='Simulate, quiet but print video description')
     verbosity.add_option(
         '--get-duration',
         action='store_true', dest='getduration', default=False,
-        help='simulate, quiet but print video length')
+        help='Simulate, quiet but print video length')
     verbosity.add_option(
         '--get-filename',
         action='store_true', dest='getfilename', default=False,
-        help='simulate, quiet but print output filename')
+        help='Simulate, quiet but print output filename')
     verbosity.add_option(
         '--get-format',
         action='store_true', dest='getformat', default=False,
-        help='simulate, quiet but print output format')
+        help='Simulate, quiet but print output format')
     verbosity.add_option(
         '-j', '--dump-json',
         action='store_true', dest='dumpjson', default=False,
-        help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+        help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
     verbosity.add_option(
         '-J', '--dump-single-json',
         action='store_true', dest='dump_single_json', default=False,
-        help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+        help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
     verbosity.add_option(
         '--print-json',
         action='store_true', dest='print_json', default=False,
@@ -549,23 +550,23 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '--newline',
         action='store_true', dest='progress_with_newline', default=False,
-        help='output progress bar as new lines')
+        help='Output progress bar as new lines')
     verbosity.add_option(
         '--no-progress',
         action='store_true', dest='noprogress', default=False,
-        help='do not print progress bar')
+        help='Do not print progress bar')
     verbosity.add_option(
         '--console-title',
         action='store_true', dest='consoletitle', default=False,
-        help='display progress in console titlebar')
+        help='Display progress in console titlebar')
     verbosity.add_option(
         '-v', '--verbose',
         action='store_true', dest='verbose', default=False,
-        help='print various debugging information')
+        help='Print various debugging information')
     verbosity.add_option(
         '--dump-pages', '--dump-intermediate-pages',
         action='store_true', dest='dump_intermediate_pages', default=False,
-        help='print downloaded pages to debug problems (very verbose)')
+        help='Print downloaded pages to debug problems (very verbose)')
     verbosity.add_option(
         '--write-pages',
         action='store_true', dest='write_pages', default=False,
@@ -581,29 +582,29 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-C', '--call-home',
         dest='call_home', action='store_true', default=False,
-        help='Contact the youtube-dl server for debugging.')
+        help='Contact the youtube-dl server for debugging')
     verbosity.add_option(
         '--no-call-home',
         dest='call_home', action='store_false', default=False,
-        help='Do NOT contact the youtube-dl server for debugging.')
+        help='Do NOT contact the youtube-dl server for debugging')
 
     filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
     filesystem.add_option(
         '-a', '--batch-file',
         dest='batchfile', metavar='FILE',
-        help='file containing URLs to download (\'-\' for stdin)')
+        help='File containing URLs to download (\'-\' for stdin)')
     filesystem.add_option(
         '--id', default=False,
-        action='store_true', dest='useid', help='use only video ID in file name')
+        action='store_true', dest='useid', help='Use only video ID in file name')
     filesystem.add_option(
         '-o', '--output',
         dest='outtmpl', metavar='TEMPLATE',
-        help=('output filename template. Use %(title)s to get the title, '
+        help=('Output filename template. Use %(title)s to get the title, '
               '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
               '%(autonumber)s to get an automatically incremented number, '
               '%(ext)s for the filename extension, '
               '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
-              '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+              '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
               '%(upload_date)s for the upload date (YYYYMMDD), '
               '%(extractor)s for the provider (youtube, metacafe, etc), '
               '%(id)s for the video id, '
@@ -617,7 +618,7 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '--autonumber-size',
         dest='autonumber_size', metavar='NUMBER',
-        help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+        help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
     filesystem.add_option(
         '--restrict-filenames',
         action='store_true', dest='restrictfilenames', default=False,
@@ -625,55 +626,55 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-A', '--auto-number',
         action='store_true', dest='autonumber', default=False,
-        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000')
+        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')
     filesystem.add_option(
         '-t', '--title',
         action='store_true', dest='usetitle', default=False,
-        help='[deprecated] use title in file name (default)')
+        help='[deprecated] Use title in file name (default)')
     filesystem.add_option(
         '-l', '--literal', default=False,
         action='store_true', dest='usetitle',
-        help='[deprecated] alias of --title')
+        help='[deprecated] Alias of --title')
     filesystem.add_option(
         '-w', '--no-overwrites',
         action='store_true', dest='nooverwrites', default=False,
-        help='do not overwrite files')
+        help='Do not overwrite files')
     filesystem.add_option(
         '-c', '--continue',
         action='store_true', dest='continue_dl', default=True,
-        help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+        help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
     filesystem.add_option(
         '--no-continue',
         action='store_false', dest='continue_dl',
-        help='do not resume partially downloaded files (restart from beginning)')
+        help='Do not resume partially downloaded files (restart from beginning)')
     filesystem.add_option(
         '--no-part',
         action='store_true', dest='nopart', default=False,
-        help='do not use .part files - write directly into output file')
+        help='Do not use .part files - write directly into output file')
     filesystem.add_option(
         '--no-mtime',
         action='store_false', dest='updatetime', default=True,
-        help='do not use the Last-modified header to set the file modification time')
+        help='Do not use the Last-modified header to set the file modification time')
     filesystem.add_option(
         '--write-description',
         action='store_true', dest='writedescription', default=False,
-        help='write video description to a .description file')
+        help='Write video description to a .description file')
     filesystem.add_option(
         '--write-info-json',
         action='store_true', dest='writeinfojson', default=False,
-        help='write video metadata to a .info.json file')
+        help='Write video metadata to a .info.json file')
     filesystem.add_option(
         '--write-annotations',
         action='store_true', dest='writeannotations', default=False,
-        help='write video annotations to a .annotation file')
+        help='Write video annotations to a .annotation file')
     filesystem.add_option(
         '--load-info',
         dest='load_info_filename', metavar='FILE',
-        help='json file containing the video information (created with the "--write-json" option)')
+        help='JSON file containing the video information (created with the "--write-info-json" option)')
     filesystem.add_option(
         '--cookies',
         dest='cookiefile', metavar='FILE',
-        help='file to read cookies from and dump cookie jar in')
+        help='File to read cookies from and dump cookie jar in')
     filesystem.add_option(
         '--cache-dir', dest='cachedir', default=None, metavar='DIR',
         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -689,11 +690,11 @@ def parseOpts(overrideArguments=None):
     thumbnail.add_option(
         '--write-thumbnail',
         action='store_true', dest='writethumbnail', default=False,
-        help='write thumbnail image to disk')
+        help='Write thumbnail image to disk')
     thumbnail.add_option(
         '--write-all-thumbnails',
         action='store_true', dest='write_all_thumbnails', default=False,
-        help='write all thumbnail image formats to disk')
+        help='Write all thumbnail image formats to disk')
     thumbnail.add_option(
         '--list-thumbnails',
         action='store_true', dest='list_thumbnails', default=False,
@@ -703,14 +704,14 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '-x', '--extract-audio',
         action='store_true', dest='extractaudio', default=False,
-        help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+        help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
     postproc.add_option(
         '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-        help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+        help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
     postproc.add_option(
         '--audio-quality', metavar='QUALITY',
         dest='audioquality', default='5',
-        help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+        help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
     postproc.add_option(
         '--recode-video',
         metavar='FORMAT', dest='recodevideo', default=None,
@@ -718,27 +719,27 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '-k', '--keep-video',
         action='store_true', dest='keepvideo', default=False,
-        help='keeps the video file on disk after the post-processing; the video is erased by default')
+        help='Keep the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option(
         '--no-post-overwrites',
         action='store_true', dest='nopostoverwrites', default=False,
-        help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+        help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
     postproc.add_option(
         '--embed-subs',
         action='store_true', dest='embedsubtitles', default=False,
-        help='embed subtitles in the video (only for mp4 videos)')
+        help='Embed subtitles in the video (only for mp4 videos)')
     postproc.add_option(
         '--embed-thumbnail',
         action='store_true', dest='embedthumbnail', default=False,
-        help='embed thumbnail in the audio as cover art')
+        help='Embed thumbnail in the audio as cover art')
     postproc.add_option(
         '--add-metadata',
         action='store_true', dest='addmetadata', default=False,
-        help='write metadata to the video file')
+        help='Write metadata to the video file')
     postproc.add_option(
         '--metadata-from-title',
         metavar='FORMAT', dest='metafromtitle',
-        help='parse additional metadata like song title / artist from the video title. '
+        help='Parse additional metadata like song title / artist from the video title. '
              'The format syntax is the same as --output, '
              'the parsed parameters replace existing values. '
              'Additional templates: %(album), %(artist). '
@@ -747,7 +748,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--xattrs',
         action='store_true', dest='xattrs', default=False,
-        help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+        help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
     postproc.add_option(
         '--fixup',
         metavar='POLICY', dest='fixup', default='detect_or_warn',
@@ -794,6 +795,11 @@ def parseOpts(overrideArguments=None):
             write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
         command_line_conf = sys.argv[1:]
+        # Workaround for Python 2.x, where argv is a byte list
+        if sys.version_info < (3,):
+            command_line_conf = [
+                a.decode(preferredencoding(), 'replace') for a in command_line_conf]
+
         if '--ignore-config' in command_line_conf:
             system_conf = []
             user_conf = []
index e54ae678da17bef5c5848bc7165d42d5eec912a4..ef9fdfa19a5562b6b85840ce129b632967b30326 100644 (file)
@@ -1,6 +1,11 @@
 from __future__ import unicode_literals
 
-from ..utils import PostProcessingError
+import os
+
+from ..utils import (
+    PostProcessingError,
+    encodeFilename,
+)
 
 
 class PostProcessor(object):
@@ -46,6 +51,12 @@ class PostProcessor(object):
         """
         return None, information  # by default, keep file and do nothing
 
+    def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+        try:
+            os.utime(encodeFilename(path), (atime, mtime))
+        except Exception:
+            self._downloader.report_warning(errnote)
+
 
 class AudioConversionError(PostProcessingError):
     pass
index b6f51cfd5e1ed5cebb4981dfbf3152e67ed33d1d..8e99a3c2c461d300dbf077236907e0f80bd16e9b 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import io
 import os
 import subprocess
-import sys
 import time
 
 
@@ -117,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor):
     def executable(self):
         return self._paths[self.basename]
 
+    @property
+    def probe_available(self):
+        return self.probe_basename is not None
+
     @property
     def probe_executable(self):
         return self._paths[self.probe_basename]
@@ -143,7 +146,8 @@ class FFmpegPostProcessor(PostProcessor):
             stderr = stderr.decode('utf-8', 'replace')
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
-        os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
+        self.try_utime(out_path, oldest_mtime, oldest_mtime)
+
         if self._deletetempfiles:
             for ipath in input_paths:
                 os.remove(ipath)
@@ -169,7 +173,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
     def get_audio_codec(self, path):
 
-        if not self.probe_executable:
+        if not self.probe_available:
             raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
@@ -269,20 +273,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             else:
                 self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
                 self.run_ffmpeg(path, new_path, acodec, more_opts)
-        except:
-            etype, e, tb = sys.exc_info()
-            if isinstance(e, AudioConversionError):
-                msg = 'audio conversion failed: ' + e.msg
-            else:
-                msg = 'error running ' + self.basename
-            raise PostProcessingError(msg)
+        except AudioConversionError as e:
+            raise PostProcessingError(
+                'audio conversion failed: ' + e.msg)
+        except Exception:
+            raise PostProcessingError('error running ' + self.basename)
 
         # Try to update the date time for extracted audio file.
         if information.get('filetime') is not None:
-            try:
-                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
-            except:
-                self._downloader.report_warning('Cannot update utime of audio file')
+            self.try_utime(
+                new_path, time.time(), information['filetime'],
+                errnote='Cannot update utime of audio file')
 
         information['filepath'] = new_path
         return self._nopostoverwrites, information
index d8be4049f5dce0fdd9a61f2aff3c4284d494e598..de3169eef1d6ec29d82a60b2f4b6a68f49d7dd4e 100644 (file)
@@ -65,7 +65,7 @@ def update_self(to_screen, verbose):
     # Check if there is a new version
     try:
         newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
-    except:
+    except Exception:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t find the current version. Please try again later.')
@@ -78,7 +78,7 @@ def update_self(to_screen, verbose):
     try:
         versions_info = opener.open(JSON_URL).read().decode('utf-8')
         versions_info = json.loads(versions_info)
-    except:
+    except Exception:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t obtain versions info. Please try again later.')
index 472d4df41fda2cb1ffd0392cc4da0f4bdcc2a48a..52f0dd09aac2ef0103212086a280fc317b36b82d 100644 (file)
@@ -75,7 +75,7 @@ def preferredencoding():
     try:
         pref = locale.getpreferredencoding()
         'TEST'.encode(pref)
-    except:
+    except Exception:
         pref = 'UTF-8'
 
     return pref
@@ -127,7 +127,7 @@ def write_json_file(obj, fn):
             except OSError:
                 pass
         os.rename(tf.name, fn)
-    except:
+    except Exception:
         try:
             os.remove(tf.name)
         except OSError:
@@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
     if entity in compat_html_entities.name2codepoint:
         return compat_chr(compat_html_entities.name2codepoint[entity])
 
-    mobj = re.match(r'#(x?[0-9]+)', entity)
+    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
     if mobj is not None:
         numstr = mobj.group(1)
         if numstr.startswith('x'):
@@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True):
     # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
     # Remove AM/PM + timezone
     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 
@@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True):
     ]
     if day_first:
         format_expressions.extend([
+            '%d-%m-%Y',
             '%d.%m.%Y',
             '%d/%m/%Y',
             '%d/%m/%y',
@@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True):
         ])
     else:
         format_expressions.extend([
+            '%m-%d-%Y',
             '%m.%d.%Y',
             '%m/%d/%Y',
             '%m/%d/%y',
@@ -1577,7 +1580,7 @@ def js_to_json(code):
         '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
         [a-zA-Z_][.a-zA-Z_0-9]*
         ''', fix_kv, code)
-    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
     return res
 
 
index 51b4260aad38f9dbae3b8d2e0d42602c540d3e4f..1095fea2fe908fe59d28b3489b7916d05b8e6354 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.03.18'
+__version__ = '2015.04.09'