Devin J. Pohly
Eduardo Ferro Aldama
Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
- find -name "*.pyc" -delete
+ find . -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
youtube-dl [OPTIONS] URL [URL...]
# OPTIONS
- -h, --help print this help text and exit
- --version print program version and exit
- -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
- -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+ -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
- --dump-user-agent display the current browser identification
+ --dump-user-agent Display the current browser identification
--list-extractors List all supported extractors and the URLs they would handle
--extractor-descriptions Output descriptions of all supported extractors
- --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+ --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
--ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
--flat-playlist Do not extract the videos of a playlist, only list them.
- --no-color Do not emit color codes in output.
+ --no-color Do not emit color codes in output
## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
not present) is used for the actual downloading. (experimental)
## Video Selection:
- --playlist-start NUMBER playlist video to start at (default is 1)
- --playlist-end NUMBER playlist video to end at (default is last)
- --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+ --playlist-start NUMBER Playlist video to start at (default is 1)
+ --playlist-end NUMBER Playlist video to end at (default is last)
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
- --match-title REGEX download only matching titles (regex or caseless sub-string)
- --reject-title REGEX skip download for matching titles (regex or caseless sub-string)
+ --match-title REGEX Download only matching titles (regex or caseless sub-string)
+ --reject-title REGEX Skip download for matching titles (regex or caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
- --date DATE download only videos uploaded in this date
- --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
- --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
+ --date DATE Download only videos uploaded in this date
+ --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive)
+ --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive)
--min-views COUNT Do not download any videos with less than COUNT views
--max-views COUNT Do not download any videos with more than COUNT views
- --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
+ --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
!key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
- --no-playlist If the URL refers to a video and a playlist, download only the video.
- --yes-playlist If the URL refers to a video and a playlist, download the playlist.
- --age-limit YEARS download only videos suitable for the given age
+ --no-playlist Download only the video, if the URL refers to a video and a playlist.
+ --yes-playlist Download the playlist, if the URL refers to a video and a playlist.
+ --age-limit YEARS Download only videos suitable for the given age
--download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
--include-ads Download advertisements as well (experimental)
## Download Options:
- -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
- -R, --retries RETRIES number of retries (default is 10), or "infinite".
- --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
- --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
+ -r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES Number of retries (default is 10), or "infinite".
+ --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+ --no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
--playlist-reverse Download playlist videos in reverse order
- --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
- --hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
+ --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
+ --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
- --external-downloader-args ARGS Give these arguments to the external downloader.
+ --external-downloader-args ARGS Give these arguments to the external downloader
## Filesystem Options:
- -a, --batch-file FILE file containing URLs to download ('-' for stdin)
- --id use only video ID in file name
- -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+ -a, --batch-file FILE File containing URLs to download ('-' for stdin)
+ --id Use only video ID in file name
+ -o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
- the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
+ the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
%(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
%(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
- --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+ --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
--restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
- -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
- -t, --title [deprecated] use title in file name (default)
- -l, --literal [deprecated] alias of --title
- -w, --no-overwrites do not overwrite files
- -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
- --no-continue do not resume partially downloaded files (restart from beginning)
- --no-part do not use .part files - write directly into output file
- --no-mtime do not use the Last-modified header to set the file modification time
- --write-description write video description to a .description file
- --write-info-json write video metadata to a .info.json file
- --write-annotations write video annotations to a .annotation file
- --load-info FILE json file containing the video information (created with the "--write-json" option)
- --cookies FILE file to read cookies from and dump cookie jar in
+ -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+ -t, --title [deprecated] Use title in file name (default)
+ -l, --literal [deprecated] Alias of --title
+ -w, --no-overwrites Do not overwrite files
+ -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+ --no-continue Do not resume partially downloaded files (restart from beginning)
+ --no-part Do not use .part files - write directly into output file
+ --no-mtime Do not use the Last-modified header to set the file modification time
+ --write-description Write video description to a .description file
+ --write-info-json Write video metadata to a .info.json file
+ --write-annotations Write video annotations to a .annotation file
+ --load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
+ --cookies FILE File to read cookies from and dump cookie jar in
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
change.
--rm-cache-dir Delete all filesystem cache files
## Thumbnail images:
- --write-thumbnail write thumbnail image to disk
- --write-all-thumbnails write all thumbnail image formats to disk
+ --write-thumbnail Write thumbnail image to disk
+ --write-all-thumbnails Write all thumbnail image formats to disk
--list-thumbnails Simulate and list all available thumbnail formats
## Verbosity / Simulation Options:
- -q, --quiet activates quiet mode
+ -q, --quiet Activate quiet mode
--no-warnings Ignore warnings
- -s, --simulate do not download the video and do not write anything to disk
- --skip-download do not download the video
- -g, --get-url simulate, quiet but print URL
- -e, --get-title simulate, quiet but print title
- --get-id simulate, quiet but print id
- --get-thumbnail simulate, quiet but print thumbnail URL
- --get-description simulate, quiet but print video description
- --get-duration simulate, quiet but print video length
- --get-filename simulate, quiet but print output filename
- --get-format simulate, quiet but print output format
- -j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
- -J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+ -s, --simulate Do not download the video and do not write anything to disk
+ --skip-download Do not download the video
+ -g, --get-url Simulate, quiet but print URL
+ -e, --get-title Simulate, quiet but print title
+ --get-id Simulate, quiet but print id
+ --get-thumbnail Simulate, quiet but print thumbnail URL
+ --get-description Simulate, quiet but print video description
+ --get-duration Simulate, quiet but print video length
+ --get-filename Simulate, quiet but print output filename
+ --get-format Simulate, quiet but print output format
+ -j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
+ -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
information in a single line.
--print-json Be quiet and print the video information as JSON (video is still being downloaded).
- --newline output progress bar as new lines
- --no-progress do not print progress bar
- --console-title display progress in console titlebar
- -v, --verbose print various debugging information
- --dump-pages print downloaded pages to debug problems (very verbose)
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --console-title Display progress in console titlebar
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages to debug problems (very verbose)
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for debugging.
- --no-call-home Do NOT contact the youtube-dl server for debugging.
+ -C, --call-home Contact the youtube-dl server for debugging
+ --no-call-home Do NOT contact the youtube-dl server for debugging
## Workarounds:
--encoding ENCODING Force the specified encoding (experimental)
- --no-check-certificate Suppress HTTPS certificate validation.
+ --no-check-certificate Suppress HTTPS certificate validation
--prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
- --user-agent UA specify a custom user agent
- --referer URL specify a custom referer, use if the video access is restricted to one domain
- --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+ --user-agent UA Specify a custom user agent
+ --referer URL Specify a custom referer, use if the video access is restricted to one domain
+ --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
--bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
--sleep-interval SECONDS Number of seconds to sleep before each download.
## Video Format Options:
- -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
+ -f, --format FORMAT Video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
"worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
bestvideo+bestaudio.
- --all-formats download all available video formats
- --prefer-free-formats prefer free video formats unless a specific one is requested
- --max-quality FORMAT highest quality format to download
- -F, --list-formats list all available formats
+ --all-formats Download all available video formats
+ --prefer-free-formats Prefer free video formats unless a specific one is requested
+ --max-quality FORMAT Highest quality format to download
+ -F, --list-formats List all available formats
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
merge is required
## Subtitle Options:
- --write-sub write subtitle file
- --write-auto-sub write automatic subtitle file (youtube only)
- --all-subs downloads all the available subtitles of the video
- --list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
- --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
+ --write-sub Write subtitle file
+ --write-auto-sub Write automatic subtitle file (YouTube only)
+ --all-subs Download all the available subtitles of the video
+ --list-subs List all available subtitles for the video
+ --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+ --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
## Authentication Options:
- -u, --username USERNAME login with this account ID
- -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
- -2, --twofactor TWOFACTOR two-factor auth code
- -n, --netrc use .netrc authentication data
- --video-password PASSWORD video password (vimeo, smotri)
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
+ -2, --twofactor TWOFACTOR Two-factor auth code
+ -n, --netrc Use .netrc authentication data
+ --video-password PASSWORD Video password (vimeo, smotri)
## Post-processing Options:
- -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
- --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
- --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
- (default 5)
+ -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+ --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+ --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+ 5)
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
- -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
- --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
- --embed-subs embed subtitles in the video (only for mp4 videos)
- --embed-thumbnail embed thumbnail in the audio as cover art
- --add-metadata write metadata to the video file
- --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+ -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
+ --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
+ --embed-subs Embed subtitles in the video (only for mp4 videos)
+ --embed-thumbnail Embed thumbnail in the audio as cover art
+ --add-metadata Write metadata to the video file
+ --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
%(title)s" matches a title like "Coldplay - Paradise"
- --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
+ --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
fix file if we can, warn otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
- except:
+ except Exception:
print('\nFail: {0}'.format(test['name']))
continue
--- /dev/null
+# Devscript: generate reference AES ciphertexts with the openssl CLI so the
+# pure-Python AES routines in youtube_dl.aes can be checked against a
+# known-good implementation (consumed by test/test_aes.py).
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+# Make the youtube_dl package importable when run from devscripts/.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+# Plaintext shared with TestAES.setUp in test/test_aes.py.
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+ # Render a list of byte values as hex bytes, the form openssl's -K/-iv expect.
+ return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+ # Encrypt secret_msg with the given cipher/key/IV via the openssl CLI
+ # and return the raw ciphertext bytes read from its stdout.
+ cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+ prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ out, _ = prog.communicate(secret_msg)
+ return out
+
+# 16-byte key and IV shared with the test suite: 0x20, 0x15, then zeros.
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+# Derive the effective key by AES-encrypting the password under its own
+# expanded key — presumably mirrors aes_decrypt_text's key derivation;
+# confirm against youtube_dl/aes.py.
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+# Same derivation for the 32-byte (AES-256) case; the 16-byte result is
+# repeated to reach the 32-byte key length.
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
- **1tv**: Первый канал
- **1up.com**
- **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
- **24video**
- **3sat**
- **4tube**
- **DctpTv**
- **DeezerPlaylist**
- **defense.gouv.fr**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
- **DrTuber**
- **DRTV**
- **Dump**
+ - **Dumpert**
- **dvtv**: http://video.aktualne.cz/
- **EaglePlatform**
- **EbaumsWorld**
- **Gamekings**
- **GameOne**
- **gameone:playlist**
+ - **Gamersyde**
- **GameSpot**
- **GameStar**
- **Gametrailers**
- **Letv**
- **LetvPlaylist**
- **LetvTv**
+ - **Libsyn**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **Mgoon**
- **Minhateca**
- **MinistryGrid**
+ - **miomio.tv**
- **mitele.es**
- **mixcloud**
- **MLB**
- **NBA**
- **NBC**
- **NBCNews**
+ - **NBCSports**
+ - **NBCSportsVPlayer**
- **ndr**: NDR.de - Mediathek
- **NDTV**
- **NerdCubedFeed**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **NRK**
+ - **NRKPlaylist**
- **NRKTV**
- **ntv.ru**
- **Nuvid**
- **PornHub**
- **PornHubPlaylist**
- **Pornotube**
+ - **PornoVoisines**
- **PornoXO**
- **PrimeShareTV**
- **PromptFile**
- **radio.de**
- **radiobremen**
- **radiofrance**
+ - **RadioJavan**
- **Rai**
- **RBMARadio**
- **RedTube**
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **southpark.cc.com**
- **southpark.de**
- **Space**
+ - **SpankBang**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Ubu**
- **udemy**
- **udemy:course**
+ - **UDNEmbed**
- **Ultimedia**
- **Unistra**
- **Urort**: NRK P3 Urørt
- **ustream**
- **ustream:channel**
+ - **Varzesh3**
- **Vbox7**
- **VeeHD**
- **Veoh**
+ - **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VGTV**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: YouTube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
- **ZDFChannel**
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+ # Tests for the pure-Python AES routines in youtube_dl.aes. The
+ # hard-coded ciphertexts below are openssl-produced reference data
+ # for the key/IV set up here.
+ def setUp(self):
+ # 16-byte key and IV (0x20, 0x15, then zeros), shared with the devscript.
+ self.key = self.iv = [0x20, 0x15] + 14 * [0]
+ self.secret_msg = b'Secret message goes here'
+
+ def test_encrypt(self):
+ # Round-trip: decrypting the encryption must restore the message.
+ msg = b'message'
+ key = list(range(16))
+ encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+ decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+ self.assertEqual(decrypted, msg)
+
+ def test_cbc_decrypt(self):
+ # Reference ciphertext from openssl aes-128-cbc; rstrip removes the
+ # trailing 0x08 padding bytes (24-byte message, 16-byte blocks).
+ data = bytes_to_intlist(
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+ )
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_decrypt_text(self):
+ # aes_decrypt_text is fed base64 of the first 8 IV bytes followed by
+ # the ciphertext; exercised with 16-byte (AES-128) and 32-byte
+ # (AES-256) key sizes.
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 16))
+ self.assertEqual(decrypted, self.secret_msg)
+
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 32))
+ self.assertEqual(decrypted, self.secret_msg)
+
+if __name__ == '__main__':
+ unittest.main()
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
def test_youtube_feeds(self):
- self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
#!/usr/bin/env python
+# coding: utf-8
+
from __future__ import unicode_literals
import unittest
import sys
import os
import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import encodeArgument
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def test_main_exec(self):
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ def test_cmdline_umlauts(self):
+ p = subprocess.Popen(
+ [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+ cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ self.assertFalse(stderr)
+
if __name__ == '__main__':
unittest.main()
encodeFilename,
escape_rfc3986,
escape_url,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
InAdvancePagedList,
urlencode_postdata,
version_tuple,
xpath_with_ns,
+ xpath_text,
render_table,
match_str,
)
def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(unescapeHTML('/'), '/')
+ self.assertEqual(unescapeHTML('/'), '/')
self.assertEqual(
unescapeHTML('é'), 'é')
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
def test_find_xpath_attr(self):
testxml = '''<root>
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
+ def test_xpath_text(self):
+ testxml = '''<root>
+ <div>
+ <p>Foo</p>
+ </div>
+ </root>'''
+ doc = xml.etree.ElementTree.fromstring(testxml)
+ self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+ self.assertTrue(xpath_text(doc, 'div/bar') is None)
+ self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')
+ on = js_to_json('["abc", "def",]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('{"abc": "def",}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
'Parameter outtmpl is bytes, but should be a unicode string. '
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
-
self._setup_opener()
if auto_init:
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
- if sys.version_info < (3,):
- # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
- if opts.outtmpl is not None:
- opts.outtmpl = opts.outtmpl.decode(preferredencoding())
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
lines, columns = map(int, out.split())
- except:
+ except Exception:
pass
return _terminal_size(columns, lines)
return
try:
os.utime(filename, (time.time(), filetime))
- except:
+ except Exception:
pass
return filetime
)
continuedl_and_exists = (
- self.params.get('continuedl', False) and
+ self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
open_mode = 'wb'
if resume_len != 0:
- if self.params.get('continuedl', False):
+ if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
protocol = info_dict.get('rtmp_protocol', None)
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
- continue_dl = info_dict.get('continuedl', False)
+ continue_dl = info_dict.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
+from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
+from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
GameOneIE,
GameOnePlaylistIE,
)
+from .gamersyde import GamersydeIE
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
LetvTvIE,
LetvPlaylistIE
)
+from .libsyn import LibsynIE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
from .mgoon import MgoonIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .nbc import (
NBCIE,
NBCNewsIE,
+ NBCSportsIE,
+ NBCSportsVPlayerIE,
)
from .ndr import NDRIE
from .ndtv import NDTVIE
)
from .nrk import (
NRKIE,
+ NRKPlaylistIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
)
from .rutv import RUTVIE
from .sandia import SandiaIE
+from .safari import (
+ SafariIE,
+ SafariCourseIE,
+)
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
SouthparkDeIE,
)
from .space import SpaceIE
+from .spankbang import SpankBangIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+ TwentyTwoTracksIE,
+ TwentyTwoTracksGenreIE
+)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
UdemyIE,
UdemyCourseIE
)
+from .udn import UDNEmbedIE
from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
)
from ..utils import (
ExtractorError,
+ qualities,
)
class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
- _TEST = {
+ _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+ _TESTS = [{
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
'info_dict': {
'description': 'One Piece 606',
'title': 'One Piece 606',
}
- }
+ }, {
+ 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
note='Confirming after redirect')
webpage = self._download_webpage(url, video_id)
+ FORMATS = ('normal', 'hq')
+ quality = qualities(FORMATS)
formats = []
- for format_id in ('normal', 'hq'):
+ for format_id in FORMATS:
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, 'video file URLx',
fatal=False)
formats.append({
'format_id': format_id,
'url': video_url,
+ 'quality': quality(format_id),
})
self._sort_formats(formats)
video_title = self._og_search_title(webpage)
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import int_or_none
class AftonbladetIE(InfoExtractor):
- _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+ _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
_TEST = {
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
'info_dict': {
formats.append({
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
'ext': 'mp4',
- 'width': fmt['width'],
- 'height': fmt['height'],
- 'tbr': fmt['bitrate'],
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'tbr': int_or_none(fmt.get('bitrate')),
'protocol': 'http',
})
self._sort_formats(formats)
'id': video_id,
'title': internal_meta_json['title'],
'formats': formats,
- 'thumbnail': internal_meta_json['imageUrl'],
- 'description': internal_meta_json['shortPreamble'],
- 'timestamp': internal_meta_json['timePublished'],
- 'duration': internal_meta_json['duration'],
- 'view_count': internal_meta_json['views'],
+ 'thumbnail': internal_meta_json.get('imageUrl'),
+ 'description': internal_meta_json.get('shortPreamble'),
+ 'timestamp': int_or_none(internal_meta_json.get('timePublished')),
+ 'duration': int_or_none(internal_meta_json.get('duration')),
+ 'view_count': int_or_none(internal_meta_json.get('views')),
}
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+ 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+ 'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
def _real_extract(self, url):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
-
- f4m_url = self._search_regex(
- r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
- 'f4m url')
+ video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
title = re.sub(': Video$', '', self._og_search_title(webpage))
+ embed_info = self._download_json(
+ 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ formats = []
+ for stream in embed_info['streams']:
+ if stream["muxing_format"] == "TS":
+ formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+ else:
+ formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+ self._sort_formats(formats)
+
return {
- 'id': name.split('-')[-1],
+ 'id': video_id,
'title': title,
- 'formats': self._extract_f4m_formats(f4m_url, name),
+ 'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+ 'only_matching': True,
}]
def _real_extract(self, url):
(media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
- 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
)
from ..aes import (
aes_cbc_decrypt,
- inc,
)
key = obfuscate_key(id)
- class Counter:
- __value = iv
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
def _build_request(url):
    """Build a request for *url* with the family filter disabled."""
    req = compat_urllib_request.Request(url)
    # Both cookies must go into one header: calling add_header('Cookie', ...)
    # twice would overwrite the first value instead of appending to it.
    req.add_header('Cookie', 'family_filter=off; ff=off')
    return req
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ embed_request = self._build_request(embed_url)
+ embed_page = self._download_webpage(
+ embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+)
+
+
class DHMIE(InfoExtractor):
    """Extractor for the film archive of the Deutsches Historisches Museum.

    The page configures its player with an XSPF playlist URL; media location
    and thumbnail are read from the playlist's first <track>, while title,
    description and duration are scraped from the HTML page itself.
    """
    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
        'md5': '11c475f670209bf6acca0b2b7ef51827',
        'info_dict': {
            'id': 'the-marshallplan-at-work-in-west-germany',
            'ext': 'flv',
            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
            'description': 'md5:1fabd480c153f97b07add61c44407c82',
            'duration': 660,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
        'md5': '09890226332476a3e3f6f2cb74734aa5',
        'info_dict': {
            'id': 'rolle-1',
            'ext': 'flv',
            'title': 'ROLLE 1',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Player setup embeds the playlist as: file: '<url>'
        playlist_url = self._search_regex(
            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
        playlist = self._download_xml(playlist_url, video_id)

        # All interesting elements live under the XSPF namespace.
        ns = '{http://xspf.org/ns/0/}'
        track = playlist.find('./%strackList/%strack' % (ns, ns))

        video_url = xpath_text(
            track, './%slocation' % ns, 'video url', fatal=True)
        thumbnail = xpath_text(track, './%simage' % ns, 'thumbnail')

        title = self._search_regex(
            [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
            webpage, 'title').strip()
        description = self._html_search_regex(
            r'<p><strong>Description:</strong>(.+?)</p>',
            webpage, 'description', default=None)
        duration = parse_duration(self._search_regex(
            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
            webpage, 'duration', default=None))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
+import time
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
- 'id': 'iseven',
+ 'id': '17732',
+ 'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'md5:9e525642c25a0a24302869937cf69d17',
+ 'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
'params': {
'skip_download': True,
}
- }
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'uploader_id': '3769985',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+ prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+ room_id, int(time.time()))
+
+ auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
config = self._download_json(
- 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
+ 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+ video_id)
data = config['data']
error_code = config.get('error', 0)
- show_status = data.get('show_status')
if error_code is not 0:
- raise ExtractorError(
- 'Server reported error %i' % error_code, expected=True)
+ error_desc = 'Server reported error %i' % error_code
+ if isinstance(data, (compat_str, compat_basestring)):
+ error_desc += ': ' + data
+ raise ExtractorError(error_desc, expected=True)
+ show_status = data.get('show_status')
# 1 = live, 2 = offline
if show_status == '2':
raise ExtractorError(
base_url = data['rtmp_url']
live_path = data['rtmp_live']
- title = self._live_title(data['room_name'])
+ title = self._live_title(unescapeHTML(data['room_name']))
description = data.get('show_details')
thumbnail = data.get('room_src')
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': room_id,
+ 'display_id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
import re
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
- 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
- 'md5': '9dcfe344732808dbfcc901537973c922',
+ 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
+ 'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': {
- 'id': '36983',
+ 'id': '45918',
'ext': 'mp4',
- 'title': 'Kaffeeland Schweiz',
- 'description': 'md5:cc4424b18b75ae9948b13929a0814033',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': '3sat',
- 'upload_date': '20130622'
+ 'upload_date': '20140913'
}
}
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
+ status_code = details_doc.find('./status/statuscode')
+ if status_code is not None and status_code.text != 'ok':
+ code = status_code.text
+ if code == 'notVisibleAnymore':
+ message = 'Video %s is not available' % video_id
+ else:
+ message = '%s returned error: %s' % (self.IE_NAME, code)
+ raise ExtractorError(message, expected=True)
+
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
'width': int(te.attrib['key'].partition('x')[0]),
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor, ExtractorError
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
- 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
- 'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
+ 'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
'info_dict': {
- 'id': 'partiets-mand-7-8',
+ 'id': 'panisk-paske-5',
'ext': 'mp4',
- 'title': 'Partiets mand (7:8)',
- 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
- 'timestamp': 1403047940,
- 'upload_date': '20140617',
- 'duration': 1299.040,
+ 'title': 'Panisk Påske (5)',
+ 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
+ 'timestamp': 1426984612,
+ 'upload_date': '20150322',
+ 'duration': 1455,
},
}
webpage = self._download_webpage(url, video_id)
+ if '>Programmet er ikke længere tilgængeligt' in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
video_id = self._search_regex(
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
webpage, 'video id')
video_url = self._search_regex(
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
- thumb = self._og_search_thumbnail(webpage)
- title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'title': title,
'url': video_url,
- 'thumbnail': thumb,
+ 'thumbnail': thumbnail,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import qualities
+
+
class DumpertIE(InfoExtractor):
    """Extractor for dumpert.nl media pages.

    Format URLs are stored base64-encoded in the player's data-files
    attribute; the 'still' entry of that mapping is the thumbnail, every
    other entry is a video format.
    """
    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
        'info_dict': {
            'id': '6646981/951bc60f',
            'ext': 'mp4',
            'title': 'Ik heb nieuws voor je',
            'description': 'Niet schrikken hoor',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The nsfw cookie disables the age gate on the page.
        request = compat_urllib_request.Request(url)
        request.add_header('Cookie', 'nsfw=1')
        webpage = self._download_webpage(request, video_id)

        encoded_files = self._search_regex(
            r'data-files="([^"]+)"', webpage, 'data files')
        files = self._parse_json(
            base64.b64decode(encoded_files.encode('utf-8')).decode('utf-8'),
            video_id)

        # Known format ids, ordered worst to best.
        quality = qualities(['flv', 'mobile', 'tablet', '720p'])

        formats = []
        for format_id, video_url in files.items():
            if format_id == 'still':
                continue
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        title = self._html_search_meta(
            'title', webpage) or self._og_search_title(webpage)
        description = self._html_search_meta(
            'description', webpage) or self._og_search_description(webpage)
        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats
        }
'duration': 216,
'view_count': int,
},
+ 'skip': 'Georestricted',
}]
def _handle_error(self, response):
class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
_TESTS = [{
- 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
- 'md5': 'e4af06f3bf0d5f471921a18db5764642',
+ 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+ 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
'info_dict': {
- 'id': '0-7jqrsr18',
+ 'id': '0-ipq1gsai',
'ext': 'mp4',
- 'title': 'What\'s Wrong with These Photos? A Whole Lot',
- 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
- 'timestamp': 1406876400,
- 'upload_date': '20140801',
+ 'title': 'Fast Fingers of Fate',
+ 'description': 'md5:686114ced0a032926935e9015ee794ac',
+ 'timestamp': 1428033600,
+ 'upload_date': '20150403',
}
}, {
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_meta('VideoURL', webpage, 'url')
+
+ video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True)
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'description') or self._og_search_description(webpage)
timestamp = parse_iso8601(self._search_regex(
r'<span class="publish-date"><time datetime="([^"]+)">',
- webpage, 'timestamp'))
+ webpage, 'timestamp', fatal=False))
return {
'id': video_id,
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
class EroProfileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
- _TEST = {
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
'info_dict': {
'thumbnail': 're:https?://.*\.jpg',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ query = compat_urllib_parse.urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_initialize(self):
+ self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ raise ExtractorError(
+ 'This video requires login. Please specify a username and password and try again.', expected=True)
+
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
clean_html,
ExtractorError,
int_or_none,
+ float_or_none,
parse_duration,
+ determine_ext,
)
if not video_url:
continue
format_id = video['format']
- if video_url.endswith('.f4m'):
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
if georestricted:
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
- f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
- for f4m_format in f4m_formats:
- f4m_format['preference'] = 1
- formats.extend(f4m_formats)
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
'title': info['titre'],
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': parse_duration(info['duree']),
+ 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'formats': formats,
}
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
- 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+ 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+ 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
'info_dict': {
- 'id': 'EV_22853',
+ 'id': 'EV_50111',
'ext': 'flv',
- 'title': 'Dans les jardins de William Christie - Le Camus',
- 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
- 'upload_date': '20140829',
- 'timestamp': 1409317200,
+ 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+ 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+ 'upload_date': '20150320',
+ 'timestamp': 1426892400,
+ 'duration': 2760.9,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
+
webpage = self._download_webpage(url, name)
+
+ if ">Ce live n'est plus disponible en replay<" in webpage:
+ raise ExtractorError('Video %s is not available' % name, expected=True)
+
video_id, catalogue = self._search_regex(
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ parse_duration,
+ remove_start,
+)
+
+
class GamersydeIE(InfoExtractor):
    """Extractor for HQ stream pages on gamersyde.com.

    The page embeds a jwplayer-style playlist literal; each entry of its
    'sources' list is one downloadable format, labelled e.g. '720p30fps'.
    """
    _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
    _TEST = {
        'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
        'md5': 'f38d400d32f19724570040d5ce3a505f',
        'info_dict': {
            'id': '34371',
            'ext': 'mp4',
            'duration': 372,
            'title': 'Bloodborne - Birth of a hero',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # The playlist is a JS object literal; js_to_json makes it parseable.
        playlist = self._parse_json(
            self._search_regex(
                r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
            display_id, transform_source=js_to_json)

        formats = []
        for source in playlist['sources']:
            video_url = source.get('file')
            if not video_url:
                continue
            format_id = source.get('label')
            f = {
                'url': video_url,
                'format_id': format_id,
            }
            # label may be missing; re.search(pattern, None) would raise
            # TypeError, so only parse height/fps when a label is present.
            if format_id:
                m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
                if m:
                    f.update({
                        'height': int(m.group('height')),
                        'fps': int(m.group('fps')),
                    })
            formats.append(f)
        self._sort_formats(formats)

        # Playlist titles look like '<id> - <real title>'.
        title = remove_start(playlist['title'], '%s - ' % video_id)
        thumbnail = playlist.get('image')
        duration = parse_duration(self._search_regex(
            r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
xpath_text,
)
from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
class GenericIE(InfoExtractor):
},
'add_ie': ['Viddler'],
},
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ },
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
'age_limit': 0,
},
},
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ },
+ },
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'http://www.udn.com/news/story/7314/822787',
+ 'md5': 'de06b4c90b042c128395a88f0384817e',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
}
]
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
float_or_none,
int_or_none,
compat_str,
+ determine_ext,
)
servers.append(base_url)
for stream in cdn.get('bitrates'):
label = stream.get('label')
- if label != 'Auto':
+ if label == 'Auto':
+ continue
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ bitrate = int_or_none(stream.get('bitrate'))
+ if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+ if not stream_url.startswith('http'):
+ continue
formats.append({
- 'url': '%s/%s' % (base_url, stream.get('url')),
+ 'url': stream_url,
'ext': 'mp4',
- 'vbr': stream.get('bitrate'),
- 'resolution': label,
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'rtmp_live': True,
+ })
+ else:
+ formats.append({
+ 'url': '%s/%s' % (base_url, stream_url),
+ 'ext': 'mp4',
+ 'tbr': bitrate,
'rtmp_live': True,
'format_note': host,
'page_url': url,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
class LibsynIE(InfoExtractor):
    """Extractor for the Libsyn HTML5 podcast player embed pages."""
    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player page declares one or more media URLs in inline JS
        # (mediaURL and/or mediaURLLibsyn); deduplicate them via set().
        # Raw string avoids the invalid-escape deprecation for \s.
        formats = [{
            'url': media_url,
        } for media_url in set(re.findall(
            r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'title')
        episode_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)

        # Combine only when both parts are present; otherwise fall back to
        # whichever exists, instead of producing titles like "Podcast - None".
        if podcast_title and episode_title:
            title = '%s - %s' % (podcast_title, episode_title)
        else:
            title = podcast_title or episode_title

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<',
            webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
- _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+ _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
}, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
+ }, {
+ 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+ 'only_matching': True,
}]
def _parse_smil(self, video_id, smil_url):
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ int_or_none,
+)
+
+
class MioMioIE(InfoExtractor):
    """Extractor for miomio.tv watch pages (possibly multi-segment videos)."""
    IE_NAME = 'miomio.tv'
    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.miomio.tv/watch/cc179734/',
        'md5': '48de02137d0739c15b440a224ad364b9',
        'info_dict': {
            'id': '179734',
            'ext': 'flv',
            'title': '手绘动漫鬼泣但丁全程画法',
            'duration': 354,
        },
    }, {
        'url': 'http://www.miomio.tv/watch/cc184024/',
        'info_dict': {
            'id': '43729',
            'title': '《动漫同人插画绘制》',
        },
        'playlist_mincount': 86,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        mioplayer_path = self._search_regex(
            r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')

        xml_config = self._search_regex(
            r'flashvars="type=sina&(.+?)&',
            webpage, 'xml config')

        # Skipping the following page causes lags and eventually connection
        # drop-outs. BUG FIX: the original interpolated the builtin `id`
        # function (yielding id=<built-in function id>) instead of video_id.
        self._request_webpage(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
            video_id)

        # The following xml contains the actual configuration information
        # on the video file(s).
        vid_config = self._download_xml(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
            video_id)

        # The CDN requires the player page as Referer.
        http_headers = {
            'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
        }

        entries = []
        for f in vid_config.findall('./durl'):
            segment_url = xpath_text(f, 'url', 'video url')
            if not segment_url:
                continue
            order = xpath_text(f, 'order', 'order')
            segment_id = video_id
            segment_title = title
            if order:
                segment_id += '-%s' % order
                segment_title += ' part %s' % order
            entries.append({
                'id': segment_id,
                'url': segment_url,
                'title': segment_title,
                # <length> is in milliseconds
                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
                'http_headers': http_headers,
            })

        # Single-segment videos are returned as a plain video entry.
        if len(entries) == 1:
            segment = entries[0]
            segment['id'] = video_id
            segment['title'] = title
            return segment

        return {
            '_type': 'multi_video',
            'id': video_id,
            'entries': entries,
            'title': title,
            'http_headers': http_headers,
        }
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = str_to_int(self._search_regex(
- r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
+ r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
webpage, 'like count', fatal=False))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
class MLBIE(InfoExtractor):
- _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+ _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
},
+ {
+ 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
class NBCIE(InfoExtractor):
- _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+ _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
return self.url_result(theplatform_url)
class NBCSportsVPlayerIE(InfoExtractor):
    """Extractor for vplayer.nbcsports.com embeds (ThePlatform-backed)."""
    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

    _TESTS = [{
        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
        'info_dict': {
            'id': '9CsDKds0kvHI',
            'ext': 'flv',
            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
        }
    }, {
        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        # Return the src of an embedded NBC Sports vplayer iframe, or None.
        match = re.search(
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
        return match.group('url') if match else None

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The embed's og:video points at ThePlatform; delegate extraction.
        return self.url_result(
            self._og_search_video_url(webpage), 'ThePlatform')
+
+
class NBCSportsIE(InfoExtractor):
    """Extractor for nbcsports.com article pages embedding a vplayer."""
    # Does not include https because its certificate is invalid
    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Delegate to the vplayer extractor for the embedded iframe.
        vplayer_url = NBCSportsVPlayerIE._extract_url(webpage)
        return self.url_result(vplayer_url, 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
stream_url = self._download_json(
stream_info['stream'], display_id,
'Downloading %s URL' % stream_type,
- transform_source=strip_jsonp)
+ 'Unable to download %s URL' % stream_type,
+ transform_source=strip_jsonp, fatal=False)
+ if not stream_url:
+ continue
if stream_type == 'hds':
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
# f4m downloader downloads only piece of live stream
class NRKIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+ _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
- 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
- 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'bccd850baebefe23b56d708a113229c2',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 263,
}
},
{
- 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
- 'md5': '3471f2a51718195164e88f46bf427668',
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ 'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
}
},
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+ video_id = self._match_id(url)
data = self._download_json(
- 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+ 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+ video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
- raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+ raise ExtractorError(
+ 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
+ expected=True)
+
+ video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
- video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+ duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
'ext': 'flv',
'title': data['title'],
'description': data['description'],
+ 'duration': duration,
'thumbnail': thumbnail,
}
class NRKPlaylistIE(InfoExtractor):
    """Extractor for nrk.no article pages embedding several NRK videos."""
    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
        'info_dict': {
            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
            'title': 'Gjenopplev den historiske solformørkelsen',
            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
        'info_dict': {
            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
            'title': 'Rivertonprisen til Karin Fossum',
            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Embedded videos are marked with a "rich" class and carry the NRK
        # video id in a data attribute; hand each off to the NRK extractor.
        video_ids = re.findall(
            r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
            webpage)
        entries = [
            self.url_result('nrk:%s' % vid, 'NRK') for vid in video_ids]

        return self.playlist_result(
            entries, playlist_id,
            self._og_search_title(webpage),
            self._og_search_description(webpage))
+
+
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
class PhoenixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- }
+ _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+ (?:
+ phoenix/die_sendungen/(?:[^/]+/)?
+ )?
+ (?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.phoenix.de/content/884301',
+ 'md5': 'ed249f045256150c92e72dbb70eadec6',
+ 'info_dict': {
+ 'id': '884301',
+ 'ext': 'mp4',
+ 'title': 'Michael Krons mit Hans-Werner Sinn',
+ 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+ 'upload_date': '20141025',
+ 'uploader': 'Im Dialog',
+ }
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- float_or_none,
int_or_none,
- str_to_int,
+ parse_iso8601,
)
class PlayFMIE(InfoExtractor):
IE_NAME = 'play.fm'
- _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
_TEST = {
- 'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+ 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
'md5': 'c505f8307825a245d0c7ad1850001f22',
'info_dict': {
- 'id': '137220',
+ 'id': '71276',
'ext': 'mp3',
- 'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
- 'uploader': 'Sven Tasnadi',
- 'uploader_id': 'sventasnadi',
- 'duration': 5627.428,
- 'upload_date': '20140712',
+ 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+ 'description': '',
+ 'duration': 5627,
+ 'timestamp': 1406033781,
+ 'upload_date': '20140722',
+ 'uploader': 'Dan Drastic',
+ 'uploader_id': '71170',
'view_count': int,
'comment_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- upload_date = mobj.group('upload_date')
-
- rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
- req = compat_urllib_request.Request(
- 'http://www.play.fm/flexRead/recording', data=rec_data)
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- rec_doc = self._download_xml(req, video_id)
+ slug = mobj.group('slug')
- error_node = rec_doc.find('./error')
- if error_node is not None:
- raise ExtractorError('An error occured: %s (code %s)' % (
- error_node.text, rec_doc.find('./status').text))
+ recordings = self._download_json(
+ 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
- recording = rec_doc.find('./recording')
- title = recording.find('./title').text
- view_count = str_to_int(recording.find('./stats/playcount').text)
- comment_count = str_to_int(recording.find('./stats/comments').text)
- duration = float_or_none(recording.find('./duration').text, scale=1000)
- thumbnail = recording.find('./image').text
+ error = recordings.get('error')
+ if isinstance(error, dict):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+ expected=True)
- artist = recording.find('./artists/artist')
- uploader = artist.find('./name').text
- uploader_id = artist.find('./slug').text
-
- video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
- 'http:', recording.find('./url').text,
- recording.find('./_class').text, recording.find('./file_id').text,
- rec_doc.find('./uuid').text, video_id,
- rec_doc.find('./jingle/file_id').text,
- 'http%3A%2F%2Fwww.play.fm%2Fplayer',
- )
+ audio_url = recordings['audio']
+ video_id = compat_str(recordings.get('id') or video_id)
+ title = recordings['title']
+ description = recordings.get('description')
+ duration = int_or_none(recordings.get('recordingDuration'))
+ timestamp = parse_iso8601(recordings.get('created_at'))
+ uploader = recordings.get('page', {}).get('title')
+ uploader_id = compat_str(recordings.get('page', {}).get('id'))
+ view_count = int_or_none(recordings.get('playCount'))
+ comment_count = int_or_none(recordings.get('commentCount'))
+ categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp3',
- 'filesize': int_or_none(recording.find('./size').text),
+ 'url': audio_url,
'title': title,
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
+ 'description': description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
}
}
def _extract_count(self, pattern, webpage, name):
- count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
- if count:
- count = str_to_int(count)
- return count
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
- view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
- r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
class PornoVoisinesIE(InfoExtractor):
    """Extractor for pornovoisines.com show pages (adult content)."""
    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'

    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'

    # Media is mirrored on two numbered streaming servers.
    _SERVER_NUMBERS = (1, 2)

    _TEST = {
        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
        'info_dict': {
            'id': '1285',
            'display_id': 'recherche-appartement',
            'ext': 'mp4',
            'title': 'Recherche appartement',
            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20140925',
            'duration': 120,
            'view_count': int,
            'average_rating': float,
            'categories': ['Débutante', 'Scénario', 'Sodomie'],
            'age_limit': 18,
        }
    }

    @classmethod
    def build_video_url(cls, num):
        """Build a direct MP4 URL for video *num* on a random mirror."""
        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self.build_video_url(video_id)

        title = self._html_search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
        description = self._html_search_regex(
            r'<article id="descriptif">(.+?)</article>',
            webpage, 'description', fatal=False, flags=re.DOTALL)

        # Thumbnail src is site-relative; prepend the host when found.
        thumbnail = self._search_regex(
            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
            webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail

        upload_date = unified_strdate(self._search_regex(
            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
        # Raw string avoids the invalid-escape deprecation for \d.
        duration = int_or_none(self._search_regex(
            r'Durée (\d+)', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'(\d+) vues', webpage, 'view count', fatal=False))
        # Rating uses a decimal comma (e.g. "4,2"); normalize before parsing.
        average_rating = self._search_regex(
            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
        if average_rating:
            average_rating = float_or_none(average_rating.replace(',', '.'))

        categories = self._html_search_meta(
            'keywords', webpage, 'categories', fatal=False)
        if categories:
            categories = [category.strip() for category in categories.split(',')]

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'categories': categories,
            'age_limit': 18,
        }
)
from ..utils import (
unified_strdate,
+ int_or_none,
)
'info_dict': {
'id': '2104602',
'ext': 'mp4',
- 'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+ 'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import(
+ unified_strdate,
+ str_to_int,
+)
+
+
class RadioJavanIE(InfoExtractor):
    """Extractor for radiojavan.com music video pages."""
    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': 're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The page declares one JS variable per quality,
        # e.g. RJ.video720p = '/chaartaar....mp4'
        formats = []
        for height, video_path in re.findall(
                r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage):
            formats.append({
                'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
                'format_id': '%sp' % height,
                'height': int(height),
            })
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
class RaiIE(InfoExtractor):
- _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+ _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'description': 'Edizione delle ore 20:30 ',
}
},
+ {
+ 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+ 'md5': '02b64456f7cc09f96ff14e7dd489017e',
+ 'info_dict': {
+ 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+ 'ext': 'flv',
+ 'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+ 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+ 'uploader': 'RaiTre',
+ }
+ }
]
+ def _extract_relinker_url(self, webpage):
+ return self._proto_relative_url(self._search_regex(
+ [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+ webpage, 'relinker url', default=None))
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ host = mobj.group('host')
- media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+ webpage = self._download_webpage(url, video_id)
- title = media.get('name')
- description = media.get('desc')
- thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
- duration = parse_duration(media.get('length'))
- uploader = media.get('author')
- upload_date = unified_strdate(media.get('date'))
+ relinker_url = self._extract_relinker_url(webpage)
- formats = []
+ if not relinker_url:
+ iframe_path = self._search_regex(
+ r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+ webpage, 'iframe')
+ webpage = self._download_webpage(
+ '%s/%s' % (host, iframe_path), video_id)
+ relinker_url = self._extract_relinker_url(webpage)
- for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
- media_url = media.get(format_id)
- if not media_url:
- continue
- formats.append({
+ relinker = self._download_json(
+ '%s&output=47' % relinker_url, video_id)
+
+ media_url = relinker['video'][0]
+ ct = relinker.get('ct')
+ if ct == 'f4m':
+ formats = self._extract_f4m_formats(
+ media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+ else:
+ formats = [{
'url': media_url,
- 'format_id': format_id,
- 'ext': 'mp4',
- })
+ 'format_id': ct,
+ }]
- subtitles = self.extract_subtitles(video_id, url)
+ json_link = self._html_search_meta(
+ 'jsonlink', webpage, 'JSON link', default=None)
+ if json_link:
+ media = self._download_json(
+ host + json_link, video_id, 'Downloading video JSON')
+ title = media.get('name')
+ description = media.get('desc')
+ thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+ duration = parse_duration(media.get('length'))
+ uploader = media.get('author')
+ upload_date = unified_strdate(media.get('date'))
+ else:
+ title = (self._search_regex(
+ r'var\s+videoTitolo\s*=\s*"(.+?)";',
+ webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = None
+ uploader = self._html_search_meta('Editore', webpage, 'uploader')
+ upload_date = unified_strdate(self._html_search_meta(
+ 'item-date', webpage, 'upload date', default=None))
+
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
'subtitles': subtitles,
}
- def _get_subtitles(self, video_id, url):
- webpage = self._download_webpage(url, video_id)
+ def _get_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import ExtractorError
class RedTubeIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.redtube.com/66418',
+ 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
'id': '66418',
'ext': 'mp4',
- "title": "Sucked on a toilet",
- "age_limit": 18,
+ 'title': 'Sucked on a toilet',
+ 'age_limit': 18,
}
}
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+ raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
float_or_none,
remove_end,
+ std_headers,
struct_unpack,
)
'only_matching': True,
}]
+ def _real_initialize(self):
+ user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+ manager_info = self._download_json(
+ 'http://www.rtve.es/odin/loki/' + user_agent_b64,
+ None, 'Fetching manager info')
+ self._manager = manager_info['manager']
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info = self._download_json(
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
video_id)['page']['items'][0]
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+ if info['state'] == 'DESPU':
+ raise ExtractorError('The video is no longer available', expected=True)
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    # Base class holding the shared login flow for the Safari Books
+    # Online extractors (SafariIE, SafariCourseIE) below.
+    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_FORMAT = 'json'
+
+    # Class-level flag shared by all subclasses so the account is only
+    # logged in once per youtube-dl run
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        # We only need to log in once for courses or individual videos
+        if not self.LOGGED_IN:
+            self._login()
+            SafariBaseIE.LOGGED_IN = True
+
+    def _login(self):
+        # Raises ExtractorError when no credentials are supplied or the
+        # login attempt is rejected.
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(
+                self._ACCOUNT_CREDENTIALS_HINT,
+                expected=True)
+
+        # Work on a copy so the global default headers dict is not mutated
+        headers = std_headers.copy()
+        if 'Referer' not in headers:
+            headers['Referer'] = self._LOGIN_URL
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            'Downloading login form')
+
+        csrf = self._html_search_regex(
+            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+            login_page, 'csrf token')
+
+        login_form = {
+            'csrfmiddlewaretoken': csrf,
+            'email': username,
+            'password1': password,
+            'login': 'Sign In',
+            'next': '',
+        }
+
+        # POST data must be bytes on Python 3, so encode the urlencoded form
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL,
+            compat_urllib_parse.urlencode(login_form).encode('utf-8'),
+            headers=headers)
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError(
+                'Login failed; make sure your credentials are correct and try again.',
+                expected=True)
+
+        self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+    # Extractor for a single part (chapter) of a Safari Books Online
+    # course; the actual video is hosted on Brightcove.
+    IE_NAME = 'safari'
+    IE_DESC = 'safaribooksonline.com online video'
+    _VALID_URL = r'''(?x)https?://
+                            (?:www\.)?safaribooksonline\.com/
+                                (?:
+                                    library/view/[^/]+|
+                                    api/v1/book
+                                )/
+                                (?P<course_id>\d+)/
+                                    (?:chapter(?:-content)?/)?
+                                (?P<part>part\d+)\.html
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'info_dict': {
+            'id': '2842601850001',
+            'ext': 'mp4',
+            'title': 'Introduction',
+        },
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        part = mobj.group('part')
+
+        # Always fetch the chapter through the API endpoint, regardless of
+        # which of the two URL forms was supplied
+        webpage = self._download_webpage(
+            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+            part)
+
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if not bc_url:
+            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+        # Delegate the download to the Brightcove extractor, smuggling the
+        # original URL as Referer
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+    # Playlist extractor: yields one entry per chapter of a course, each
+    # handled by the SafariIE extractor.
+    IE_NAME = 'safari:course'
+    IE_DESC = 'safaribooksonline.com online courses'
+
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'info_dict': {
+            'id': '9780133392838',
+            'title': 'Hadoop Fundamentals LiveLessons',
+        },
+        'playlist_count': 22,
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        course_json = self._download_json(
+            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            course_id, 'Downloading course JSON')
+
+        if 'chapters' not in course_json:
+            raise ExtractorError(
+                'No chapters found for course %s' % course_id, expected=True)
+
+        # Each chapter entry is a URL matched by SafariIE
+        entries = [
+            self.url_result(chapter, 'Safari')
+            for chapter in course_json['chapters']]
+
+        course_title = course_json['title']
+
+        return self.playlist_result(entries, course_id, course_title)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+ r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
class SoundcloudSetIE(SoundcloudIE):
- _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SpankBangIE(InfoExtractor):
+    # Extractor for spankbang.com videos; MP4 URLs are reconstructed from
+    # the page's stream key and the quality labels found in the markup.
+    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _TEST = {
+        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+        'md5': '1cc433e1d6aa14bc376535b8679302f7',
+        'info_dict': {
+            'id': '3vvn',
+            'ext': 'mp4',
+            'title': 'fantasy solo',
+            'description': 'dillion harper masturbates on a bed',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'silly2587',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        stream_key = self._html_search_regex(
+            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
+            webpage, 'stream key')
+
+        # One format per quality label (q_480p etc.) present on the page;
+        # the MP4 URL is built from the stream key and the height
+        formats = [{
+            'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
+            'ext': 'mp4',
+            'format_id': '%sp' % height,
+            'height': int(height),
+        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+        description = self._search_regex(
+            r'class="desc"[^>]*>([^<]+)',
+            webpage, 'description', default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._search_regex(
+            r'class="user"[^>]*>([^<]+)',
+            webpage, 'uploader', fatal=False)
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+            'age_limit': age_limit,
+        }
import re
from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+ ExtractorError,
+ qualities,
+)
class TeamcocoIE(InfoExtractor):
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'duration': 504,
'age_limit': 0,
}
}, {
'ext': 'mp4',
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'duration': 288,
'age_limit': 0,
}
}
video_id = self._html_search_regex(
self._VIDEO_ID_REGEXES, webpage, 'video id')
- embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
- embed = self._download_webpage(
- embed_url, video_id, 'Downloading embed page')
-
- player_data = self._parse_json(self._search_regex(
- r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
+ preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage)
+ if not preloads:
+ raise ExtractorError('Preload information could not be extracted')
+ preload = max([(len(p), p) for p in preloads])[1]
data = self._parse_json(
- base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
+ base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id)
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
- m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
- if m_format is not None:
- format_id = m_format.group(1)
+ if filed['type'] == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ filed['url'], video_id, ext='mp4'))
else:
- format_id = filed['bitrate']
- tbr = (
- int(filed['bitrate'])
- if filed['bitrate'].isdigit()
- else None)
+ m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+ if m_format is not None:
+ format_id = m_format.group(1)
+ else:
+ format_id = filed['bitrate']
+ tbr = (
+ int(filed['bitrate'])
+ if filed['bitrate'].isdigit()
+ else None)
- formats.append({
- 'url': filed['url'],
- 'ext': 'mp4',
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
+ formats.append({
+ 'url': filed['url'],
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
self._sort_formats(formats)
'title': data['title'],
'thumbnail': data.get('thumb', {}).get('href'),
'description': data.get('teaser'),
+ 'duration': data.get('duration'),
'age_limit': self._family_friendly_search(webpage),
}
from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
class TEDIE(InfoExtractor):
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
- else:
- # Use rtmp downloads
- formats = [{
- 'format_id': f['name'],
- 'url': talk_info['streamer'],
- 'play_path': f['file'],
- 'ext': 'flv',
- 'width': f['width'],
- 'height': f['height'],
- 'tbr': f['bitrate'],
- } for f in talk_info['resources']['rtmp']]
+
+ for format_id, resources in talk_info['resources'].items():
+ if format_id == 'h264':
+ for resource in resources:
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': resource['file'],
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ resources.get('stream'), video_name, 'mp4', m3u8_id=format_id))
+
+ audio_download = talk_info.get('audioDownload')
+ if audio_download:
+ formats.append({
+ 'url': audio_download,
+ 'format_id': 'audio',
+ })
+
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
ExtractorError,
xpath_with_ns,
unsmuggle_url,
+ int_or_none,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
- _TEST = {
+ _TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
'info_dict': {
# rtmp download
'skip_download': True,
},
- }
+ }, {
+ # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+ 'info_dict': {
+ 'id': '22d_qsQ6MIRT',
+ 'ext': 'flv',
+ 'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+ 'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == 'Geographic Restriction')
+ if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
- f4m_node = body.find(_x('smil:seq//smil:video'))
+ f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
else:
formats = []
switch = body.find(_x('smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- width = int(attr['width'])
- height = int(attr['height'])
- vbr = int(attr['system-bitrate']) // 1000
+ width = int_or_none(attr.get('width'))
+ height = int_or_none(attr.get('height'))
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
'format_id': format_id,
'vbr': vbr,
})
else:
- switch = body.find(_x('smil:seq//smil:switch'))
+ switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- vbr = int(attr['system-bitrate']) // 1000
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
ext = determine_ext(attr['src'])
if ext == 'once':
ext = 'mp4'
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': info['duration'] // 1000,
+ 'duration': int_or_none(info.get('duration'), 1000),
}
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replace the audio tracks that can be streamed on their
+# site. The tracks usually expire after 1 months, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+    # Extractor for a single 22tracks.com track, identified by city slug,
+    # genre slug and numeric track id.
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+    IE_NAME = '22tracks:track'
+
+    _API_BASE = 'http://22tracks.com/api'
+
+    def _extract_info(self, city, genre_name, track_id=None):
+        # Resolve city slug -> city id -> genre id -> track list via the
+        # API. Returns the matching track dict when track_id is given,
+        # otherwise a [genre_title, tracks] pair for the genre playlist.
+        item_id = track_id if track_id else genre_name
+
+        cities = self._download_json(
+            '%s/cities' % self._API_BASE, item_id,
+            'Downloading cities info',
+            'Unable to download cities info')
+        city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+        genres = self._download_json(
+            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+            'Downloading %s genres info' % city,
+            'Unable to download %s genres info' % city)
+        genre = [x for x in genres if x['slug'] == genre_name][0]
+        genre_id = genre['id']
+
+        tracks = self._download_json(
+            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+            'Downloading %s genre tracks info' % genre_name,
+            'Unable to download track info')
+
+        return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+    def _get_track_url(self, filename, track_id):
+        # Streaming URLs are tokenized; fetch a token and assemble the
+        # signed audio URL from its fields
+        token = self._download_json(
+            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+            track_id, 'Downloading token', 'Unable to download token')
+        return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+    def _extract_track_info(self, track_info, track_id):
+        # Build the info dict for a single track from its API metadata
+        download_url = self._get_track_url(track_info['filename'], track_id)
+        title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+        return {
+            'id': track_id,
+            'url': download_url,
+            'ext': 'mp3',
+            'title': title,
+            'duration': int_or_none(track_info.get('duration')),
+            'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+        track_id = mobj.group('id')
+
+        track_info = self._extract_info(city, genre, track_id)
+        return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+    # Playlist extractor for a whole 22tracks genre (URL without a
+    # trailing track id); reuses the parent's API helpers.
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+    IE_NAME = '22tracks:genre'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+
+        # No track_id -> _extract_info returns [genre_title, tracks]
+        genre_title, tracks = self._extract_info(city, genre)
+
+        entries = [
+            self._extract_track_info(track_info, track_info['id'])
+            for track_info in tracks]
+
+        return self.playlist_result(entries, genre, genre_title)
class TwitchVideoIE(TwitchItemBaseIE):
IE_NAME = 'twitch:video'
- _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'video'
_ITEM_SHORTCUT = 'a'
class TwitchChapterIE(TwitchItemBaseIE):
IE_NAME = 'twitch:chapter'
- _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'chapter'
_ITEM_SHORTCUT = 'c'
class TwitchVodIE(TwitchItemBaseIE):
IE_NAME = 'twitch:vod'
- _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import js_to_json
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+    # Extractor for video.udn.com embed pages; the scheme is optional in
+    # _VALID_URL so protocol-relative embed URLs also match.
+    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://video.udn.com/embed/news/300040',
+        'md5': 'de06b4c90b042c128395a88f0384817e',
+        'info_dict': {
+            'id': '300040',
+            'ext': 'mp4',
+            'title': '生物老師男變女 全校挺"做自己"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': '//video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        page = self._download_webpage(url, video_id)
+
+        # The page embeds its URLs as a JavaScript object literal
+        options = json.loads(js_to_json(self._html_search_regex(
+            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+        video_urls = options['video']
+
+        if video_urls.get('youtube'):
+            return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+        # Only reached when 'youtube' is absent or falsy; drop the key so
+        # it is not treated as a format below
+        try:
+            del video_urls['youtube']
+        except KeyError:
+            pass
+
+        # Each remaining entry is an API URL whose response body is the
+        # actual media URL; mp4 is preferred over other types
+        formats = [{
+            'url': self._download_webpage(
+                compat_urlparse.urljoin(url, api_url), video_id,
+                'retrieve url for %s video' % video_type),
+            'format_id': video_type,
+            'preference': 0 if video_type == 'mp4' else -1,
+        } for video_type, api_url in video_urls.items()]
+
+        self._sort_formats(formats)
+
+        thumbnail = None
+
+        if options.get('gallery') and len(options['gallery']):
+            thumbnail = options['gallery'][0].get('original')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': options['title'],
+            'thumbnail': thumbnail
+        }
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
deliver_url = self._search_regex(
title = clean_html((
self._html_search_regex(
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
- webpage, 'title', default=None)
- or self._search_regex(
+ webpage, 'title', default=None) or
+ self._search_regex(
r"var\s+nameVideo\s*=\s*'([^']+)'",
deliver_page, 'title')))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+    # Extractor for video.varzesh3.com; the URL slug is percent-encoded
+    # Persian text, so the numeric id is recovered from the page itself.
+    _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+        'md5': '2a933874cb7dce4366075281eb49e855',
+        'info_dict': {
+            'id': '76337',
+            'ext': 'mp4',
+            'title': '۵ واکنش برتر دروازهبانان؛هفته ۲۶ بوندسلیگا',
+            'description': 'فصل ۲۰۱۵-۲۰۱۴',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="matn">(.+?)</div>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        # Prefer the numeric id from the canonical/shortlink tag; fall
+        # back to the URL slug when it is missing
+        video_id = self._search_regex(
+            r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+            webpage, display_id, default=display_id)
+
+        return {
+            'url': video_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+    # Extractor for vessel.com; talks to a JSON API and supports optional
+    # account login for subscriber-only content.
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+    _NETRC_MACHINE = 'vessel'
+    _TEST = {
+        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+        'info_dict': {
+            'id': 'HDN7G5UMs',
+            'ext': 'mp4',
+            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150317',
+            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+            'timestamp': int,
+        },
+    }
+
+    @staticmethod
+    def make_json_request(url, data):
+        # Build a POST request carrying `data` as a UTF-8 JSON body
+        payload = json.dumps(data).encode('utf-8')
+        req = compat_urllib_request.Request(url, payload)
+        req.add_header('Content-Type', 'application/json; charset=utf-8')
+        return req
+
+    @staticmethod
+    def find_assets(data, asset_type):
+        # Yield every asset dict of the given type ('video', 'image', ...)
+        for asset in data.get('assets', []):
+            if asset.get('type') == asset_type:
+                yield asset
+
+    def _check_access_rights(self, data):
+        # Raise a user-facing ExtractorError when the API says access to
+        # this item is denied (e.g. subscription required)
+        access_info = data.get('__view', {})
+        if not access_info.get('allow_access', True):
+            err_code = access_info.get('error_code') or ''
+            if err_code == 'ITEM_PAID_ONLY':
+                raise ExtractorError(
+                    'This video requires subscription.', expected=True)
+            else:
+                raise ExtractorError(
+                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+    def _login(self):
+        # Login is optional: silently skip when no credentials are given
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        data = {
+            'client_id': 'web',
+            'type': 'password',
+            'user_key': username,
+            'password': password,
+        }
+        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
+        self._login()
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # The page bootstraps its state as JSON; the asset id inside it
+        # keys the API lookup below
+        webpage = self._download_webpage(url, video_id)
+        data = self._parse_json(self._search_regex(
+            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+        asset_id = data['model']['data']['id']
+
+        req = VesselIE.make_json_request(
+            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+        data = self._download_json(req, video_id)
+
+        self._check_access_rights(data)
+
+        try:
+            video_asset = next(VesselIE.find_assets(data, 'video'))
+        except StopIteration:
+            raise ExtractorError('No video assets found')
+
+        # HLS master playlists are expanded into individual formats;
+        # everything else is taken as a direct source
+        formats = []
+        for f in video_asset.get('sources', []):
+            if f['name'] == 'hls-index':
+                formats.extend(self._extract_m3u8_formats(
+                    f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+            else:
+                formats.append({
+                    'format_id': f['name'],
+                    'tbr': f.get('bitrate'),
+                    'height': f.get('height'),
+                    'width': f.get('width'),
+                    'url': f['location'],
+                })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for im_asset in VesselIE.find_assets(data, 'image'):
+            thumbnails.append({
+                'url': im_asset['location'],
+                'width': im_asset.get('width', 0),
+                'height': im_asset.get('height', 0),
+            })
+
+        return {
+            'id': video_id,
+            'title': data['title'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': data.get('short_description'),
+            'duration': data.get('duration'),
+            'comment_count': data.get('comment_count'),
+            'like_count': data.get('like_count'),
+            'view_count': data.get('view_count'),
+            'timestamp': parse_iso8601(data.get('released_at')),
+        }
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*({.+?});', webpage,
+ 'vimeo config', default=None)
+ if vimeo_config:
+ seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
# Extract the config JSON
try:
try:
class VineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _TESTS = [{
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
'info_dict': {
'uploader': 'Jack Dorsey',
'uploader_id': '76',
},
- }
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+ 'info_dict': {
+ 'id': 'MYxVapFvz2z',
+ 'ext': 'mp4',
+ 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'alt_title': 'Vine by Luna',
+ 'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'upload_date': '20140815',
+ 'uploader': 'Luna',
+ 'uploader_id': '1102363502380728320',
+ },
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'md5': 'ea27decea3fa670625aac92771a96b73',
+ 'info_dict': {
+ 'id': 'bxVjBbZlPUH',
+ 'ext': 'mp4',
+ 'title': '#mw3 #ac130 #killcam #angelofdeath',
+ 'alt_title': 'Vine by Z3k3',
+ 'description': '#mw3 #ac130 #killcam #angelofdeath',
+ 'upload_date': '20130430',
+ 'uploader': 'Z3k3',
+ 'uploader_id': '936470460173008896',
+ },
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
- data = json.loads(self._html_search_regex(
- r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+ data = self._parse_json(
+ self._html_search_regex(
+ r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+ webpage, 'vine data'),
+ video_id)
formats = [{
'format_id': '%(format)s-%(rate)s' % f,
'vcodec': f['format'],
'quality': f['rate'],
'url': f['videoUrl'],
- } for f in data['videoUrls'] if f.get('rate')]
+ } for f in data['videoUrls']]
self._sort_formats(formats)
'only_matching': True,
}]
+ @staticmethod
+ def base64_decode_utf8(data):
+ return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+ @staticmethod
+ def base64_encode_utf8(data):
+ return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
def _extract_flv_config(self, media_id):
- base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+ base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
- prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+ prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
- encoded_content = base64.b64decode(prop.text).decode('utf-8')
+ encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
}
]
items = json.loads(items_json)
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
- except:
+ except ValueError:
raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
'uploader': '孫艾倫',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
- }
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'mp4',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
]
def __init__(self, *args, **kwargs):
errnote='Could not download DASH manifest')
formats = []
- for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
- url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
- if url_el is None:
- continue
- format_id = r.attrib['id']
- video_url = url_el.text
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
- f = {
- 'format_id': format_id,
- 'url': video_url,
- 'width': int_or_none(r.attrib.get('width')),
- 'height': int_or_none(r.attrib.get('height')),
- 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
- 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
- 'filesize': filesize,
- 'fps': int_or_none(r.attrib.get('frameRate')),
- }
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == format_id)
- except StopIteration:
- full_info = self._formats.get(format_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- existing_format.update(f)
+ for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+ mime_type = a.attrib.get('mimeType')
+ for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+ url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+ if url_el is None:
+ continue
+ if mime_type == 'text/vtt':
+ # TODO implement WebVTT downloading
+ pass
+ elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ format_id = r.attrib['id']
+ video_url = url_el.text
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
+ 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+ 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+ 'filesize': filesize,
+ 'fps': int_or_none(r.attrib.get('frameRate')),
+ }
+ try:
+ existing_format = next(
+ fo for fo in formats
+ if fo['format_id'] == format_id)
+ except StopIteration:
+ full_info = self._formats.get(format_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
+ else:
+ existing_format.update(f)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
def _real_extract(self, url):
args = ytplayer_config['args']
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
- if 'url_encoded_fmt_stream_map' not in args:
+ if not args.get('url_encoded_fmt_stream_map'):
raise ValueError('No stream_map present') # caught below
except ValueError:
# We fallback to the get_video_info pages (used by the embed page)
return self.playlist_result(url_results, playlist_id, title)
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
- if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
+ def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ return self._extract_playlist(playlist_id)
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:recommended'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
- _FEED_NAME = 'watch_later'
- _PLAYLIST_TITLE = 'Youtube Watch Later'
- _PERSONAL_FEED = True
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
+ return self._extract_playlist('WL')
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:history'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
compat_kwargs,
)
from .utils import (
+ preferredencoding,
write_string,
)
from .version import __version__
general.add_option(
'-h', '--help',
action='help',
- help='print this help text and exit')
+ help='Print this help text and exit')
general.add_option(
'-v', '--version',
action='version',
- help='print program version and exit')
+ help='Print program version and exit')
general.add_option(
'-U', '--update',
action='store_true', dest='update_self',
- help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+ help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option(
'-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', default=False,
- help='continue on download errors, for example to skip unavailable videos in a playlist')
+ help='Continue on download errors, for example to skip unavailable videos in a playlist')
general.add_option(
'--abort-on-error',
action='store_false', dest='ignoreerrors',
general.add_option(
'--dump-user-agent',
action='store_true', dest='dump_user_agent', default=False,
- help='display the current browser identification')
+ help='Display the current browser identification')
general.add_option(
'--list-extractors',
action='store_true', dest='list_extractors', default=False,
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
general.add_option(
'--ignore-config',
action='store_true',
'--no-color', '--no-colors',
action='store_true', dest='no_color',
default=False,
- help='Do not emit color codes in output.')
+ help='Do not emit color codes in output')
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='playlist video to start at (default is %default)')
+ help='Playlist video to start at (default is %default)')
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='playlist video to end at (default is last)')
+ help='Playlist video to end at (default is last)')
selection.add_option(
'--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+        help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
- help='download only matching titles (regex or caseless sub-string)')
+ help='Download only matching titles (regex or caseless sub-string)')
selection.add_option(
'--reject-title',
dest='rejecttitle', metavar='REGEX',
- help='skip download for matching titles (regex or caseless sub-string)')
+ help='Skip download for matching titles (regex or caseless sub-string)')
selection.add_option(
'--max-downloads',
dest='max_downloads', metavar='NUMBER', type=int, default=None,
selection.add_option(
'--date',
metavar='DATE', dest='date', default=None,
- help='download only videos uploaded in this date')
+ help='Download only videos uploaded in this date')
selection.add_option(
'--datebefore',
metavar='DATE', dest='datebefore', default=None,
- help='download only videos uploaded on or before this date (i.e. inclusive)')
+ help='Download only videos uploaded on or before this date (i.e. inclusive)')
selection.add_option(
'--dateafter',
metavar='DATE', dest='dateafter', default=None,
- help='download only videos uploaded on or after this date (i.e. inclusive)')
+ help='Download only videos uploaded on or after this date (i.e. inclusive)')
selection.add_option(
'--min-views',
metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with less than COUNT views',)
+ help='Do not download any videos with less than COUNT views')
selection.add_option(
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
- '(Experimental) Generic video filter. '
+ 'Generic video filter (experimental). '
'Specify any key (see help for -o for a list of available keys) to'
' match if the key is present, '
'!key to check if the key is not present,'
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
- help='If the URL refers to a video and a playlist, download only the video.')
+ help='Download only the video, if the URL refers to a video and a playlist.')
selection.add_option(
'--yes-playlist',
action='store_false', dest='noplaylist', default=False,
- help='If the URL refers to a video and a playlist, download the playlist.')
+ help='Download the playlist, if the URL refers to a video and a playlist.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
- help='download only videos suitable for the given age')
+ help='Download only videos suitable for the given age')
selection.add_option(
'--download-archive', metavar='FILE',
dest='download_archive',
authentication.add_option(
'-u', '--username',
dest='username', metavar='USERNAME',
- help='login with this account ID')
+ help='Login with this account ID')
authentication.add_option(
'-p', '--password',
dest='password', metavar='PASSWORD',
- help='account password. If this option is left out, youtube-dl will ask interactively.')
+ help='Account password. If this option is left out, youtube-dl will ask interactively.')
authentication.add_option(
'-2', '--twofactor',
dest='twofactor', metavar='TWOFACTOR',
- help='two-factor auth code')
+ help='Two-factor auth code')
authentication.add_option(
'-n', '--netrc',
action='store_true', dest='usenetrc', default=False,
- help='use .netrc authentication data')
+ help='Use .netrc authentication data')
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
- help='video password (vimeo, smotri)')
+ help='Video password (vimeo, smotri)')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(
'-f', '--format',
action='store', dest='format', metavar='FORMAT', default=None,
help=(
- 'video format code, specify the order of preference using'
+ 'Video format code, specify the order of preference using'
' slashes, as in -f 22/17/18 . '
' Instead of format codes, you can select by extension for the '
'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
video_format.add_option(
'--all-formats',
action='store_const', dest='format', const='all',
- help='download all available video formats')
+ help='Download all available video formats')
video_format.add_option(
'--prefer-free-formats',
action='store_true', dest='prefer_free_formats', default=False,
- help='prefer free video formats unless a specific one is requested')
+ help='Prefer free video formats unless a specific one is requested')
video_format.add_option(
'--max-quality',
action='store', dest='format_limit', metavar='FORMAT',
- help='highest quality format to download')
+ help='Highest quality format to download')
video_format.add_option(
'-F', '--list-formats',
action='store_true', dest='listformats',
- help='list all available formats')
+ help='List all available formats')
video_format.add_option(
'--youtube-include-dash-manifest',
action='store_true', dest='youtube_include_dash_manifest', default=True,
subtitles.add_option(
'--write-sub', '--write-srt',
action='store_true', dest='writesubtitles', default=False,
- help='write subtitle file')
+ help='Write subtitle file')
subtitles.add_option(
'--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub', default=False,
- help='write automatic subtitle file (youtube only)')
+ help='Write automatic subtitle file (YouTube only)')
subtitles.add_option(
'--all-subs',
action='store_true', dest='allsubtitles', default=False,
- help='downloads all the available subtitles of the video')
+ help='Download all the available subtitles of the video')
subtitles.add_option(
'--list-subs',
action='store_true', dest='listsubtitles', default=False,
- help='lists all available subtitles for the video')
+ help='List all available subtitles for the video')
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
+ help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
default=[], callback=_comma_separated_values_options_callback,
- help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+ help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
downloader = optparse.OptionGroup(parser, 'Download Options')
downloader.add_option(
'-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT',
- help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
downloader.add_option(
'-R', '--retries',
dest='retries', metavar='RETRIES', default=10,
- help='number of retries (default is %default), or "infinite".')
+ help='Number of retries (default is %default), or "infinite".')
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
- help='size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
downloader.add_option(
'--no-resize-buffer',
action='store_true', dest='noresizebuffer', default=False,
- help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
downloader.add_option(
'--test',
action='store_true', dest='test', default=False,
downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
- help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+ help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true',
- help='(experimental) Use the native HLS downloader instead of ffmpeg.')
+ help='Use the native HLS downloader instead of ffmpeg (experimental)')
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
downloader.add_option(
'--external-downloader-args',
dest='external_downloader_args', metavar='ARGS',
- help='Give these arguments to the external downloader.')
+ help='Give these arguments to the external downloader')
workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
workarounds.add_option(
'--no-check-certificate',
action='store_true', dest='no_check_certificate', default=False,
- help='Suppress HTTPS certificate validation.')
+ help='Suppress HTTPS certificate validation')
workarounds.add_option(
'--prefer-insecure',
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
workarounds.add_option(
'--user-agent',
metavar='UA', dest='user_agent',
- help='specify a custom user agent')
+ help='Specify a custom user agent')
workarounds.add_option(
'--referer',
metavar='URL', dest='referer', default=None,
- help='specify a custom referer, use if the video access is restricted to one domain',
+ help='Specify a custom referer, use if the video access is restricted to one domain',
)
workarounds.add_option(
'--add-header',
metavar='FIELD:VALUE', dest='headers', action='append',
- help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
)
workarounds.add_option(
'--bidi-workaround',
verbosity.add_option(
'-q', '--quiet',
action='store_true', dest='quiet', default=False,
- help='activates quiet mode')
+ help='Activate quiet mode')
verbosity.add_option(
'--no-warnings',
dest='no_warnings', action='store_true', default=False,
verbosity.add_option(
'-s', '--simulate',
action='store_true', dest='simulate', default=False,
- help='do not download the video and do not write anything to disk',)
+ help='Do not download the video and do not write anything to disk')
verbosity.add_option(
'--skip-download',
action='store_true', dest='skip_download', default=False,
- help='do not download the video',)
+ help='Do not download the video')
verbosity.add_option(
'-g', '--get-url',
action='store_true', dest='geturl', default=False,
- help='simulate, quiet but print URL')
+ help='Simulate, quiet but print URL')
verbosity.add_option(
'-e', '--get-title',
action='store_true', dest='gettitle', default=False,
- help='simulate, quiet but print title')
+ help='Simulate, quiet but print title')
verbosity.add_option(
'--get-id',
action='store_true', dest='getid', default=False,
- help='simulate, quiet but print id')
+ help='Simulate, quiet but print id')
verbosity.add_option(
'--get-thumbnail',
action='store_true', dest='getthumbnail', default=False,
- help='simulate, quiet but print thumbnail URL')
+ help='Simulate, quiet but print thumbnail URL')
verbosity.add_option(
'--get-description',
action='store_true', dest='getdescription', default=False,
- help='simulate, quiet but print video description')
+ help='Simulate, quiet but print video description')
verbosity.add_option(
'--get-duration',
action='store_true', dest='getduration', default=False,
- help='simulate, quiet but print video length')
+ help='Simulate, quiet but print video length')
verbosity.add_option(
'--get-filename',
action='store_true', dest='getfilename', default=False,
- help='simulate, quiet but print output filename')
+ help='Simulate, quiet but print output filename')
verbosity.add_option(
'--get-format',
action='store_true', dest='getformat', default=False,
- help='simulate, quiet but print output format')
+ help='Simulate, quiet but print output format')
verbosity.add_option(
'-j', '--dump-json',
action='store_true', dest='dumpjson', default=False,
- help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+ help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
verbosity.add_option(
'-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False,
- help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
verbosity.add_option(
'--print-json',
action='store_true', dest='print_json', default=False,
verbosity.add_option(
'--newline',
action='store_true', dest='progress_with_newline', default=False,
- help='output progress bar as new lines')
+ help='Output progress bar as new lines')
verbosity.add_option(
'--no-progress',
action='store_true', dest='noprogress', default=False,
- help='do not print progress bar')
+ help='Do not print progress bar')
verbosity.add_option(
'--console-title',
action='store_true', dest='consoletitle', default=False,
- help='display progress in console titlebar')
+ help='Display progress in console titlebar')
verbosity.add_option(
'-v', '--verbose',
action='store_true', dest='verbose', default=False,
- help='print various debugging information')
+ help='Print various debugging information')
verbosity.add_option(
'--dump-pages', '--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
- help='print downloaded pages to debug problems (very verbose)')
+ help='Print downloaded pages to debug problems (very verbose)')
verbosity.add_option(
'--write-pages',
action='store_true', dest='write_pages', default=False,
verbosity.add_option(
'-C', '--call-home',
dest='call_home', action='store_true', default=False,
- help='Contact the youtube-dl server for debugging.')
+ help='Contact the youtube-dl server for debugging')
verbosity.add_option(
'--no-call-home',
dest='call_home', action='store_false', default=False,
- help='Do NOT contact the youtube-dl server for debugging.')
+ help='Do NOT contact the youtube-dl server for debugging')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option(
'-a', '--batch-file',
dest='batchfile', metavar='FILE',
- help='file containing URLs to download (\'-\' for stdin)')
+ help='File containing URLs to download (\'-\' for stdin)')
filesystem.add_option(
'--id', default=False,
- action='store_true', dest='useid', help='use only video ID in file name')
+ action='store_true', dest='useid', help='Use only video ID in file name')
filesystem.add_option(
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
- help=('output filename template. Use %(title)s to get the title, '
+ help=('Output filename template. Use %(title)s to get the title, '
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
'%(autonumber)s to get an automatically incremented number, '
'%(ext)s for the filename extension, '
'%(format)s for the format description (like "22 - 1280x720" or "HD"), '
- '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+ '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
'%(upload_date)s for the upload date (YYYYMMDD), '
'%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id, '
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
- help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+ help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option(
'--restrict-filenames',
action='store_true', dest='restrictfilenames', default=False,
filesystem.add_option(
'-A', '--auto-number',
action='store_true', dest='autonumber', default=False,
- help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000')
+ help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')
filesystem.add_option(
'-t', '--title',
action='store_true', dest='usetitle', default=False,
- help='[deprecated] use title in file name (default)')
+ help='[deprecated] Use title in file name (default)')
filesystem.add_option(
'-l', '--literal', default=False,
action='store_true', dest='usetitle',
- help='[deprecated] alias of --title')
+ help='[deprecated] Alias of --title')
filesystem.add_option(
'-w', '--no-overwrites',
action='store_true', dest='nooverwrites', default=False,
- help='do not overwrite files')
+ help='Do not overwrite files')
filesystem.add_option(
'-c', '--continue',
action='store_true', dest='continue_dl', default=True,
- help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+ help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
filesystem.add_option(
'--no-continue',
action='store_false', dest='continue_dl',
- help='do not resume partially downloaded files (restart from beginning)')
+ help='Do not resume partially downloaded files (restart from beginning)')
filesystem.add_option(
'--no-part',
action='store_true', dest='nopart', default=False,
- help='do not use .part files - write directly into output file')
+ help='Do not use .part files - write directly into output file')
filesystem.add_option(
'--no-mtime',
action='store_false', dest='updatetime', default=True,
- help='do not use the Last-modified header to set the file modification time')
+ help='Do not use the Last-modified header to set the file modification time')
filesystem.add_option(
'--write-description',
action='store_true', dest='writedescription', default=False,
- help='write video description to a .description file')
+ help='Write video description to a .description file')
filesystem.add_option(
'--write-info-json',
action='store_true', dest='writeinfojson', default=False,
- help='write video metadata to a .info.json file')
+ help='Write video metadata to a .info.json file')
filesystem.add_option(
'--write-annotations',
action='store_true', dest='writeannotations', default=False,
- help='write video annotations to a .annotation file')
+ help='Write video annotations to a .annotation file')
filesystem.add_option(
'--load-info',
dest='load_info_filename', metavar='FILE',
- help='json file containing the video information (created with the "--write-json" option)')
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
filesystem.add_option(
'--cookies',
dest='cookiefile', metavar='FILE',
- help='file to read cookies from and dump cookie jar in')
+ help='File to read cookies from and dump cookie jar in')
filesystem.add_option(
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
thumbnail.add_option(
'--write-thumbnail',
action='store_true', dest='writethumbnail', default=False,
- help='write thumbnail image to disk')
+ help='Write thumbnail image to disk')
thumbnail.add_option(
'--write-all-thumbnails',
action='store_true', dest='write_all_thumbnails', default=False,
- help='write all thumbnail image formats to disk')
+ help='Write all thumbnail image formats to disk')
thumbnail.add_option(
'--list-thumbnails',
action='store_true', dest='list_thumbnails', default=False,
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
- help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+ help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
postproc.add_option(
'-k', '--keep-video',
action='store_true', dest='keepvideo', default=False,
- help='keeps the video file on disk after the post-processing; the video is erased by default')
+ help='Keep the video file on disk after the post-processing; the video is erased by default')
postproc.add_option(
'--no-post-overwrites',
action='store_true', dest='nopostoverwrites', default=False,
- help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+ help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option(
'--embed-subs',
action='store_true', dest='embedsubtitles', default=False,
- help='embed subtitles in the video (only for mp4 videos)')
+ help='Embed subtitles in the video (only for mp4 videos)')
postproc.add_option(
'--embed-thumbnail',
action='store_true', dest='embedthumbnail', default=False,
- help='embed thumbnail in the audio as cover art')
+ help='Embed thumbnail in the audio as cover art')
postproc.add_option(
'--add-metadata',
action='store_true', dest='addmetadata', default=False,
- help='write metadata to the video file')
+ help='Write metadata to the video file')
postproc.add_option(
'--metadata-from-title',
metavar='FORMAT', dest='metafromtitle',
- help='parse additional metadata like song title / artist from the video title. '
+ help='Parse additional metadata like song title / artist from the video title. '
'The format syntax is the same as --output, '
'the parsed parameters replace existing values. '
'Additional templates: %(album), %(artist). '
postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
- help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
'--fixup',
metavar='POLICY', dest='fixup', default='detect_or_warn',
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
command_line_conf = sys.argv[1:]
+ # Workaround for Python 2.x, where argv is a byte list
+ if sys.version_info < (3,):
+ command_line_conf = [
+ a.decode(preferredencoding(), 'replace') for a in command_line_conf]
+
if '--ignore-config' in command_line_conf:
system_conf = []
user_conf = []
from __future__ import unicode_literals
-from ..utils import PostProcessingError
+import os
+
+from ..utils import (
+ PostProcessingError,
+ encodeFilename,
+)
class PostProcessor(object):
"""
return None, information # by default, keep file and do nothing
+ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+ try:
+ os.utime(encodeFilename(path), (atime, mtime))
+ except Exception:
+ self._downloader.report_warning(errnote)
+
class AudioConversionError(PostProcessingError):
pass
import io
import os
import subprocess
-import sys
import time
def executable(self):
return self._paths[self.basename]
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
@property
def probe_executable(self):
return self._paths[self.probe_basename]
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
- os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+
if self._deletetempfiles:
for ipath in input_paths:
os.remove(ipath)
def get_audio_codec(self, path):
- if not self.probe_executable:
+ if not self.probe_available:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
else:
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
- except:
- etype, e, tb = sys.exc_info()
- if isinstance(e, AudioConversionError):
- msg = 'audio conversion failed: ' + e.msg
- else:
- msg = 'error running ' + self.basename
- raise PostProcessingError(msg)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
- try:
- os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
- except:
- self._downloader.report_warning('Cannot update utime of audio file')
+ self.try_utime(
+ new_path, time.time(), information['filetime'],
+ errnote='Cannot update utime of audio file')
information['filepath'] = new_path
return self._nopostoverwrites, information
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
try:
versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+ date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
]
if day_first:
format_expressions.extend([
+ '%d-%m-%Y',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
])
else:
format_expressions.extend([
+ '%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
- res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
from __future__ import unicode_literals
-__version__ = '2015.03.18'
+__version__ = '2015.04.09'