Merge pull request #13669 from bmwiedemann/master
author Yen Chi Hsuan <yan12125@gmail.com>
Tue, 22 Aug 2017 13:51:20 +0000 (21:51 +0800)
committer GitHub <noreply@github.com>
Tue, 22 Aug 2017 13:51:20 +0000 (21:51 +0800)
[build] Override timestamps in zip file
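The headline change of this merge makes builds reproducible by giving every entry in the generated zip a fixed timestamp. The Makefile diff itself is not part of this excerpt; the Python sketch below only illustrates the general technique, assuming the reproducible-builds `SOURCE_DATE_EPOCH` convention — it is not the project's actual build script.

```python
# Illustrative sketch only: write a zip whose entries carry a fixed
# timestamp so repeated builds produce byte-identical archives.
from __future__ import unicode_literals

import os
import time
import zipfile


def make_reproducible_zip(zip_path, files):
    # Honor SOURCE_DATE_EPOCH if set; otherwise fall back to 1980-01-01,
    # the earliest date representable in the zip format (an assumption).
    epoch = int(os.environ.get('SOURCE_DATE_EPOCH', 315532800))
    date_time = time.gmtime(epoch)[:6]
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for name in sorted(files):  # a stable entry order also matters
            info = zipfile.ZipInfo(name, date_time=date_time)
            info.external_attr = 0o644 << 16  # normalize permissions too
            with open(name, 'rb') as f:
                zf.writestr(info, f.read())
```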

77 files changed:
.github/ISSUE_TEMPLATE.md
.github/ISSUE_TEMPLATE_tmpl.md
AUTHORS
ChangeLog
README.md
docs/supportedsites.md
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_options.py [new file with mode: 0644]
test/test_utils.py
test/testdata/mpd/float_duration.mpd [new file with mode: 0644]
youtube_dl/YoutubeDL.py
youtube_dl/downloader/dash.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/amcnetworks.py
youtube_dl/extractor/aparat.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/cda.py
youtube_dl/extractor/cinchcast.py
youtube_dl/extractor/clipfish.py [deleted file]
youtube_dl/extractor/clippit.py [new file with mode: 0644]
youtube_dl/extractor/cloudy.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dispeak.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/egghead.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/fourtube.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/itv.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/megaphone.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mlb.py
youtube_dl/extractor/mpora.py [deleted file]
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/periscope.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/podomatic.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/reddit.py [new file with mode: 0644]
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/tbs.py
youtube_dl/extractor/teamfourstar.py [deleted file]
youtube_dl/extractor/twentymin.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/vh1.py
youtube_dl/extractor/vidio.py
youtube_dl/extractor/vidme.py
youtube_dl/extractor/vlive.py
youtube_dl/extractor/voot.py [new file with mode: 0644]
youtube_dl/extractor/vzaar.py
youtube_dl/extractor/watchbox.py [new file with mode: 0644]
youtube_dl/extractor/xxxymovies.py
youtube_dl/extractor/yandexdisk.py [new file with mode: 0644]
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youku.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 0f20d048515bb2ed5fa47b087d49e802f7aa96a1..66dd4c4809d62539d135cdb931dc0f77b7280201 100644 (file)
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,16 +1,16 @@
 ## Please follow the guide below
 
 - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
-- Use *Preview* tab to see how your issue will actually look like
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+- Use the *Preview* tab to see what your issue will actually look like
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18**
 
 ### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
 
 ### What is the purpose of your *issue*?
 
 ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
 
-Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+Add the `-v` flag to **your command line** (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to the one below (replace it with **your** log inserted between triple ```):
+
 ```
-$ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.07.15
+[debug] youtube-dl version 2017.08.18
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md
index df79503d3ec8fe02e76b6f2c529a60959037934e..26f61d3b43e85fb3b45ac09a7dd9be8c9c2e9b26 100644 (file)
--- a/.github/ISSUE_TEMPLATE_tmpl.md
+++ b/.github/ISSUE_TEMPLATE_tmpl.md
@@ -1,16 +1,16 @@
 ## Please follow the guide below
 
 - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
-- Use *Preview* tab to see how your issue will actually look like
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+- Use the *Preview* tab to see what your issue will actually look like
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
 - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
 
 ### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
 
 ### What is the purpose of your *issue*?
@@ -28,9 +28,9 @@
 
 ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
 
-Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+Add the `-v` flag to **your command line** (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to the one below (replace it with **your** log inserted between triple ```):
+
 ```
-$ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
diff --git a/AUTHORS b/AUTHORS
index 053159cc3fd4822891131b83dd44d92e9277117f..478c7872f8cd7d8876b2b15033056ae896a2be68 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -223,3 +223,4 @@ Jan Kundrát
 Giuseppe Fabiano
 Örn Guðjónsson
 Parmjit Virk
+Genki Sky
diff --git a/ChangeLog b/ChangeLog
index 7d71fc5e10b3abe082423bfdb60fd717b5d304aa..c07cb9648a37237fc87a87f4f9777070643fbbad 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,141 @@
+version <unreleased>
+
+Core
+* [utils] Fix unescapeHTML for malformed strings like "&a&quot;" (#13935)
+
+Extractors
++ [liveleak] Support multi-video pages (#6542)
++ [liveleak] Support another liveleak embedding pattern (#13336)
+* [cda] Fix extraction (#13935)
+
+
+version 2017.08.18
+
+Core
+* [YoutubeDL] Sanitize byte string format URLs (#13951)
++ [extractor/common] Add support for float durations in _parse_mpd_formats
+  (#13919)
+
+Extractors
+* [arte] Detect unavailable videos (#13945)
+* [generic] Convert redirect URLs to unicode strings (#13951)
+* [udemy] Fix paid course detection (#13943)
+* [pluralsight] Use RPC API for course extraction (#13937)
++ [clippit] Add support for clippituser.tv
++ [qqmusic] Support new URL schemes (#13805)
+* [periscope] Renew HLS extraction (#13917)
+* [mixcloud] Extract decrypt key
+
+
+version 2017.08.13
+
+Core
+* [YoutubeDL] Make sure format id is not empty
+* [extractor/common] Make _family_friendly_search optional
+* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
+
+Extractors
+* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
++ [fourtube] Add support for pornerbros.com (#6022)
++ [fourtube] Add support for porntube.com (#7859, #13901)
++ [fourtube] Add support for fux.com
+* [limelight] Improve embeds detection (#13895)
++ [reddit] Add support for v.redd.it and reddit.com (#13847)
+* [aparat] Extract all formats (#13887)
+* [mixcloud] Fix play info decryption (#13885)
++ [generic] Add support for vzaar embeds (#13876)
+
+
+version 2017.08.09
+
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
+Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
++ [niconico] Support videos with multiple formats (#13522)
++ [niconico] Support HTML5-only videos (#13806)
+
+
+version 2017.08.06
+
+Core
+* Use relative paths for DASH fragments (#12990)
+
+Extractors
+* [pluralsight] Fix format selection
+- [mpora] Remove extractor (#13826)
++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
+* [vlive:channel] Limit number of videos per page to 100 (#13830)
+* [podomatic] Extend URL regular expression (#13827)
+* [cinchcast] Extend URL regular expression
+* [yandexdisk] Relax URL regular expression (#13824)
+* [vidme] Extract DASH and HLS formats
+- [teamfour] Remove extractor (#13782)
+* [pornhd] Fix extraction (#13783)
+* [udemy] Fix subtitles extraction (#13812)
+* [mlb] Extend URL regular expression (#13740, #13773)
++ [pbs] Add support for new URL schema (#13801)
+* [nrktv] Update API host (#13796)
+
+
+version 2017.07.30.1
+
+Core
+* [downloader/hls] Use redirect URL as manifest base (#13755)
+* [options] Correctly hide login info from debug outputs (#13696)
+
+Extractors
++ [watchbox] Add support for watchbox.de (#13739)
+- [clipfish] Remove extractor
++ [youjizz] Fix extraction (#13744)
++ [generic] Add support for another ooyala embed pattern (#13727)
++ [ard] Add support for live streams (#13771)
+* [soundcloud] Update client id
++ [soundcloud:trackstation] Add support for track stations (#13733)
+* [svtplay] Use geo verification proxy for API request
+* [svtplay] Update API URL (#13767)
++ [yandexdisk] Add support for yadi.sk (#13755)
++ [megaphone] Add support for megaphone.fm
+* [amcnetworks] Make rating optional (#12453)
+* [cloudy] Fix extraction (#13737)
++ [nickru] Add support for nickelodeon.ru
+* [mtv] Improve thumbnail extraction
+* [nick] Automate geo-restriction bypass (#13711)
+* [niconico] Improve error reporting (#13696)
+
+
+version 2017.07.23
+
+Core
+* [YoutubeDL] Improve default format specification (#13704)
+* [YoutubeDL] Do not override id, extractor and extractor_key for
+  url_transparent entities
+* [extractor/common] Fix playlist_from_matches
+
+Extractors
+* [itv] Fix production id extraction (#13671, #13703)
+* [vidio] Make duration non-fatal and fix a typo
+* [mtv] Skip missing video parts (#13690)
+* [sportbox:embed] Fix extraction
++ [npo] Add support for npo3.nl URLs (#13695)
+* [dramafever] Remove video id from title (#13699)
++ [egghead:lesson] Add support for lessons (#6635)
+* [funnyordie] Extract more metadata (#13677)
+* [youku:show] Fix playlist extraction (#13248)
++ [dispeak] Recognize sevt subdomain (#13276)
+* [adn] Improve error reporting (#13663)
+* [crunchyroll] Relax series and season regex (#13659)
++ [spiegel:article] Add support for nexx iframe embeds (#13029)
++ [nexx:embed] Add support for iframe embeds
+* [nexx] Improve JS embed extraction
++ [pearvideo] Add support for pearvideo.com (#13031)
+
+
 version 2017.07.15
 
 Core
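The unescapeHTML entry at the top of this ChangeLog corresponds to the new assertion added to test/test_utils.py further down in this commit; the behavior can be checked directly (the function and values are taken from that test):

```python
from youtube_dl.utils import unescapeHTML

# '&a' is not a valid HTML entity and is kept verbatim, while the
# well-formed '&quot;' that follows it still decodes to '"'.
assert unescapeHTML('&a&quot;') == '&a"'
```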
diff --git a/README.md b/README.md
index fe2bebc2aff18efbfa0451d6b289b94a35e24f3d..6f5d00df31f3c36c20499ab72fcdf5ed35d86873 100644 (file)
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
     sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
 
-Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
 
 You can also use pip:
 
@@ -33,7 +33,7 @@ You can also use pip:
     
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
-OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
+OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
 
     brew install youtube-dl
 
@@ -458,7 +458,7 @@ You can also use `--config-location` if you want to use custom configuration fil
 
 ### Authentication with `.netrc` file
 
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
@@ -485,7 +485,7 @@ The `-o` option allows users to indicate a template for the output file names.
 
 **tl;dr:** [navigate me to examples](#output-template-examples).
 
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with their sequence types are:
 
  - `id` (string): Video identifier
  - `title` (string): Video title
@@ -584,7 +584,7 @@ If you are using an output template inside a Windows batch file then you must es
 
 #### Output template examples
 
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
 
 ```bash
 $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@@ -603,7 +603,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
 $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
 
 # Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
+$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
 
 # Stream the video being downloaded to stdout
 $ youtube-dl -o - BaW_jenozKc
@@ -671,7 +671,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
 
 #### Format selection examples
 
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
 
 ```bash
 # Download best mp4 format available or any other best if no mp4 available
@@ -716,17 +716,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
 
 ### How do I update youtube-dl?
 
-If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
 
 If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
 
-If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
+If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
 
 As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
 
     sudo apt-get remove -y youtube-dl
 
-Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
 
 ```
 sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@@ -766,11 +766,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
 
 youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
 
-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
+Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
 
 ### I have downloaded a video but how can I play it?
 
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
 
 ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
 
@@ -845,10 +845,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
 
 ### How do I download a video starting with a `-`?
 
-Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
 
     youtube-dl -- -wNyEUrxzFU
-    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+    youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
 
 ### How do I pass cookies to youtube-dl?
 
@@ -862,9 +862,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula
 
 ### How do I stream directly to media player?
 
-You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
 
-    youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+    youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
 
 ### How do I download only new videos from a playlist?
 
@@ -884,7 +884,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
 
 When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
 
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
 
 If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
 
@@ -910,7 +910,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
 
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
@@ -924,7 +924,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
 
 # DEVELOPER INSTRUCTIONS
 
-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 
 To run youtube-dl as a developer, you don't need to build anything either. Simply execute
 
@@ -972,7 +972,7 @@ After you have ensured this site is distributing its content legally, you can fo
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TEST = {
-            'url': 'http://yourextractor.com/watch/42',
+            'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
                 'id': '42',
@@ -1005,8 +1005,8 @@ After you have ensured this site is distributing its content legally, you can fo
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
@@ -1162,7 +1162,7 @@ import youtube_dl
 
 ydl_opts = {}
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
 Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
@@ -1201,19 +1201,19 @@ ydl_opts = {
     'progress_hooks': [my_hook],
 }
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 [debug] youtube-dl version 2015.12.06
 [debug] Git HEAD: 135392e
@@ -1244,7 +1244,7 @@ For bug reports, this means that your report should contain the *complete* outpu
 
 If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
 
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
 ###  Are you using the latest version?
 
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index d7304ba0676bd59326d1fde82c56fca87f0a5e2c..1991975cc8024d376216047a5f212568e3c79d30 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -42,7 +42,7 @@
  - **Allocine**
  - **AlphaPorno**
  - **AMCNetworks**
- - **anderetijden**: npo.nl and ntr.nl
+ - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **AnimeOnDemand**
  - **anitube.se**
  - **Anvato**
  - **chirbit:profile**
  - **Cinchcast**
  - **CJSW**
- - **Clipfish**
  - **cliphunter**
+ - **Clippit**
  - **ClipRs**
  - **Clipsyndicate**
  - **CloserToTruth**
  - **EbaumsWorld**
  - **EchoMsk**
  - **egghead:course**: egghead.io course
+ - **egghead:lesson**: egghead.io lesson
  - **eHow**
  - **Einthusan**
  - **eitb.tv**
  - **Funimation**
  - **FunnyOrDie**
  - **Fusion**
+ - **Fux**
  - **FXNetworks**
  - **GameInformer**
  - **GameOne**
  - **Medialaan**
  - **Mediaset**
  - **Medici**
+ - **megaphone.fm**: megaphone.fm embedded players
  - **Meipai**: 美拍
  - **MelonVOD**
  - **META**
  - **MovieFap**
  - **Moviezine**
  - **MovingImage**
- - **MPORA**
  - **MSN**
  - **mtg**: MTG services
  - **mtv**
  - **NextMediaActionNews**: 蘋果日報 - 動新聞
  - **NextTV**: 壹電視
  - **Nexx**
+ - **NexxEmbed**
  - **nfb**: National Film Board of Canada
  - **nfl.com**
  - **NhkVod**
  - **nhl.com:videocenter:category**: NHL videocenter category
  - **nick.com**
  - **nick.de**
+ - **nickelodeonru**
  - **nicknight**
  - **niconico**: ニコニコ動画
  - **NiconicoPlaylist**
  - **NowTVList**
  - **nowvideo**: NowVideo
  - **Noz**
- - **npo**: npo.nl and ntr.nl
+ - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **npo.nl:live**
  - **npo.nl:radio**
  - **npo.nl:radio:fragment**
  - **Patreon**
  - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET  (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
  - **pcmag**
+ - **PearVideo**
  - **People**
  - **periscope**: Periscope
  - **periscope:user**: Periscope user videos
  - **PolskieRadio**
  - **PolskieRadioCategory**
  - **PornCom**
+ - **PornerBros**
  - **PornFlip**
  - **PornHd**
  - **PornHub**: PornHub and Thumbzilla
  - **Pornotube**
  - **PornoVoisines**
  - **PornoXO**
+ - **PornTube**
  - **PressTV**
  - **PrimeShareTV**
  - **PromptFile**
  - **RBMARadio**
  - **RDS**: RDS.ca
  - **RedBullTV**
+ - **Reddit**
+ - **RedditR**
  - **RedTube**
  - **RegioTV**
  - **RENTV**
  - **soundcloud:playlist**
  - **soundcloud:search**: Soundcloud search
  - **soundcloud:set**
+ - **soundcloud:trackstation**
  - **soundcloud:user**
  - **soundgasm**
  - **soundgasm:profile**
  - **tagesschau:player**
  - **Tass**
  - **TastyTrade**
- - **TBS**
+ - **TBS** (Currently broken)
  - **TDSLifeway**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **TeachingChannel**
  - **Teamcoco**
- - **TeamFourStar**
  - **TechTalks**
  - **techtv.mit.edu**
  - **ted**
  - **VODPl**
  - **VODPlatform**
  - **VoiceRepublic**
+ - **Voot**
  - **VoxMedia**
  - **Vporn**
- - **vpro**: npo.nl and ntr.nl
+ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **Vrak**
  - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
  - **vrv**
  - **washingtonpost**
  - **washingtonpost:article**
  - **wat.tv**
+ - **WatchBox**
  - **WatchIndianPorn**: Watch Indian Porn
  - **WDR**
  - **wdr:mobile**
  - **wholecloud**: WholeCloud
  - **Wimp**
  - **Wistia**
- - **wnl**: npo.nl and ntr.nl
+ - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **WorldStarHipHop**
  - **wrzuta.pl**
  - **wrzuta.pl:playlist**
  - **XVideos**
  - **XXXYMovies**
  - **Yahoo**: Yahoo screen and movies
+ - **YandexDisk**
  - **yandexmusic:album**: Яндекс.Музыка - Альбом
  - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
  - **yandexmusic:track**: Яндекс.Музыка - Трек
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 6f52e11f7b4b58c388e1a2bde12a64a76c62d107..f18a823fcf834e4bbae95e9d72f9f3821c307c2b 100644 (file)
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -10,6 +10,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import FakeYDL, expect_dict, expect_value
+from youtube_dl.compat import compat_etree_fromstring
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -488,6 +489,91 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_mpd_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/13919
+                'float_duration',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '318597',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 318.597,
+                    'width': 340,
+                    'height': 192,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '638590',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 638.59,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1022565',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 1022.565,
+                    'width': 688,
+                    'height': 384,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '2046506',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2046.506,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '3998017',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640029',
+                    'tbr': 3998.017,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '5997485',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640032',
+                    'tbr': 5997.485,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            ),
+        ]
+
+        for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_mpd_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    mpd_url=mpd_url)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 70989e2322d93d00f7fe30dcde96b09d93ec582f..e70cbcd375a4670bb586612ccaa107605dc985dc 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -41,6 +41,7 @@ def _make_result(formats, **kwargs):
         'id': 'testid',
         'title': 'testttitle',
         'extractor': 'testex',
+        'extractor_key': 'TestEx',
     }
     res.update(**kwargs)
     return res
@@ -370,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL({'format': 'best[height>360]'})
         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 
+    def test_format_selection_issue_10083(self):
+        # See https://github.com/rg3/youtube-dl/issues/10083
+        formats = [
+            {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+            {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
     def test_invalid_format_specs(self):
         def assert_syntax_error(format_spec):
             ydl = YDL({'format': format_spec})
@@ -448,6 +462,17 @@ class TestFormatSelection(unittest.TestCase):
             pass
         self.assertEqual(ydl.downloaded_info_dicts, [])
 
+    def test_default_format_spec(self):
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
+        ydl = YDL({'outtmpl': '-'})
+        self.assertEqual(ydl._default_format_spec({}), 'best')
+
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
+
 
 class TestYoutubeDL(unittest.TestCase):
     def test_subtitles(self):
@@ -761,7 +786,8 @@ class TestYoutubeDL(unittest.TestCase):
                     '_type': 'url_transparent',
                     'url': 'foo2:',
                     'ie_key': 'Foo2',
-                    'title': 'foo1 title'
+                    'title': 'foo1 title',
+                    'id': 'foo1_id',
                 }
 
         class Foo2IE(InfoExtractor):
@@ -787,6 +813,9 @@ class TestYoutubeDL(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['url'], TEST_URL)
         self.assertEqual(downloaded['title'], 'foo1 title')
+        self.assertEqual(downloaded['id'], 'testid')
+        self.assertEqual(downloaded['extractor'], 'testex')
+        self.assertEqual(downloaded['extractor_key'], 'TestEx')
 
 
 if __name__ == '__main__':
diff --git a/test/test_options.py b/test/test_options.py
new file mode 100644 (file)
index 0000000..3a25a6b
--- /dev/null
@@ -0,0 +1,26 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.options import _hide_login_info
+
+
+class TestOptions(unittest.TestCase):
+    def test_hide_login_info(self):
+        self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
+                         ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+        self.assertEqual(_hide_login_info(['-u']), ['-u'])
+        self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
+                         ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+        self.assertEqual(_hide_login_info(['--username=foo']),
+                         ['--username=PRIVATE'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 7803e5bc74663371620bf12ac09656c0fbbdaf96..e50f3764e57050c560365eb566979e171538985b 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 
@@ -1182,6 +1183,10 @@ part 3</font></u>
             cli_bool_option(
                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
             ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])
 
     def test_ohdave_rsa_encrypt(self):
         N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd
new file mode 100644 (file)
index 0000000..8dc1d2d
--- /dev/null
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+       <Period bitstreamSwitching="true">
+               <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+                       <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+                       <Representation id="318597" bandwidth="61587"></Representation>
+               </AdaptationSet>
+               <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+                       <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+                       <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+                       <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+                       <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+                       <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+                       <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+                       <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+               </AdaptationSet>
+       </Period>
+</MPD>
\ No newline at end of file
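A quick sanity check of why this fixture needed float support (#13919): the segment templates above declare duration="2000000.0", which `int()` rejects. Assuming the fragment count is derived as the presentation duration divided by the per-segment duration, the numbers work out as follows:

```python
import math

# int('2000000.0') raises ValueError; parsing the attribute as a float
# recovers 2-second segments at the declared timescale of 1000000.
segment_duration = float('2000000.0') / 1000000   # 2.0 seconds per fragment
presentation_duration = 6014                      # PT6014S from the MPD root
print(int(math.ceil(presentation_duration / segment_duration)))  # 3007
```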
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 89c07be290eaad1b198e3acf089831884e24ad21..5f4c93ea370d794e62f0e6eee4f1f81cecd1300b 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -860,7 +860,7 @@ class YoutubeDL(object):
 
             force_properties = dict(
                 (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'ie_key'):
+            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                 if f in force_properties:
                     del force_properties[f]
             new_result = info.copy()
@@ -1064,6 +1064,25 @@ class YoutubeDL(object):
             return op(actual_value, comparison_value)
         return _filter
 
+    def _default_format_spec(self, info_dict, download=True):
+        req_format_list = []
+
+        def can_have_partial_formats():
+            if self.params.get('simulate', False):
+                return True
+            if not download:
+                return True
+            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+                return False
+            if info_dict.get('is_live'):
+                return False
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+        if can_have_partial_formats():
+            req_format_list.append('bestvideo+bestaudio')
+        req_format_list.append('best')
+        return '/'.join(req_format_list)
+
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
             message = (
@@ -1464,12 +1483,14 @@ class YoutubeDL(object):
 
         def is_wellformed(f):
             url = f.get('url')
-            valid_url = url and isinstance(url, compat_str)
-            if not valid_url:
+            if not url:
                 self.report_warning(
                     '"url" field is missing or empty - skipping format, '
                     'there is an error in extractor')
-            return valid_url
+                return False
+            if isinstance(url, bytes):
+                sanitize_string_field(f, 'url')
+            return True
 
         # Filter out malformed formats for better extraction robustness
         formats = list(filter(is_wellformed, formats))
@@ -1481,7 +1502,7 @@ class YoutubeDL(object):
             sanitize_string_field(format, 'format_id')
             sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                 format['format_id'] = compat_str(i)
             else:
                 # Sanitize format_id from characters used in format selector expression
@@ -1534,14 +1555,10 @@ class YoutubeDL(object):
 
         req_format = self.params.get('format')
         if req_format is None:
-            req_format_list = []
-            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    not info_dict.get('is_live')):
-                merger = FFmpegMergerPP(self)
-                if merger.available and merger.can_merge():
-                    req_format_list.append('bestvideo+bestaudio')
-            req_format_list.append('best')
-            req_format = '/'.join(req_format_list)
+            req_format = self._default_format_spec(info_dict, download=download)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Default format spec: %s' % req_format)
+
         format_selector = self.build_format_selector(req_format)
 
         # While in format selection we may need to have an access to the original
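The refactored logic is easier to read restated as a standalone function. This is only a sketch of the decision the new `_default_format_spec` makes, with `merger_usable` standing in for the `FFmpegMergerPP` availability check; it is not the method itself:

```python
def default_format_spec(params, info_dict, download=True, merger_usable=True):
    def can_have_partial_formats():
        if params.get('simulate', False):
            return True   # nothing is written, so merging never happens
        if not download:
            return True
        if params.get('outtmpl') == '-':
            return False  # streaming to stdout cannot be merged afterwards
        if info_dict.get('is_live'):
            return False
        return merger_usable
    return 'bestvideo+bestaudio/best' if can_have_partial_formats() else 'best'


# Mirrors the assertions in the new test_default_format_spec:
assert default_format_spec({'outtmpl': '-'}, {}) == 'best'
assert default_format_spec({'simulate': True}, {}) == 'bestvideo+bestaudio/best'
assert default_format_spec({}, {'is_live': True}) == 'best'
```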
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 7491fdad857af2f36433b8991b6208fbd9cef99e..576ece6db369254bf491cac972dc845ef1ac2653 100644 (file)
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
+from ..utils import urljoin
 
 
 class DashSegmentsFD(FragmentFD):
@@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
     FD_NAME = 'dashsegments'
 
     def real_download(self, filename, info_dict):
-        segments = info_dict['fragments'][:1] if self.params.get(
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
             'test', False) else info_dict['fragments']
 
         ctx = {
             'filename': filename,
-            'total_frags': len(segments),
+            'total_frags': len(fragments),
         }
 
         self._prepare_and_start_frag_download(ctx)
@@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
         frag_index = 0
-        for i, segment in enumerate(segments):
+        for i, fragment in enumerate(fragments):
             frag_index += 1
             if frag_index <= ctx['fragment_index']:
                 continue
@@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    fragment_url = fragment.get('url')
+                    if not fragment_url:
+                        assert fragment_base_url
+                        fragment_url = urljoin(fragment_base_url, fragment['path'])
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)
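Editor's note: the downloader side now mirrors the MPD parser change in common.py further down — a fragment may carry a relative 'path' to be joined with 'fragment_base_url' instead of a ready-made 'url'. A minimal sketch with made-up URLs:

    from youtube_dl.utils import urljoin

    fragment_base_url = 'https://cdn.example.com/dash/'  # hypothetical
    fragments = [
        {'url': 'https://other.example.com/seg-1.m4s'},  # absolute, used as-is
        {'path': 'seg-2.m4s'},                           # relative, joined below
    ]
    for fragment in fragments:
        fragment_url = fragment.get('url')
        if not fragment_url:
            fragment_url = urljoin(fragment_base_url, fragment['path'])
        print(fragment_url)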
index 0e29c8a2ad2559737d8c0210e9c6784310616ec6..46308cf072c25086d896bb759adad10a74d2cfc6 100644 (file)
@@ -59,9 +59,9 @@ class HlsFD(FragmentFD):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 
-        manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
-
-        s = manifest.decode('utf-8', 'ignore')
+        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+        man_url = urlh.geturl()
+        s = urlh.read().decode('utf-8', 'ignore')
 
         if not self.can_download(s, info_dict):
             if info_dict.get('extra_param_to_segment_url'):
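Editor's note: reading geturl() matters because relative segment URIs in the playlist must be resolved against the manifest URL after any HTTP redirects, not the one originally requested. Roughly (placeholder URL, not a real manifest):

    from youtube_dl.compat import compat_urllib_request

    urlh = compat_urllib_request.urlopen('https://example.com/master.m3u8')
    man_url = urlh.geturl()  # final URL after redirects, the correct base
    playlist = urlh.read().decode('utf-8', 'ignore')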
index 3a0ec6776a565f66353ce7f8cb5bb188f5ccbc12..dd3b18d72d05f3deaab902b75cb6064e4b9d16ac 100644 (file)
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
 
 from .theplatform import ThePlatformIE
 from ..utils import (
-    update_url_query,
-    parse_age_limit,
     int_or_none,
+    parse_age_limit,
+    try_get,
+    update_url_query,
 )
 
 
@@ -68,7 +69,8 @@ class AMCNetworksIE(ThePlatformIE):
         info = self._parse_theplatform_metadata(theplatform_metadata)
         video_id = theplatform_metadata['pid']
         title = theplatform_metadata['title']
-        rating = theplatform_metadata['ratings'][0]['rating']
+        rating = try_get(
+            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
         auth_required = self._search_regex(
             r'window\.authRequired\s*=\s*(true|false);',
             webpage, 'auth required')
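Editor's note: try_get turns the previously fatal ratings[0] lookup into a soft one; a quick illustration with a made-up metadata dict:

    from youtube_dl.utils import try_get

    theplatform_metadata = {'ratings': []}  # made up: no rating present
    # The IndexError raised inside the lambda is swallowed and None is
    # returned instead of aborting the whole extraction.
    rating = try_get(theplatform_metadata, lambda x: x['ratings'][0]['rating'])
    assert rating is None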
index 025e29aa46fe5db97c323fa95d947470f1f2023a..e394cb66143e2e72f029d629fe4dd6b4b5e7cf8e 100644 (file)
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )
 
 
 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+
+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }
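Editor's note: how a single fileList item maps onto a format dict can be sketched in isolation (the item below is made up, shaped like the real payload):

    import re
    from youtube_dl.utils import int_or_none, mimetype2ext

    item = {'file': 'https://example.com/video.mp4', 'type': 'video/mp4', 'label': '720p'}
    label = item.get('label')
    m = re.search(r'(\d+)[pP]', label or '')
    fmt = {
        'url': item['file'],
        'ext': mimetype2ext(item.get('type')),             # 'mp4'
        'format_id': label or 'mp4',
        'height': int_or_none(m.group(1)) if m else None,  # 720
    }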
index 2d5599456688eba9756e28c2ffe9dbae48decb2c..3f248b14728ab3655a2e17f7b38a95184042d770 100644 (file)
@@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor):
 
         duration = int_or_none(media_info.get('_duration'))
         thumbnail = media_info.get('_previewImage')
+        is_live = media_info.get('_isLive') is True
 
         subtitles = {}
         subtitle_url = media_info.get('_subtitleUrl')
@@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor):
             'id': video_id,
             'duration': duration,
             'thumbnail': thumbnail,
+            'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
         }
@@ -166,9 +168,11 @@ class ARDMediathekIE(InfoExtractor):
         # determine video id from url
         m = re.match(self._VALID_URL, url)
 
+        document_id = None
+
         numid = re.search(r'documentId=([0-9]+)', url)
         if numid:
-            video_id = numid.group(1)
+            document_id = video_id = numid.group(1)
         else:
             video_id = m.group('video_id')
 
@@ -228,12 +232,16 @@ class ARDMediathekIE(InfoExtractor):
                 'formats': formats,
             }
         else:  # request JSON file
+            if not document_id:
+                video_id = self._search_regex(
+                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
             info = self._extract_media_info(
-                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+                'http://www.ardmediathek.de/play/media/%s' % video_id,
+                webpage, video_id)
 
         info.update({
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if info.get('is_live') else title,
             'description': description,
             'thumbnail': thumbnail,
         })
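Editor's note: for live streams the title gets a timestamp suffix via _live_title, so two recordings of the same stream do not collide on disk; the helper boils down to this (mirrored here for illustration, not imported):

    import datetime

    def live_title(name):
        # mirrors InfoExtractor._live_title: append the current local time
        return name + ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')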
index 56baef29d4f644c1b52c7d2e5f26fcca7e89e9e4..02613cf5d9e86b6c10f49caab28916fae78868dc 100644 (file)
@@ -9,12 +9,13 @@ from ..compat import (
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    ExtractorError,
     find_xpath_attr,
-    unified_strdate,
     get_element_by_attribute,
     int_or_none,
     NO_DEFAULT,
     qualities,
+    unified_strdate,
 )
 
 # There are different sources of video in arte.tv, the extraction process
@@ -79,6 +80,13 @@ class ArteTVBaseIE(InfoExtractor):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
+        vsr = player_info['VSR']
+
+        if not vsr and not player_info.get('VRU'):
+            raise ExtractorError(
+                'Video %s is not available' % (player_info.get('VID') or video_id),
+                expected=True)
+
         upload_date_str = player_info.get('shootingDate')
         if not upload_date_str:
             upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
@@ -107,7 +115,7 @@ class ArteTVBaseIE(InfoExtractor):
         langcode = LANGS.get(lang, lang)
 
         formats = []
-        for format_id, format_dict in player_info['VSR'].items():
+        for format_id, format_dict in vsr.items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
             l = re.escape(langcode)
index 9ddb9af1725467ff67c8407f46ce1f891ed701de..be41bd5a22477fce2aca4a043799574e148fdc57 100644 (file)
@@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor):
             raise ExtractorError('The page doesn\'t contain any tracks')
         # Only tracks with duration info have songs
         entries = [
-            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            self.url_result(
+                compat_urlparse.urljoin(url, t_path),
+                ie=BandcampIE.ie_key(),
+                video_title=self._search_regex(
+                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+                    elem_content, 'track title', fatal=False))
             for elem_content, t_path in track_elements
             if self._html_search_meta('duration', elem_content, default=None)]
 
index 79ded6ba1de68770a5d2fb4d603b5b5a1c03fdcc..911ae6780625003e6f71fb7e4841bc42dd422b8f 100644 (file)
@@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor):
                             programmes/(?!articles/)|
                             iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                             music/(?:clips|audiovideo/popular)[/#]|
-                            radio/player/
+                            radio/player/|
+                            events/[^/]+/play/[^/]+/
                         )
                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX
index 78b7a923c7809a363bf26dfb164dea706c3035f1..0c3af23d58270a9710c54e30a0eee7b375be562c 100755 (executable)
@@ -124,7 +124,7 @@ class CDAIE(InfoExtractor):
         }
 
         def extract_format(page, version):
-            json_str = self._search_regex(
+            json_str = self._html_search_regex(
                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                 '%s player_json' % version, fatal=False, group='player_data')
             if not json_str:
index 562c9bbbb4d02305d22a7ad5bc22ef9056d25258..b861d54b0a1a3c37ca9999d4bbb865d10e36abcf 100644 (file)
@@ -9,12 +9,20 @@ from ..utils import (
 
 
 class CinchcastIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
+        'info_dict': {
+            'id': '5258197',
+            'ext': 'mp3',
+            'title': 'Train Your Brain to Up Your Game with Coach Mandy',
+            'upload_date': '20130816',
+        },
+    }, {
         # Actual test is run in generic, look for undergroundwellness
         'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
         'only_matching': True,
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
deleted file mode 100644 (file)
index 0920f62..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-)
-
-
-class ClipfishIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
-        'md5': 'b9a5dc46294154c1193e2d10e0c95693',
-        'info_dict': {
-            'id': '4343170',
-            'ext': 'mp4',
-            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
-            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
-            'upload_date': '20161005',
-            'duration': 1291,
-            'view_count': int,
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video_info = self._download_json(
-            'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
-            video_id)['items'][0]
-
-        formats = []
-
-        m3u8_url = video_info.get('media_videourl_hls')
-        if m3u8_url:
-            formats.append({
-                'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
-                'ext': 'mp4',
-                'format_id': 'hls',
-            })
-
-        mp4_url = video_info.get('media_videourl')
-        if mp4_url:
-            formats.append({
-                'url': mp4_url,
-                'format_id': 'mp4',
-                'width': int_or_none(video_info.get('width')),
-                'height': int_or_none(video_info.get('height')),
-                'tbr': int_or_none(video_info.get('bitrate')),
-            })
-
-        descr = video_info.get('descr')
-        if descr:
-            descr = descr.strip()
-
-        return {
-            'id': video_id,
-            'title': video_info['title'],
-            'description': descr,
-            'formats': formats,
-            'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
-            'duration': int_or_none(video_info.get('media_length')),
-            'upload_date': unified_strdate(video_info.get('pubDate')),
-            'view_count': int_or_none(video_info.get('media_views'))
-        }
diff --git a/youtube_dl/extractor/clippit.py b/youtube_dl/extractor/clippit.py
new file mode 100644 (file)
index 0000000..a1a7a77
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    qualities,
+)
+
+import re
+
+
+class ClippitIE(InfoExtractor):
+
+    _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
+    _TEST = {
+        'url': 'https://www.clippituser.tv/c/evmgm',
+        'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
+        'info_dict': {
+            'id': 'evmgm',
+            'ext': 'mp4',
+            'title': 'Bye bye Brutus. #BattleBots  - Clippit',
+            'uploader': 'lizllove',
+            'uploader_url': 'https://www.clippituser.tv/p/lizllove',
+            'timestamp': 1472183818,
+            'upload_date': '20160826',
+            'description': 'BattleBots | ABC',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
+
+        FORMATS = ('sd', 'hd')
+        quality = qualities(FORMATS)
+        formats = []
+        for format_id in FORMATS:
+            media_url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id,
+                                                webpage, 'url', fatal=False)
+            if not media_url:
+                continue
+            match = re.search(r'/(?P<height>\d+)\.mp4', media_url)
+            formats.append({
+                'url': media_url,
+                'format_id': format_id,
+                'quality': quality(format_id),
+                'height': int(match.group('height')) if match else None,
+            })
+
+        uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
+                                           webpage, 'uploader', fatal=False)
+        uploader_url = ('https://www.clippituser.tv/p/' + uploader
+                        if uploader else None)
+
+        timestamp = self._html_search_regex(r'datetime="(.+?)"',
+                                            webpage, 'date', fatal=False)
+        thumbnail = self._html_search_regex(r'data-image="(.+?)"',
+                                            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'uploader': uploader,
+            'uploader_url': uploader_url,
+            'timestamp': parse_iso8601(timestamp),
+            'description': self._og_search_description(webpage),
+            'thumbnail': thumbnail,
+        }
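Editor's note: once registered in extractors.py (see the hunk further down), the new extractor is exercised like any other supported site, e.g.:

    youtube-dl "https://www.clippituser.tv/c/evmgm"

and its _TEST entry runs via the repo's usual harness, python test/test_download.py TestDownload.test_Clippit.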
index 9bc8dbea449509cf275d10470d6e01f337288a21..85ca20eccd0c5bb2e8e39468d6ad62428d918059 100644 (file)
@@ -30,7 +30,11 @@ class CloudyIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
+            'https://www.cloudy.ec/embed.php', video_id, query={
+                'id': video_id,
+                'playerPage': 1,
+                'autoplay': 1,
+            })
 
         info = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
index 748b4d59f0af276f375d9ba924273bc61d1d044d..ceba4ca1c415cec0381d9d152dc72a7d6bf255fe 100644 (file)
@@ -940,7 +940,8 @@ class InfoExtractor(object):
 
     def _family_friendly_search(self, html):
         # See http://schema.org/VideoObject
-        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', html, default=None)
 
         if not family_friendly:
             return None
@@ -1785,7 +1786,7 @@ class InfoExtractor(object):
                     ms_info['timescale'] = int(timescale)
                 segment_duration = source.get('duration')
                 if segment_duration:
-                    ms_info['segment_duration'] = int(segment_duration)
+                    ms_info['segment_duration'] = float(segment_duration)
 
             def extract_Initialization(source):
                 initialization = source.find(_add_ns('Initialization'))
@@ -1892,9 +1893,13 @@ class InfoExtractor(object):
                                 'Bandwidth': bandwidth,
                             }
 
+                        def location_key(location):
+                            return 'url' if re.match(r'^https?://', location) else 'path'
+
                         if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
                             media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                            media_location_key = location_key(media_template)
 
                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                             # can't be used at the same time
@@ -1904,7 +1909,7 @@ class InfoExtractor(object):
                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                 representation_ms_info['fragments'] = [{
-                                    'url': media_template % {
+                                    media_location_key: media_template % {
                                         'Number': segment_number,
                                         'Bandwidth': bandwidth,
                                     },
@@ -1928,7 +1933,7 @@ class InfoExtractor(object):
                                         'Number': segment_number,
                                     }
                                     representation_ms_info['fragments'].append({
-                                        'url': segment_url,
+                                        media_location_key: segment_url,
                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']),
                                     })
 
@@ -1952,8 +1957,9 @@ class InfoExtractor(object):
                             for s in representation_ms_info['s']:
                                 duration = float_or_none(s['d'], timescale)
                                 for r in range(s.get('r', 0) + 1):
+                                    segment_uri = representation_ms_info['segment_urls'][segment_index]
                                     fragments.append({
-                                        'url': representation_ms_info['segment_urls'][segment_index],
+                                        location_key(segment_uri): segment_uri,
                                         'duration': duration,
                                     })
                                     segment_index += 1
@@ -1962,6 +1968,7 @@ class InfoExtractor(object):
                         # No fragments key is present in this case.
                         if 'fragments' in representation_ms_info:
                             f.update({
+                                'fragment_base_url': base_url,
                                 'fragments': [],
                                 'protocol': 'http_dash_segments',
                             })
@@ -1969,10 +1976,8 @@ class InfoExtractor(object):
                                 initialization_url = representation_ms_info['initialization_url']
                                 if not f.get('url'):
                                     f['url'] = initialization_url
-                                f['fragments'].append({'url': initialization_url})
+                                f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                            for fragment in f['fragments']:
-                                fragment['url'] = urljoin(base_url, fragment['url'])
                         try:
                             existing_format = next(
                                 fo for fo in formats
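Editor's note: location_key is what lets the parser defer URL joining to the downloader — absolute locations keep a 'url' key, everything else is stored under 'path' relative to fragment_base_url. Standalone:

    import re

    def location_key(location):
        return 'url' if re.match(r'^https?://', location) else 'path'

    assert location_key('https://cdn.example.com/seg-1.m4s') == 'url'
    assert location_key('seg-2.m4s') == 'path'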
@@ -2110,19 +2115,19 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type):
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
-            ext = determine_ext(full_url)
+            ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
                     full_url, video_id, ext='mp4',
                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
-                    preference=preference)
+                    preference=preference, fatal=False)
             elif ext == 'mpd':
                 is_plain_url = False
                 formats = self._extract_mpd_formats(
-                    full_url, video_id, mpd_id=mpd_id)
+                    full_url, video_id, mpd_id=mpd_id, fatal=False)
             else:
                 is_plain_url = True
                 formats = [{
@@ -2161,9 +2166,9 @@ class InfoExtractor(object):
                     src = source_attributes.get('src')
                     if not src:
                         continue
-                    is_plain_url, formats = _media_formats(src, media_type)
+                    f = parse_content_type(source_attributes.get('type'))
+                    is_plain_url, formats = _media_formats(src, media_type, f)
                     if is_plain_url:
-                        f = parse_content_type(source_attributes.get('type'))
                         f.update(formats[0])
                         media_info['formats'].append(f)
                     else:
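Editor's note: threading parse_content_type's result into _media_formats means the <source type=...> MIME type now decides the container before falling back to the URL extension, so e.g. an extensionless HLS source is still routed to the m3u8 branch:

    from youtube_dl.utils import mimetype2ext

    ext = mimetype2ext('application/x-mpegurl')
    assert ext == 'm3u8'  # selects the _extract_m3u8_formats branch above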
index a78cb8a2a57c4a851798e94d6f88ee56bd9df68c..c05f601e2b1a69dcf4d73e090638ec990c6c7331 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class DigitallySpeakingIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
+    _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
 
     _TESTS = [{
         # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
@@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor):
         # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
         'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
         'only_matching': True,
+    }, {
+        # From http://www.gdcvault.com/play/1013700/Advanced-Material
+        'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
+        'only_matching': True,
     }]
 
     def _parse_mp4(self, metadata):
index 1a41760f8814ef69a10811ce858f434a8831fd73..76e784105451293705f0057dbfe5035d73bb5c1d 100644 (file)
@@ -7,16 +7,18 @@ import time
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
     compat_HTTPError,
+    compat_str,
+    compat_urlparse,
 )
 from ..utils import (
-    USER_AGENTS,
     ExtractorError,
     int_or_none,
-    unified_strdate,
     remove_end,
+    try_get,
+    unified_strdate,
     update_url_query,
+    USER_AGENTS,
 )
 
 
@@ -183,28 +185,44 @@ class DPlayItIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        info_url = self._search_regex(
-            r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
-            webpage, 'video id')
-
         title = remove_end(self._og_search_title(webpage), ' | Dplay')
 
-        try:
-            info = self._download_json(
-                info_url, display_id, headers={
-                    'Authorization': 'Bearer %s' % self._get_cookies(url).get(
-                        'dplayit_token').value,
-                    'Referer': url,
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
-                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
-                error = info['errors'][0]
-                if error.get('code') == 'access.denied.geoblocked':
-                    self.raise_geo_restricted(
-                        msg=error.get('detail'), countries=self._GEO_COUNTRIES)
-                raise ExtractorError(info['errors'][0]['detail'], expected=True)
-            raise
+        video_id = None
+
+        info = self._search_regex(
+            r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
+            webpage, 'playback JSON', default=None)
+        if info:
+            for _ in range(2):
+                info = self._parse_json(info, display_id, fatal=False)
+                if not info:
+                    break
+            else:
+                video_id = try_get(info, lambda x: x['data']['id'])
+
+        if not info:
+            info_url = self._search_regex(
+                r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
+                webpage, 'info url')
+
+            video_id = info_url.rpartition('/')[-1]
+
+            try:
+                info = self._download_json(
+                    info_url, display_id, headers={
+                        'Authorization': 'Bearer %s' % self._get_cookies(url).get(
+                            'dplayit_token').value,
+                        'Referer': url,
+                    })
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
+                    info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+                    error = info['errors'][0]
+                    if error.get('code') == 'access.denied.geoblocked':
+                        self.raise_geo_restricted(
+                            msg=error.get('detail'), countries=self._GEO_COUNTRIES)
+                    raise ExtractorError(info['errors'][0]['detail'], expected=True)
+                raise
 
         hls_url = info['data']['attributes']['streaming']['hls']['url']
 
@@ -230,7 +248,7 @@ class DPlayItIE(InfoExtractor):
             season_number = episode_number = upload_date = None
 
         return {
-            'id': info_url.rpartition('/')[-1],
+            'id': compat_str(video_id or display_id),
             'display_id': display_id,
             'title': title,
             'description': self._og_search_description(webpage),
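Editor's note: the for _ in range(2) loop exists because playback_json is doubly encoded — the page embeds a JSON string whose value is itself JSON. A toy reproduction:

    import json

    raw = json.dumps(json.dumps({'data': {'id': '1234'}}))  # doubly encoded
    info = raw
    for _ in range(2):
        info = json.loads(info)  # 1st pass -> str, 2nd pass -> dict
    assert info['data']['id'] == '1234'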
index e7abc888988e9807ee46abc4d4e44f8276b9a084..9a498d72ad6f378ef5dd877354871bcccbfb4faf 100644 (file)
@@ -12,6 +12,7 @@ from ..utils import (
     ExtractorError,
     clean_html,
     int_or_none,
+    remove_end,
     sanitized_Request,
     urlencode_postdata
 )
@@ -72,15 +73,15 @@ class DramaFeverIE(DramaFeverBaseIE):
         'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
         'info_dict': {
             'id': '4512.1',
-            'ext': 'mp4',
-            'title': 'Cooking with Shin 4512.1',
+            'ext': 'flv',
+            'title': 'Cooking with Shin',
             'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
             'episode': 'Episode 1',
             'episode_number': 1,
             'thumbnail': r're:^https?://.*\.jpg',
             'timestamp': 1404336058,
             'upload_date': '20140702',
-            'duration': 343,
+            'duration': 344,
         },
         'params': {
             # m3u8 download
@@ -90,15 +91,15 @@ class DramaFeverIE(DramaFeverBaseIE):
         'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
         'info_dict': {
             'id': '4826.4',
-            'ext': 'mp4',
-            'title': 'Mnet Asian Music Awards 2015 4826.4',
+            'ext': 'flv',
+            'title': 'Mnet Asian Music Awards 2015',
             'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
             'episode': 'Mnet Asian Music Awards 2015 - Part 3',
             'episode_number': 4,
             'thumbnail': r're:^https?://.*\.jpg',
             'timestamp': 1450213200,
             'upload_date': '20151215',
-            'duration': 5602,
+            'duration': 5359,
         },
         'params': {
             # m3u8 download
@@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE):
                     countries=self._GEO_COUNTRIES)
             raise
 
+        # Title is suffixed with the video id for some reason; strip it
+        if info.get('title'):
+            info['title'] = remove_end(info['title'], video_id).strip()
+
         series_id, episode_number = video_id.split('.')
         episode_info = self._download_json(
             # We only need a single episode info, so restricting page size to one episode
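Editor's note: the title cleanup is a plain suffix strip; with the first test's values:

    from youtube_dl.utils import remove_end

    title = remove_end('Cooking with Shin 4512.1', '4512.1').strip()
    assert title == 'Cooking with Shin'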
index c86f52319a46464dc9c2f329ceee6e19f8124c7e..e4a3046af573fec2961ae84a2137a9395bf68f0b 100644 (file)
@@ -2,6 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+    unified_timestamp,
+)
 
 
 class EggheadCourseIE(InfoExtractor):
@@ -33,3 +38,47 @@ class EggheadCourseIE(InfoExtractor):
         return self.playlist_result(
             entries, playlist_id, course.get('title'),
             course.get('description'))
+
+
+class EggheadLessonIE(InfoExtractor):
+    IE_DESC = 'egghead.io lesson'
+    IE_NAME = 'egghead:lesson'
+    _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+        'info_dict': {
+            'id': 'fv5yotjxcg',
+            'ext': 'mp4',
+            'title': 'Create linear data flow with container style types (Box)',
+            'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
+            'thumbnail': r're:^https?:.*\.jpg$',
+            'timestamp': 1481296768,
+            'upload_date': '20161209',
+            'duration': 304,
+            'view_count': 0,
+            'tags': ['javascript', 'free'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        lesson_id = self._match_id(url)
+
+        lesson = self._download_json(
+            'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Wistia',
+            'url': 'wistia:%s' % lesson['wistia_id'],
+            'id': lesson['wistia_id'],
+            'title': lesson.get('title'),
+            'description': lesson.get('summary'),
+            'thumbnail': lesson.get('thumb_nail'),
+            'timestamp': unified_timestamp(lesson.get('published_at')),
+            'duration': int_or_none(lesson.get('duration')),
+            'view_count': int_or_none(lesson.get('plays_count')),
+            'tags': try_get(lesson, lambda x: x['tag_list'], list),
+        }
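Editor's note: a url_transparent result delegates the actual extraction to the Wistia extractor while the non-None fields above override whatever Wistia reports; schematically (not actual YoutubeDL code, values made up):

    wistia_info = {'id': 'fv5yotjxcg', 'ext': 'mp4', 'title': 'raw upload name'}
    egghead_info = {'title': 'Create linear data flow (Box)', 'duration': 304}
    final = dict(wistia_info)
    final.update((k, v) for k, v in egghead_info.items() if v is not None)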
index e8a066b837cc67e8a0b064c4856657d8aa2ea04a..17048fd6e5b497a7b1714c236e9247fca873aec8 100644 (file)
@@ -186,8 +186,8 @@ from .chirbit import (
 )
 from .cinchcast import CinchcastIE
 from .cjsw import CJSWIE
-from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
 from .cliprs import ClipRsIE
 from .clipsyndicate import ClipsyndicateIE
 from .closertotruth import CloserToTruthIE
@@ -298,7 +298,10 @@ from .dw import (
 from .eagleplatform import EaglePlatformIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
-from .egghead import EggheadCourseIE
+from .egghead import (
+    EggheadCourseIE,
+    EggheadLessonIE,
+)
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
@@ -348,7 +351,12 @@ from .flipagram import FlipagramIE
 from .folketinget import FolketingetIE
 from .footyroom import FootyRoomIE
 from .formula1 import Formula1IE
-from .fourtube import FourTubeIE
+from .fourtube import (
+    FourTubeIE,
+    PornTubeIE,
+    PornerBrosIE,
+    FuxIE,
+)
 from .fox import FOXIE
 from .fox9 import FOX9IE
 from .foxgay import FoxgayIE
@@ -501,6 +509,7 @@ from .la7 import LA7IE
 from .laola1tv import (
     Laola1TvEmbedIE,
     Laola1TvIE,
+    ITTFIE,
 )
 from .lci import LCIIE
 from .lcp import (
@@ -528,7 +537,10 @@ from .limelight import (
     LimelightChannelListIE,
 )
 from .litv import LiTVIE
-from .liveleak import LiveLeakIE
+from .liveleak import (
+    LiveLeakIE,
+    LiveLeakEmbedIE,
+)
 from .livestream import (
     LivestreamIE,
     LivestreamOriginalIE,
@@ -555,6 +567,7 @@ from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
 from .medici import MediciIE
+from .megaphone import MegaphoneIE
 from .meipai import MeipaiIE
 from .melonvod import MelonVODIE
 from .meta import METAIE
@@ -581,7 +594,6 @@ from .mixcloud import (
 )
 from .mlb import MLBIE
 from .mnet import MnetIE
-from .mpora import MporaIE
 from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
@@ -670,6 +682,7 @@ from .nick import (
     NickIE,
     NickDeIE,
     NickNightIE,
+    NickRuIE,
 )
 from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninecninemedia import (
@@ -837,6 +850,10 @@ from .rai import (
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
+from .reddit import (
+    RedditIE,
+    RedditRIE,
+)
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (
@@ -930,8 +947,9 @@ from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
     SoundcloudUserIE,
+    SoundcloudTrackStationIE,
     SoundcloudPlaylistIE,
-    SoundcloudSearchIE
+    SoundcloudSearchIE,
 )
 from .soundgasm import (
     SoundgasmIE,
@@ -989,7 +1007,6 @@ from .teachertube import (
 )
 from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
-from .teamfourstar import TeamFourStarIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tele13 import Tele13IE
@@ -1218,6 +1235,7 @@ from .vodlocker import VodlockerIE
 from .vodpl import VODPlIE
 from .vodplatform import VODPlatformIE
 from .voicerepublic import VoiceRepublicIE
+from .voot import VootIE
 from .voxmedia import VoxMediaIE
 from .vporn import VpornIE
 from .vrt import VRTIE
@@ -1239,6 +1257,7 @@ from .washingtonpost import (
     WashingtonPostArticleIE,
 )
 from .wat import WatIE
+from .watchbox import WatchBoxIE
 from .watchindianporn import WatchIndianPornIE
 from .wdr import (
     WDRIE,
@@ -1293,6 +1312,7 @@ from .yandexmusic import (
     YandexMusicAlbumIE,
     YandexMusicPlaylistIE,
 )
+from .yandexdisk import YandexDiskIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
 from .ynet import YnetIE
index e3fd08bcfb6610b3394d2309e45f743e8562c97b..ad273a0e70c3fbd9087779d33829b817d9d70127 100644 (file)
@@ -3,39 +3,22 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     parse_duration,
     parse_iso8601,
-    sanitized_Request,
     str_to_int,
 )
 
 
-class FourTubeIE(InfoExtractor):
-    IE_NAME = '4tube'
-    _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'
+class FourTubeBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
 
-    _TEST = {
-        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
-        'md5': '6516c8ac63b03de06bc8eac14362db4f',
-        'info_dict': {
-            'id': '209733',
-            'ext': 'mp4',
-            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
-            'uploader': 'WCP Club',
-            'uploader_id': 'wcp-club',
-            'upload_date': '20131031',
-            'timestamp': 1383263892,
-            'duration': 583,
-            'view_count': int,
-            'like_count': int,
-            'categories': list,
-            'age_limit': 18,
-        }
-    }
+        if kind == 'm' or not display_id:
+            url = self._URL_TEMPLATE % video_id
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_meta('name', webpage)
@@ -43,10 +26,10 @@ class FourTubeIE(InfoExtractor):
             'uploadDate', webpage))
         thumbnail = self._html_search_meta('thumbnailUrl', webpage)
         uploader_id = self._html_search_regex(
-            r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
             webpage, 'uploader id', fatal=False)
         uploader = self._html_search_regex(
-            r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
             webpage, 'uploader', fatal=False)
 
         categories_html = self._search_regex(
@@ -60,10 +43,10 @@ class FourTubeIE(InfoExtractor):
 
         view_count = str_to_int(self._search_regex(
             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
-            webpage, 'view count', fatal=False))
+            webpage, 'view count', default=None))
         like_count = str_to_int(self._search_regex(
             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
-            webpage, 'like count', fatal=False))
+            webpage, 'like count', default=None))
         duration = parse_duration(self._html_search_meta('duration', webpage))
 
         media_id = self._search_regex(
@@ -87,12 +70,12 @@ class FourTubeIE(InfoExtractor):
 
         token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
             media_id, '+'.join(sources))
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-            b'Origin': b'https://www.4tube.com',
-        }
-        token_req = sanitized_Request(token_url, b'{}', headers)
-        tokens = self._download_json(token_req, video_id)
+
+        parsed_url = compat_urlparse.urlparse(url)
+        tokens = self._download_json(token_url, video_id, data=b'', headers={
+            'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+            'Referer': url,
+        })
         formats = [{
             'url': tokens[format]['token'],
             'format_id': format + 'p',
@@ -115,3 +98,126 @@ class FourTubeIE(InfoExtractor):
             'duration': duration,
             'age_limit': 18,
         }
+
+
+class FourTubeIE(FourTubeBaseIE):
+    IE_NAME = '4tube'
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+    _TESTS = [{
+        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+        'md5': '6516c8ac63b03de06bc8eac14362db4f',
+        'info_dict': {
+            'id': '209733',
+            'ext': 'mp4',
+            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
+            'uploader': 'WCP Club',
+            'uploader_id': 'wcp-club',
+            'upload_date': '20131031',
+            'timestamp': 1383263892,
+            'duration': 583,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+    }, {
+        'url': 'http://www.4tube.com/embed/209733',
+        'only_matching': True,
+    }, {
+        'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+        'only_matching': True,
+    }]
+
+
+class FuxIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+    _TESTS = [{
+        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+        'info_dict': {
+            'id': '195359',
+            'ext': 'mp4',
+            'title': 'Awesome fucking in the kitchen ends with cum swallow',
+            'uploader': 'alenci2342',
+            'uploader_id': 'alenci2342',
+            'upload_date': '20131230',
+            'timestamp': 1388361660,
+            'duration': 289,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.fux.com/embed/195359',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+        'only_matching': True,
+    }]
+
+
+class PornTubeIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+    _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+    _TESTS = [{
+        'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
+        'info_dict': {
+            'id': '7089759',
+            'ext': 'mp4',
+            'title': 'Teen couple doing anal',
+            'uploader': 'Alexy',
+            'uploader_id': 'Alexy',
+            'upload_date': '20150606',
+            'timestamp': 1433595647,
+            'duration': 5052,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.porntube.com/embed/7089759',
+        'only_matching': True,
+    }, {
+        'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
+        'only_matching': True,
+    }]
+
+
+class PornerBrosIE(FourTubeBaseIE):
+    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+    _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+    _TESTS = [{
+        'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+        'md5': '6516c8ac63b03de06bc8eac14362db4f',
+        'info_dict': {
+            'id': '181369',
+            'ext': 'mp4',
+            'title': 'Skinny brunette takes big cock down her anal hole',
+            'uploader': 'PornerBros HD',
+            'uploader_id': 'pornerbros-hd',
+            'upload_date': '20130130',
+            'timestamp': 1359527401,
+            'duration': 1224,
+            'view_count': int,
+            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.pornerbros.com/embed/181369',
+        'only_matching': True,
+    }, {
+        'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+        'only_matching': True,
+    }]
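Editor's note: the refactor is a straightforward template-method split — FourTubeBaseIE implements extraction once and each site class only contributes its URL pattern and canonical URL template. A toy model of the mechanism (not the real classes):

    import re

    class Base(object):
        def canonical_url(self, url):
            mobj = re.match(self._VALID_URL, url)
            kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
            # mobile and embed URLs are rewritten to the canonical page
            if kind == 'm' or not display_id:
                return self._URL_TEMPLATE % video_id
            return url

    class Site(Base):
        _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?example\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
        _URL_TEMPLATE = 'https://www.example.com/videos/%s/video'

    print(Site().canonical_url('http://m.example.com/videos/209733/some-title'))
    # -> https://www.example.com/videos/209733/video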
index 49409369cc5e72e626b5fbe64f8a346d9695f802..f85e7de1496b07848e19b491b7ef5312652a0d7c 100644 (file)
@@ -1,10 +1,14 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    unified_timestamp,
+)
 
 
 class FunnyOrDieIE(InfoExtractor):
@@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
             'title': 'Heart-Shaped Box: Literal Video Version',
             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
             'thumbnail': r're:^http:.*\.jpg$',
+            'uploader': 'DASjr',
+            'timestamp': 1317904928,
+            'upload_date': '20111006',
+            'duration': 318.3,
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
             'title': 'Please Use This Song (Jon Lajoie)',
             'description': 'Please use this to sell something.  www.jonlajoie.com',
             'thumbnail': r're:^http:.*\.jpg$',
+            'timestamp': 1398988800,
+            'upload_date': '20140502',
         },
         'params': {
             'skip_download': True,
@@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
                 'url': 'http://www.funnyordie.com%s' % src,
             }]
 
-        post_json = self._search_regex(
-            r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
-        post = json.loads(post_json)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp', default=None))
+
+        uploader = self._html_search_regex(
+            r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
+            webpage, 'uploader', default=None)
+
+        title, description, thumbnail, duration = [None] * 4
+
+        medium = self._parse_json(
+            self._search_regex(
+                r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
+                default='{}'),
+            video_id, fatal=False)
+        if medium:
+            title = medium.get('title')
+            duration = float_or_none(medium.get('duration'))
+            if not timestamp:
+                timestamp = unified_timestamp(medium.get('publishDate'))
+
+        post = self._parse_json(
+            self._search_regex(
+                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
+                default='{}'),
+            video_id, fatal=False)
+        if post:
+            if not title:
+                title = post.get('name')
+            description = post.get('description')
+            thumbnail = post.get('picture')
+
+        if not title:
+            title = self._og_search_title(webpage)
+        if not description:
+            description = self._og_search_description(webpage)
+        if not duration:
+            duration = int_or_none(self._html_search_meta(
+                ('video:duration', 'duration'), webpage, 'duration', default=None))
 
         return {
             'id': video_id,
-            'title': post['name'],
-            'description': post.get('description'),
-            'thumbnail': post.get('picture'),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
         }
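Editor's note: the extraction now layers three metadata sources — jsonMedium, fb_post and the OpenGraph tags — taking the first non-empty value per field; the pattern in miniature with made-up dicts:

    medium = {'title': None, 'duration': 318.3}
    post = {'name': 'Heart-Shaped Box: Literal Video Version'}

    title = medium.get('title') or post.get('name') or 'og:title fallback'
    assert title == 'Heart-Shaped Box: Literal Video Version'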
index 0ab2ef2d635743a4dfed477a1fc39e9533dd5b48..49b00b87ee86bc31eec17a2a8712abae01491f44 100644 (file)
@@ -97,6 +97,8 @@ from .washingtonpost import WashingtonPostIE
 from .wistia import WistiaIE
 from .mediaset import MediasetIE
 from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
 
 
 class GenericIE(InfoExtractor):
@@ -574,6 +576,19 @@ class GenericIE(InfoExtractor):
             },
             'skip': 'movie expired',
         },
+        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+        {
+            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+            'info_dict': {
+                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+                'ext': 'mp4',
+                'title': 'Steampunk Fest Comes to Honesdale',
+                'duration': 43.276,
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -1504,14 +1519,27 @@ class GenericIE(InfoExtractor):
         # LiveLeak embed
         {
             'url': 'http://www.wykop.pl/link/3088787/',
-            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+            'md5': '7619da8c820e835bef21a1efa2a0fc71',
             'info_dict': {
                 'id': '874_1459135191',
                 'ext': 'mp4',
                 'title': 'Man shows poor quality of new apartment building',
                 'description': 'The wall is like a sand pile.',
                 'uploader': 'Lake8737',
-            }
+            },
+            'add_ie': [LiveLeakIE.ie_key()],
+        },
+        # Another LiveLeak embed pattern (#13336)
+        {
+            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+            'info_dict': {
+                'id': '2eb_1496309988',
+                'ext': 'mp4',
+                'title': 'Thief robs place where everyone was armed',
+                'description': 'md5:694d73ee79e535953cf2488562288eee',
+                'uploader': 'brazilwtf',
+            },
+            'add_ie': [LiveLeakIE.ie_key()],
         },
         # Duplicated embedded video URLs
         {
@@ -1569,27 +1597,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # Nexx iFrame embed
-        {
-            'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
-            'info_dict': {
-                'id': '161464',
-                'ext': 'mp4',
-                'title': 'Nervenkitzel Achterbahn',
-                'alt_title': 'Karussellbauer in Deutschland',
-                'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
-                'release_year': 2005,
-                'creator': 'SPIEGEL TV',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 2761,
-                'timestamp': 1394021479,
-                'upload_date': '20140305',
-            },
-            'params': {
-                'format': 'bestvideo',
-                'skip_download': True,
-            },
-        },
         # Facebook <iframe> embed
         {
             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -1791,6 +1798,21 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 5,
         },
+        {
+            # Limelight embed (LimelightPlayerUtil.embed)
+            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+            'info_dict': {
+                'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+                'ext': 'mp4',
+                'title': '07448641',
+                'timestamp': 1499890639,
+                'upload_date': '20170712',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['LimelightMedia'],
+        },
         {
             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
             'info_dict': {
@@ -1847,6 +1869,16 @@ class GenericIE(InfoExtractor):
                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
             },
         },
+        {
+            # vzaar embed
+            'url': 'http://help.vzaar.com/article/165-embedding-video',
+            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+            'info_dict': {
+                'id': '8707641',
+                'ext': 'mp4',
+                'title': 'Building A Business Online: Principal Chairs Q & A',
+            },
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1996,7 +2028,7 @@ class GenericIE(InfoExtractor):
 
         if head_response is not False:
             # Check for redirect
-            new_url = head_response.geturl()
+            new_url = compat_str(head_response.geturl())
             if url != new_url:
                 self.report_following_redirect(new_url)
                 if force_videoid:
@@ -2097,7 +2129,7 @@ class GenericIE(InfoExtractor):
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc, video_id,
-                    mpd_base_url=full_response.geturl().rpartition('/')[0],
+                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
@@ -2313,6 +2345,7 @@ class GenericIE(InfoExtractor):
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+                re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
@@ -2737,9 +2770,9 @@ class GenericIE(InfoExtractor):
                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
 
         # Look for LiveLeak embeds
-        liveleak_url = LiveLeakIE._extract_url(webpage)
-        if liveleak_url:
-            return self.url_result(liveleak_url, 'LiveLeak')
+        liveleak_urls = LiveLeakIE._extract_urls(webpage)
+        if liveleak_urls:
+            return self.playlist_from_matches(liveleak_urls, video_id, video_title)
 
         # Look for 3Q SDN embeds
         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
@@ -2811,6 +2844,18 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 joj_urls, video_id, video_title, ie=JojIE.ie_key())
 
+        # Look for megaphone.fm embeds
+        mpfn_urls = MegaphoneIE._extract_urls(webpage)
+        if mpfn_urls:
+            return self.playlist_from_matches(
+                mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+        # Look for vzaar embeds
+        vzaar_urls = VzaarIE._extract_urls(webpage)
+        if vzaar_urls:
+            return self.playlist_from_matches(
+                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
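
A note on the pattern used for the new vzaar and megaphone hooks above: the generic extractor delegates discovery to a static _extract_urls scan over the page, then wraps whatever it finds with playlist_from_matches. A standalone sketch of that discovery step (the HTML and helper name are illustrative, not youtube-dl API):

    import re

    sample_html = '''
    <iframe src="//view.vzaar.com/8707641/player"></iframe>
    <iframe src="https://view.vzaar.com/1234567/player"></iframe>
    '''

    def extract_vzaar_urls(webpage):
        # One regex pass collects every embedded player URL; the caller
        # builds a playlist when more than one embed is present.
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?:)?//view\.vzaar\.com/\d+)',
            webpage)

    print(extract_vzaar_urls(sample_html))
    # ['//view.vzaar.com/8707641', 'https://view.vzaar.com/1234567']
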
index f3156804d8a072509045563017046dc6d670918e..26c48e4b889545f4e2ffacccfdb859678c4bf2c3 100644 (file)
@@ -59,12 +59,18 @@ class ITVIE(InfoExtractor):
         def _add_sub_element(element, name):
             return etree.SubElement(element, _add_ns(name))
 
+        production_id = (
+            params.get('data-video-autoplay-id') or
+            '%s#001' % (
+                params.get('data-video-episode-id') or
+                video_id.replace('a', '/')))
+
         req_env = etree.Element(_add_ns('soapenv:Envelope'))
         _add_sub_element(req_env, 'soapenv:Header')
         body = _add_sub_element(req_env, 'soapenv:Body')
         get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
         request = _add_sub_element(get_playlist, 'tem:request')
-        _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
+        _add_sub_element(request, 'itv:ProductionId').text = production_id
         _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
         vodcrid = _add_sub_element(request, 'itv:Vodcrid')
         _add_sub_element(vodcrid, 'com:Id')
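
The production id fallback above prefers the player's autoplay id, then the episode id, and finally derives an id from the URL slug by turning its 'a' separators into slashes and appending '#001'. A worked example with made-up values:

    params = {'data-video-autoplay-id': None, 'data-video-episode-id': None}
    video_id = '2a4547a0001'

    production_id = (
        params.get('data-video-autoplay-id') or
        '%s#001' % (
            params.get('data-video-episode-id') or
            video_id.replace('a', '/')))

    print(production_id)  # 2/4547/0001#001
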
index 1f91ba0173429aa8b86969b9dc40e45695213e6d..c7f813370162bb8582db8abd9a3f49e3e4bb1bc1 100644 (file)
@@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE):
             'formats': formats,
             'is_live': is_live,
         }
+
+
+class ITTFIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        return self.url_result(
+            update_url_query('https://www.laola1.tv/titanplayer.php', {
+                'videoid': self._match_id(url),
+                'type': 'V',
+                'lang': 'en',
+                'portal': 'int',
+                'customer': 1024,
+            }), Laola1TvEmbedIE.ie_key())
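
ITTFIE does no media extraction of its own: it rewrites the tv.ittf.com URL into laola1.tv's titanplayer form and defers to Laola1TvEmbedIE. The handed-off URL can be reproduced with just the standard library (a sketch; the real code uses youtube_dl.utils.update_url_query):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    def titanplayer_url(video_id):
        # Query values mirror the ones passed above.
        return 'https://www.laola1.tv/titanplayer.php?' + urlencode({
            'videoid': video_id,
            'type': 'V',
            'lang': 'en',
            'portal': 'int',
            'customer': 1024,
        })

    print(titanplayer_url('951802'))
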
index 0a5a3956c66827a745daa70ed3b33e61ea676d64..ad65b2759d7245d2129e238f703a8e66a6cb333c 100644 (file)
@@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor):
             'Channel': 'channel',
             'ChannelList': 'channel_list',
         }
+
+        def smuggle(url):
+            return smuggle_url(url, {'source_url': source_url})
+
         entries = []
         for kind, video_id in re.findall(
                 r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
                 webpage):
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (lm[kind], video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (lm[kind], video_id)),
                 'Limelight%s' % kind, video_id))
         for mobj in re.finditer(
                 # As per [1] class attribute should be exactly equal to
@@ -49,10 +51,15 @@ class LimelightBaseIE(InfoExtractor):
                 ''', webpage):
             kind, video_id = mobj.group('kind'), mobj.group('id')
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (kind, video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (kind, video_id)),
                 'Limelight%s' % kind.capitalize(), video_id))
+        # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page
+        for video_id in re.findall(
+                r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            entries.append(cls.url_result(
+                smuggle('limelight:media:%s' % video_id),
+                LimelightMediaIE.ie_key(), video_id))
         return entries
 
     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
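
The smuggle() helper above just curries smuggle_url with a fixed source_url so all three discovery loops stay short. Conceptually, smuggling hides a JSON payload in the URL fragment for the target extractor to recover, roughly like this standalone sketch (youtube_dl.utils.smuggle_url encodes its payload in a similar way):

    import json
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    def smuggle(url, source_url):
        # Stash extra context in the fragment; the receiving extractor
        # unpacks it instead of re-deriving the referring page.
        payload = urlencode(
            {'__youtubedl_smuggle': json.dumps({'source_url': source_url})})
        return url + '#' + payload

    print(smuggle('limelight:media:' + '0' * 32, 'http://example.com/page'))
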
index b2247a84d625c76482e140b8f891f8512670a3c5..246aac576a2c6b275ed38ef614eb7a569b9e2ffb 100644 (file)
@@ -72,15 +72,20 @@ class LiveLeakIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.liveleak.com/view?i=677_1439397581',
+        'info_dict': {
+            'id': '677_1439397581',
+            'title': 'Fuel Depot in China Explosion caught on video',
+        },
+        'playlist_count': 3,
     }]
 
     @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
             webpage)
-        if mobj:
-            return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -111,23 +116,54 @@ class LiveLeakIE(InfoExtractor):
                 'age_limit': age_limit,
             }
 
-        info_dict = entries[0]
+        for idx, info_dict in enumerate(entries):
+            for a_format in info_dict['formats']:
+                if not a_format.get('height'):
+                    a_format['height'] = int_or_none(self._search_regex(
+                        r'([0-9]+)p\.mp4', a_format['url'], 'height label',
+                        default=None))
+
+            self._sort_formats(info_dict['formats'])
+
+            # Don't append entry ID for one-video pages to keep backward compatibility
+            if len(entries) > 1:
+                info_dict['id'] = '%s_%s' % (video_id, idx + 1)
+            else:
+                info_dict['id'] = video_id
 
-        for a_format in info_dict['formats']:
-            if not a_format.get('height'):
-                a_format['height'] = int_or_none(self._search_regex(
-                    r'([0-9]+)p\.mp4', a_format['url'], 'height label',
-                    default=None))
+            info_dict.update({
+                'title': video_title,
+                'description': video_description,
+                'uploader': video_uploader,
+                'age_limit': age_limit,
+                'thumbnail': video_thumbnail,
+            })
 
-        self._sort_formats(info_dict['formats'])
+        return self.playlist_result(entries, video_id, video_title)
+
+
+class LiveLeakEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+
+    # See generic.py for actual test cases
+    _TESTS = [{
+        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        kind, video_id = mobj.group('kind', 'id')
 
-        info_dict.update({
-            'id': video_id,
-            'title': video_title,
-            'description': video_description,
-            'uploader': video_uploader,
-            'age_limit': age_limit,
-            'thumbnail': video_thumbnail,
-        })
+        if kind == 'f':
+            webpage = self._download_webpage(url, video_id)
+            liveleak_url = self._search_regex(
+                r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+                webpage, 'LiveLeak URL', group='url')
+        elif kind == 'i':
+            liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
 
-        return info_dict
+        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
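
Note the entry id scheme introduced above: single-video pages keep the bare page id so previously downloaded archives still match, while multi-part pages get numbered suffixes. The rule in isolation:

    def entry_ids(video_id, num_entries):
        # Mirrors the loop above: suffix only when there are several parts.
        if num_entries == 1:
            return [video_id]
        return ['%s_%s' % (video_id, idx + 1) for idx in range(num_entries)]

    print(entry_ids('677_1439397581', 3))
    # ['677_1439397581_1', '677_1439397581_2', '677_1439397581_3']
    print(entry_ids('757_1389589856', 1))  # ['757_1389589856']
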
diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py
new file mode 100644 (file)
index 0000000..60e3caf
--- /dev/null
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MegaphoneIE(InfoExtractor):
+    IE_NAME = 'megaphone.fm'
+    IE_DESC = 'megaphone.fm embedded players'
+    _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+    _TEST = {
+        'url': 'https://player.megaphone.fm/GLT9749789991?"',
+        'md5': '4816a0de523eb3e972dc0dda2c191f96',
+        'info_dict': {
+            'id': 'GLT9749789991',
+            'ext': 'mp3',
+            'title': '#97 What Kind Of Idiot Gets Phished?',
+            'thumbnail': r're:^https://.*\.png.*$',
+            'duration': 1776.26375,
+            'author': 'Reply All',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_property('audio:title', webpage)
+        author = self._og_search_property('audio:artist', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
+        episode_data = self._parse_json(episode_json, video_id, js_to_json)
+        video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
+
+        formats = [{
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'thumbnail': thumbnail,
+            'title': title,
+            'author': author,
+            'duration': episode_data['duration'],
+            'formats': formats,
+        }
+
+    @classmethod
+    def _extract_urls(cls, webpage):
+        return [m[0] for m in re.findall(
+            r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
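
The core of the megaphone extractor is the inline `var episode = {...}` assignment; the rest is og: metadata. A sketch of that step on a made-up page, parsed with plain json (the real code runs the blob through js_to_json first so JavaScript-flavoured syntax is tolerated):

    import json
    import re

    page = '''<script>
    var episode = {"mediaUrl": "//traffic.megaphone.fm/EXAMPLE.mp3",
                   "duration": 1776.26375};
    </script>'''

    blob = re.search(r'(?s)var\s+episode\s*=\s*(\{.+?\});', page).group(1)
    episode = json.loads(blob)
    media_url = 'https:' + episode['mediaUrl']  # protocol-relative URL
    print(media_url, episode['duration'])
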
index 0efbe660a5d2a88b41e198078da84baf8bfc7a9e..798968ae3dd8a7c16ede20eb7e328c848e61b8b9 100644 (file)
@@ -9,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_chr,
     compat_ord,
+    compat_str,
     compat_urllib_parse_unquote,
     compat_urlparse,
 )
@@ -53,16 +54,27 @@ class MixcloudIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
-    @staticmethod
-    def _decrypt_play_info(play_info):
-        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
+    _keys = [
+        'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
+        'pleasedontdownloadourmusictheartistswontgetpaid',
+        'window.addEventListener = window.addEventListener || function() {};',
+        '(function() { return new Date().toLocaleDateString(); })()'
+    ]
+    _current_key = None
 
+    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
+    def _decrypt_play_info(self, play_info, video_id):
         play_info = base64.b64decode(play_info.encode('ascii'))
-
-        return ''.join([
-            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
-            for idx, ch in enumerate(play_info)])
+        for num, key in enumerate(self._keys, start=1):
+            try:
+                return self._parse_json(
+                    ''.join([
+                        compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
+                        for idx, ch in enumerate(play_info)]),
+                    video_id)
+            except ExtractorError:
+                if num == len(self._keys):
+                    raise
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,14 +84,30 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
+        if not self._current_key:
+            js_url = self._search_regex(
+                r'<script[^>]+\bsrc=["\'](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
+                webpage, 'js url', default=None)
+            if js_url:
+                js = self._download_webpage(js_url, track_id, fatal=False)
+                if js:
+                    KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
+                    for key_name in ('value', 'key_value'):
+                        key = self._search_regex(
+                            KEY_RE_TEMPLATE % key_name, js, 'key',
+                            default=None, group='key')
+                        if key and isinstance(key, compat_str):
+                            self._keys.insert(0, key)
+                            self._current_key = key
+
         message = self._html_search_regex(
             r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
             webpage, 'error message', default=None)
 
         encrypted_play_info = self._search_regex(
             r'm-play-info="([^"]+)"', webpage, 'play info')
-        play_info = self._parse_json(
-            self._decrypt_play_info(encrypted_play_info), track_id)
+
+        play_info = self._decrypt_play_info(encrypted_play_info, track_id)
 
         if message and 'stream_url' not in play_info:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
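
The play info is an XOR cipher over a base64 payload, and the patch rotates through candidate keys (including one scraped from the player JS) until the plaintext parses as JSON. The cipher itself is tiny; a self-contained Python 3 round-trip with an illustrative payload:

    import base64

    def xor_decrypt(payload_b64, key):
        data = base64.b64decode(payload_b64)
        return ''.join(
            chr(ch ^ ord(key[idx % len(key)]))
            for idx, ch in enumerate(data))

    key = 'pleasedontdownloadourmusictheartistswontgetpaid'
    secret = '{"stream_url": "https://example.com/a.m4a"}'
    encrypted = base64.b64encode(bytes(
        ord(c) ^ ord(key[i % len(key)]) for i, c in enumerate(secret)))

    print(xor_decrypt(encrypted.decode('ascii'), key) == secret)  # True
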
index 59cd4b8389f28a72f9d16df70edfa64a7ce2ba40..675ff687374a9a94928f3a899ffdb4a45b1b743c 100644 (file)
@@ -15,7 +15,7 @@ class MLBIE(InfoExtractor):
                         (?:[\da-z_-]+\.)*mlb\.com/
                         (?:
                             (?:
-                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
                                 (?:
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
@@ -84,7 +84,7 @@ class MLBIE(InfoExtractor):
         },
         {
             'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
-            'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+            'md5': 'aafaf5b0186fee8f32f20508092f8111',
             'info_dict': {
                 'id': '75609783',
                 'ext': 'mp4',
@@ -94,6 +94,10 @@ class MLBIE(InfoExtractor):
                 'upload_date': '20150415',
             }
         },
+        {
+            'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
+            'only_matching': True,
+        },
         {
             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
             'only_matching': True,
diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py
deleted file mode 100644 (file)
index 5a1bee5..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class MporaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
-    IE_NAME = 'MPORA'
-
-    _TEST = {
-        'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
-        'md5': 'a7a228473eedd3be741397cf452932eb',
-        'info_dict': {
-            'id': 'AAdo8okx4wiz',
-            'ext': 'mp4',
-            'title': 'Katy Curd -  Winter in the Forest',
-            'duration': 416,
-            'uploader': 'Peter Newman Media',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        data_json = self._search_regex(
-            [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
-             r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
-            webpage, 'json')
-        data = self._parse_json(data_json, video_id)
-
-        uploader = data['info_overlay'].get('username')
-        duration = data['video']['duration'] // 1000
-        thumbnail = data['video']['encodings']['sd']['poster']
-        title = data['info_overlay']['title']
-
-        formats = []
-        for encoding_id, edata in data['video']['encodings'].items():
-            for src in edata['sources']:
-                width_str = self._search_regex(
-                    r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
-                    False, default=None)
-                vcodec = src['type'].partition('/')[2]
-
-                formats.append({
-                    'format_id': encoding_id + '-' + vcodec,
-                    'url': src['src'],
-                    'vcodec': vcodec,
-                    'width': int_or_none(width_str),
-                })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'uploader': uploader,
-            'duration': duration,
-            'thumbnail': thumbnail,
-        }
index 8acea1461a662dc40840526c4efabcbe7a7c29b0..25af5ddfda4765132fec413caca9a09fc2ba2bb9 100644 (file)
@@ -50,8 +50,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         thumb_node = itemdoc.find(search_path)
         if thumb_node is None:
             return None
-        else:
-            return thumb_node.attrib['url']
+        return thumb_node.get('url') or thumb_node.text or None
 
     def _extract_mobile_video_formats(self, mtvn_id):
         webpage_url = self._MOBILE_TEMPLATE % mtvn_id
@@ -83,7 +82,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
                 hls_url = rendition.find('./src').text
                 formats.extend(self._extract_m3u8_formats(
                     hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls'))
+                    m3u8_id='hls', fatal=False))
             else:
                 # fms
                 try:
@@ -106,7 +105,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
                     }])
                 except (KeyError, TypeError):
                     raise ExtractorError('Invalid rendition field.')
-        self._sort_formats(formats)
+        if formats:
+            self._sort_formats(formats)
         return formats
 
     def _extract_subtitles(self, mdoc, mtvn_id):
@@ -133,8 +133,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
             mediagen_url += 'acceptMethods='
             mediagen_url += 'hls' if use_hls else 'fms'
 
-        mediagen_doc = self._download_xml(mediagen_url, video_id,
-                                          'Downloading video urls')
+        mediagen_doc = self._download_xml(
+            mediagen_url, video_id, 'Downloading video urls', fatal=False)
+
+        if mediagen_doc is False:
+            return None
 
         item = mediagen_doc.find('./video/item')
         if item is not None and item.get('type') == 'text':
@@ -174,6 +177,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
 
+        # Some parts of the complete video may be missing (e.g. missing Act 3 in
+        # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
+        if not formats:
+            return None
+
+        self._sort_formats(formats)
+
         return {
             'title': title,
             'formats': formats,
@@ -205,9 +215,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         title = xpath_text(idoc, './channel/title')
         description = xpath_text(idoc, './channel/description')
 
+        entries = []
+        for item in idoc.findall('.//item'):
+            info = self._get_video_info(item, use_hls)
+            if info:
+                entries.append(info)
+
         return self.playlist_result(
-            [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
-            playlist_title=title, playlist_description=description)
+            entries, playlist_title=title, playlist_description=description)
 
     def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
         triforce_feed = self._parse_json(self._search_regex(
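
With _get_video_info now returning None for unresolvable items, the feed loop above collects only the playable parts instead of failing on the first missing act. The filtering pattern in isolation (get_video_info here is a stand-in):

    items = ['act1', 'act2', 'act3']

    def get_video_info(item):
        # Stand-in: pretend act2's mediagen document is unavailable.
        return None if item == 'act2' else {'title': item}

    entries = []
    for item in items:
        info = get_video_info(item)
        if info:
            entries.append(info)

    print(entries)  # [{'title': 'act1'}, {'title': 'act3'}]
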
index 08a75929e1e049249759f94e6179cfa8932ba87c..510b1c41fd42dd3f77214fd804e9962a78e075af 100644 (file)
@@ -12,6 +12,7 @@ class NickIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.com'
     _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
+    _GEO_COUNTRIES = ['US']
     _TESTS = [{
         'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
         'playlist': [
@@ -74,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -87,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+        'only_matching': True,
     }]
 
     def _extract_mrss_url(self, webpage, host):
@@ -124,3 +128,21 @@ class NickNightIE(NickDeIE):
         return self._search_regex(
             r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
             'mrss url', group='url')
+
+
+class NickRuIE(MTVServicesInfoExtractor):
+    IE_NAME = 'nickelodeonru'
+    _VALID_URL = r'https?://(?:www\.)?nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        mgid = self._extract_mgid(webpage)
+        return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
index 695e32e59b84318f82eb90dcaf967315c4790aa1..026329d3ea4210e2d17af374b67c4a87af252ee1 100644 (file)
@@ -11,10 +11,15 @@ from ..compat import (
 )
 from ..utils import (
     determine_ext,
+    dict_get,
     ExtractorError,
     int_or_none,
+    float_or_none,
     parse_duration,
     parse_iso8601,
+    remove_start,
+    try_get,
+    unified_timestamp,
     urlencode_postdata,
     xpath_text,
 )
@@ -31,12 +36,15 @@ class NiconicoIE(InfoExtractor):
             'id': 'sm22312215',
             'ext': 'mp4',
             'title': 'Big Buck Bunny',
+            'thumbnail': r're:https?://.*',
             'uploader': 'takuya0301',
             'uploader_id': '2698420',
             'upload_date': '20131123',
             'timestamp': 1385182762,
             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
             'duration': 33,
+            'view_count': int,
+            'comment_count': int,
         },
         'skip': 'Requires an account',
     }, {
@@ -48,6 +56,7 @@ class NiconicoIE(InfoExtractor):
             'ext': 'swf',
             'title': '【鏡音リン】Dance on media【オリジナル】take2!',
             'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+            'thumbnail': r're:https?://.*',
             'uploader': 'りょうた',
             'uploader_id': '18822557',
             'upload_date': '20110429',
@@ -64,9 +73,11 @@ class NiconicoIE(InfoExtractor):
             'ext': 'unknown_video',
             'description': 'deleted',
             'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
+            'thumbnail': r're:https?://.*',
             'upload_date': '20071224',
             'timestamp': int,  # timestamp field has different value if logged in
             'duration': 304,
+            'view_count': int,
         },
         'skip': 'Requires an account',
     }, {
@@ -76,12 +87,51 @@ class NiconicoIE(InfoExtractor):
             'ext': 'mp4',
             'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
             'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+            'thumbnail': r're:https?://.*',
             'timestamp': 1388851200,
             'upload_date': '20140104',
             'uploader': 'アニメロチャンネル',
             'uploader_id': '312',
         },
         'skip': 'The viewing period of the video you were searching for has expired.',
+    }, {
+        # video not available via `getflv`; "old" HTML5 video
+        'url': 'http://www.nicovideo.jp/watch/sm1151009',
+        'md5': '8fa81c364eb619d4085354eab075598a',
+        'info_dict': {
+            'id': 'sm1151009',
+            'ext': 'mp4',
+            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
+            'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
+            'thumbnail': r're:https?://.*',
+            'duration': 184,
+            'timestamp': 1190868283,
+            'upload_date': '20070927',
+            'uploader': 'denden2',
+            'uploader_id': '1392194',
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
+    }, {
+        # "New" HTML5 video
+        'url': 'http://www.nicovideo.jp/watch/sm31464864',
+        'md5': '351647b4917660986dc0fa8864085135',
+        'info_dict': {
+            'id': 'sm31464864',
+            'ext': 'mp4',
+            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
+            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
+            'timestamp': 1498514060,
+            'upload_date': '20170626',
+            'uploader': 'ゲス',
+            'uploader_id': '40826363',
+            'thumbnail': r're:https?://.*',
+            'duration': 198,
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
     }, {
         'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
         'only_matching': True,
@@ -119,6 +169,84 @@ class NiconicoIE(InfoExtractor):
             self._downloader.report_warning('unable to log in: bad username or password')
         return login_ok
 
+    def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+        def yesno(boolean):
+            return 'yes' if boolean else 'no'
+
+        session_api_data = api_data['video']['dmcInfo']['session_api']
+        session_api_endpoint = session_api_data['urls'][0]
+
+        format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+
+        session_response = self._download_json(
+            session_api_endpoint['url'], video_id,
+            query={'_format': 'json'},
+            headers={'Content-Type': 'application/json'},
+            note='Downloading JSON metadata for %s' % format_id,
+            data=json.dumps({
+                'session': {
+                    'client_info': {
+                        'player_id': session_api_data['player_id'],
+                    },
+                    'content_auth': {
+                        'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
+                        'content_key_timeout': session_api_data['content_key_timeout'],
+                        'service_id': 'nicovideo',
+                        'service_user_id': session_api_data['service_user_id']
+                    },
+                    'content_id': session_api_data['content_id'],
+                    'content_src_id_sets': [{
+                        'content_src_ids': [{
+                            'src_id_to_mux': {
+                                'audio_src_ids': [audio_quality['id']],
+                                'video_src_ids': [video_quality['id']],
+                            }
+                        }]
+                    }],
+                    'content_type': 'movie',
+                    'content_uri': '',
+                    'keep_method': {
+                        'heartbeat': {
+                            'lifetime': session_api_data['heartbeat_lifetime']
+                        }
+                    },
+                    'priority': session_api_data['priority'],
+                    'protocol': {
+                        'name': 'http',
+                        'parameters': {
+                            'http_parameters': {
+                                'parameters': {
+                                    'http_output_download_parameters': {
+                                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
+                                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
+                                    }
+                                }
+                            }
+                        }
+                    },
+                    'recipe_id': session_api_data['recipe_id'],
+                    'session_operation_auth': {
+                        'session_operation_auth_by_signature': {
+                            'signature': session_api_data['signature'],
+                            'token': session_api_data['token'],
+                        }
+                    },
+                    'timing_constraint': 'unlimited'
+                }
+            }))
+
+        resolution = video_quality.get('resolution', {})
+
+        return {
+            'url': session_response['data']['session']['content_uri'],
+            'format_id': format_id,
+            'ext': 'mp4',  # Session API is used in HTML5, which always serves mp4
+            'abr': float_or_none(audio_quality.get('bitrate'), 1000),
+            'vbr': float_or_none(video_quality.get('bitrate'), 1000),
+            'height': resolution.get('height'),
+            'width': resolution.get('width'),
+        }
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -130,30 +258,84 @@ class NiconicoIE(InfoExtractor):
         if video_id.startswith('so'):
             video_id = self._match_id(handle.geturl())
 
-        video_info = self._download_xml(
-            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
-            note='Downloading video info page')
-
-        # Get flv info
-        flv_info_webpage = self._download_webpage(
-            'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
-            video_id, 'Downloading flv info')
-
-        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
-        if 'url' not in flv_info:
-            if 'deleted' in flv_info:
-                raise ExtractorError('The video has been deleted.',
-                                     expected=True)
-            elif 'closed' in flv_info:
-                raise ExtractorError('Niconico videos now require logging in',
-                                     expected=True)
-            else:
-                raise ExtractorError('Unable to find video URL')
-
-        video_real_url = flv_info['url'][0]
+        api_data = self._parse_json(self._html_search_regex(
+            'data-api-data="([^"]+)"', webpage,
+            'API data', default='{}'), video_id)
+
+        def _format_id_from_url(video_url):
+            return 'economy' if video_url.endswith('low') else 'normal'
+
+        try:
+            video_real_url = api_data['video']['smileInfo']['url']
+        except KeyError:  # Flash videos
+            # Get flv info
+            flv_info_webpage = self._download_webpage(
+                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
+                video_id, 'Downloading flv info')
+
+            flv_info = compat_urlparse.parse_qs(flv_info_webpage)
+            if 'url' not in flv_info:
+                if 'deleted' in flv_info:
+                    raise ExtractorError('The video has been deleted.',
+                                         expected=True)
+                elif 'closed' in flv_info:
+                    raise ExtractorError('Niconico videos now require logging in',
+                                         expected=True)
+                elif 'error' in flv_info:
+                    raise ExtractorError('%s reports error: %s' % (
+                        self.IE_NAME, flv_info['error'][0]), expected=True)
+                else:
+                    raise ExtractorError('Unable to find video URL')
+
+            video_info_xml = self._download_xml(
+                'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+                video_id, note='Downloading video info page')
+
+            def get_video_info(items):
+                if not isinstance(items, list):
+                    items = [items]
+                for item in items:
+                    ret = xpath_text(video_info_xml, './/' + item)
+                    if ret:
+                        return ret
+
+            video_real_url = flv_info['url'][0]
+
+            extension = get_video_info('movie_type')
+            if not extension:
+                extension = determine_ext(video_real_url)
+
+            formats = [{
+                'url': video_real_url,
+                'ext': extension,
+                'format_id': _format_id_from_url(video_real_url),
+            }]
+        else:
+            formats = []
+
+            dmc_info = api_data['video'].get('dmcInfo')
+            if dmc_info:  # "New" HTML5 videos
+                quality_info = dmc_info['quality']
+                for audio_quality in quality_info['audios']:
+                    for video_quality in quality_info['videos']:
+                        if not audio_quality['available'] or not video_quality['available']:
+                            continue
+                        formats.append(self._extract_format_for_quality(
+                            api_data, video_id, audio_quality, video_quality))
+
+                self._sort_formats(formats)
+            else:  # "Old" HTML5 videos
+                formats = [{
+                    'url': video_real_url,
+                    'ext': 'mp4',
+                    'format_id': _format_id_from_url(video_real_url),
+                }]
+
+            def get_video_info(items):
+                return dict_get(api_data['video'], items)
 
         # Start extracting information
-        title = xpath_text(video_info, './/title')
+        title = get_video_info('title')
         if not title:
             title = self._og_search_title(webpage, default=None)
         if not title:
@@ -167,18 +349,15 @@ class NiconicoIE(InfoExtractor):
         watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
         video_detail = watch_api_data.get('videoDetail', {})
 
-        extension = xpath_text(video_info, './/movie_type')
-        if not extension:
-            extension = determine_ext(video_real_url)
-
         thumbnail = (
-            xpath_text(video_info, './/thumbnail_url') or
+            get_video_info(['thumbnail_url', 'thumbnailURL']) or
             self._html_search_meta('image', webpage, 'thumbnail', default=None) or
             video_detail.get('thumbnail'))
 
-        description = xpath_text(video_info, './/description')
+        description = get_video_info('description')
 
-        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+        timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
+                     unified_timestamp(get_video_info('postedDateTime')))
         if not timestamp:
             match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
             if match:
@@ -188,7 +367,7 @@ class NiconicoIE(InfoExtractor):
                 video_detail['postedAt'].replace('/', '-'),
                 delimiter=' ', timezone=datetime.timedelta(hours=9))
 
-        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+        view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
         if not view_count:
             match = self._html_search_regex(
                 r'>Views: <strong[^>]*>([^<]+)</strong>',
@@ -197,38 +376,33 @@ class NiconicoIE(InfoExtractor):
                 view_count = int_or_none(match.replace(',', ''))
         view_count = view_count or video_detail.get('viewCount')
 
-        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+        comment_count = (int_or_none(get_video_info('comment_num')) or
+                         video_detail.get('commentCount') or
+                         try_get(api_data, lambda x: x['thread']['commentCount']))
         if not comment_count:
             match = self._html_search_regex(
                 r'>Comments: <strong[^>]*>([^<]+)</strong>',
                 webpage, 'comment count', default=None)
             if match:
                 comment_count = int_or_none(match.replace(',', ''))
-        comment_count = comment_count or video_detail.get('commentCount')
 
         duration = (parse_duration(
-            xpath_text(video_info, './/length') or
+            get_video_info('length') or
             self._html_search_meta(
                 'video:duration', webpage, 'video duration', default=None)) or
-            video_detail.get('length'))
+            video_detail.get('length') or
+            get_video_info('duration'))
 
-        webpage_url = xpath_text(video_info, './/watch_url') or url
+        webpage_url = get_video_info('watch_url') or url
 
-        if video_info.find('.//ch_id') is not None:
-            uploader_id = video_info.find('.//ch_id').text
-            uploader = video_info.find('.//ch_name').text
-        elif video_info.find('.//user_id') is not None:
-            uploader_id = video_info.find('.//user_id').text
-            uploader = video_info.find('.//user_nickname').text
-        else:
-            uploader_id = uploader = None
+        owner = api_data.get('owner', {})
+        uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
+        uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
 
         return {
             'id': video_id,
-            'url': video_real_url,
             'title': title,
-            'ext': extension,
-            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
+            'formats': formats,
             'thumbnail': thumbnail,
             'description': description,
             'uploader': uploader,
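
For DMC streams the patch emits one format per available audio/video pair: the format id joins the two quality ids with their 'archive_' prefixes stripped, and bitrates are scaled from bit/s to kbit/s. A worked example with illustrative quality dicts (the real ones come from api_data['video']['dmcInfo']['quality']):

    def remove_start(s, prefix):
        # Same behaviour as youtube_dl.utils.remove_start.
        return s[len(prefix):] if s.startswith(prefix) else s

    audio_quality = {'id': 'archive_aac_128kbps', 'bitrate': 128000}
    video_quality = {'id': 'archive_h264_1080p', 'bitrate': 2000000,
                     'resolution': {'width': 1920, 'height': 1080}}

    format_id = '-'.join(
        remove_start(q['id'], 'archive_')
        for q in (video_quality, audio_quality))

    print(format_id)                          # h264_1080p-aac_128kbps
    print(video_quality['bitrate'] / 1000.0)  # vbr in kbit/s: 2000.0
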
index 516b1e94147abc76f4cde13652ac2ef9153e6046..fa4ef20c52959240af41a0c8a7b08c02fd3eb54c 100644 (file)
@@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor):
 
 class NPOIE(NPOBaseIE):
     IE_NAME = 'npo'
-    IE_DESC = 'npo.nl and ntr.nl'
+    IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
     _VALID_URL = r'''(?x)
                     (?:
                         npo:|
@@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE):
                                 npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
                                 ntr\.nl/(?:[^/]+/){2,}|
                                 omroepwnl\.nl/video/fragment/[^/]+__|
-                                zapp\.nl/[^/]+/[^/]+/
+                                (?:zapp|npo3)\.nl/(?:[^/]+/){2}
                             )
                         )
                         (?P<id>[^/?#]+)
@@ -146,6 +146,9 @@ class NPOIE(NPOBaseIE):
     }, {
         'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
         'only_matching': True,
+    }, {
+        'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
+        'only_matching': True,
     }, {
         # live stream
         'url': 'npo:LI_NL1_4188102',
index 3b4f51f61b2f44c2ccb79eb3f9d4f6c8a1166b14..18ead94260650e9312cc8adab491423485a59494 100644 (file)
@@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE):
                             (?:/\d{2}-\d{2}-\d{4})?
                             (?:\#del=(?P<part_id>\d+))?
                     ''' % _EPISODE_RE
-    _API_HOST = 'psapi-we.nrk.no'
+    _API_HOST = 'psapi-ne.nrk.no'
 
     _TESTS = [{
         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
index 16cc667d025514f64a66852d7e4ad0e8952297b8..8889e4a1aaa3e41f49a63b53c010cf69d0842b1b 100644 (file)
@@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
            # Direct video URL
            (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
            # Article with embedded player (or direct video)
-           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+           (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
            (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
         )
@@ -345,6 +345,21 @@ class PBSIE(InfoExtractor):
                 'formats': 'mincount:8',
             },
         },
+        {
+            # https://github.com/rg3/youtube-dl/issues/13801
+            'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+            'info_dict': {
+                'id': '3003333873',
+                'ext': 'mp4',
+                'title': 'PBS NewsHour - full episode July 31, 2017',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'duration': 3265,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
             'only_matching': True,
@@ -433,6 +448,9 @@ class PBSIE(InfoExtractor):
                 if url:
                     break
 
+            if not url:
+                url = self._og_search_url(webpage)
+
             mobj = re.match(self._VALID_URL, url)
 
         player_id = mobj.group('player_id')
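
When none of the known player references match, the code above now falls back to the page's canonical URL from its og:url meta tag and re-runs URL matching against that. A minimal sketch of the lookup on an illustrative snippet:

    import re

    page = ('<meta property="og:url" '
            'content="https://www.pbs.org/video/example-episode/"/>')
    m = re.search(
        r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)',
        page)
    print(m.group(1))  # https://www.pbs.org/video/example-episode/
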
index bfa12edc925d148ffecad3cdac4ff22ae321d70e..e5e08538c3dcde0797cd3805dad67d247f66b80f 100644 (file)
@@ -80,18 +80,24 @@ class PeriscopeIE(PeriscopeBaseIE):
         stream = self._call_api(
             'getAccessPublic', {'broadcast_id': token}, token)
 
+        video_urls = set()
         formats = []
-        for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
+        for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
             video_url = stream.get(format_id + '_url')
-            if not video_url:
+            if not video_url or video_url in video_urls:
                 continue
-            f = {
+            video_urls.add(video_url)
+            if format_id != 'rtmp':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, token, 'mp4',
+                    entry_protocol='m3u8_native'
+                    if state in ('ended', 'timed_out') else 'm3u8',
+                    m3u8_id=format_id, fatal=False))
+                continue
+            formats.append({
                 'url': video_url,
                 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
-            }
-            if format_id != 'rtmp':
-                f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'
-            formats.append(f)
+            })
         self._sort_formats(formats)
 
         return {
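
Several of the keys probed above can point at the same manifest, hence the video_urls set; every non-rtmp URL is then expanded through the m3u8 parser rather than added verbatim. The deduplication logic in isolation (illustrative stream dict):

    stream = {
        'hls_url': 'https://example.com/master.m3u8',
        'https_hls_url': 'https://example.com/master.m3u8',  # duplicate
        'replay_url': 'https://example.com/replay.m3u8',
    }

    seen, picked = set(), []
    for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
        video_url = stream.get(format_id + '_url')
        if not video_url or video_url in seen:
            continue
        seen.add(video_url)
        picked.append(format_id)

    print(picked)  # ['replay', 'hls']
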
index e45d9fe552e2c5af4c3f35182aa8063dcca9e390..f6a9131b19bf693511a84cfa3118d89030053c6d 100644 (file)
@@ -18,6 +18,7 @@ from ..utils import (
     parse_duration,
     qualities,
     srt_subtitles_timecode,
+    try_get,
     update_url_query,
     urlencode_postdata,
 )
@@ -26,6 +27,39 @@ from ..utils import (
 class PluralsightBaseIE(InfoExtractor):
     _API_BASE = 'https://app.pluralsight.com'
 
+    def _download_course(self, course_id, url, display_id):
+        try:
+            return self._download_course_rpc(course_id, url, display_id)
+        except ExtractorError:
+            # Old API fallback
+            return self._download_json(
+                'https://app.pluralsight.com/player/user/api/v1/player/payload',
+                display_id, data=urlencode_postdata({'courseId': course_id}),
+                headers={'Referer': url})
+
+    def _download_course_rpc(self, course_id, url, display_id):
+        response = self._download_json(
+            '%s/player/functions/rpc' % self._API_BASE, display_id,
+            'Downloading course JSON',
+            data=json.dumps({
+                'fn': 'bootstrapPlayer',
+                'payload': {
+                    'courseId': course_id,
+                },
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json;charset=utf-8',
+                'Referer': url,
+            })
+
+        course = try_get(response, lambda x: x['payload']['course'], dict)
+        if course:
+            return course
+
+        raise ExtractorError(
+            '%s said: %s' % (self.IE_NAME, response['error']['message']),
+            expected=True)
+
 
 class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
@@ -162,10 +196,7 @@ class PluralsightIE(PluralsightBaseIE):
 
         display_id = '%s-%s' % (name, clip_id)
 
-        course = self._download_json(
-            'https://app.pluralsight.com/player/user/api/v1/player/payload',
-            display_id, data=urlencode_postdata({'courseId': course_name}),
-            headers={'Referer': url})
+        course = self._download_course(course_name, url, display_id)
 
         collection = course['modules']
 
@@ -224,6 +255,7 @@ class PluralsightIE(PluralsightBaseIE):
                 req_format_split = req_format.split('-', 1)
                 if len(req_format_split) > 1:
                     req_ext, req_quality = req_format_split
+                    req_quality = '-'.join(req_quality.split('-')[:2])
                     for allowed_quality in ALLOWED_QUALITIES:
                         if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                             return (AllowedQuality(req_ext, (req_quality, )), )
@@ -330,18 +362,7 @@ class PluralsightCourseIE(PluralsightBaseIE):
 
         # TODO: PSM cookie
 
-        course = self._download_json(
-            '%s/player/functions/rpc' % self._API_BASE, course_id,
-            'Downloading course JSON',
-            data=json.dumps({
-                'fn': 'bootstrapPlayer',
-                'payload': {
-                    'courseId': course_id,
-                }
-            }).encode('utf-8'),
-            headers={
-                'Content-Type': 'application/json;charset=utf-8'
-            })['payload']['course']
+        course = self._download_course(course_id, url, course_id)
 
         title = course['title']
         course_name = course['name']
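
_download_course now centralises endpoint selection: the RPC API is tried first and any ExtractorError falls through to the legacy payload API, so both PluralsightIE and PluralsightCourseIE share one code path. The shape of that fallback, stripped of the HTTP details:

    class ExtractorError(Exception):
        """Stand-in for youtube_dl.utils.ExtractorError."""

    def download_course_rpc(course_id):
        raise ExtractorError('bootstrapPlayer rejected the request')

    def download_course_old(course_id):
        return {'name': course_id, 'modules': []}

    def download_course(course_id):
        try:
            return download_course_rpc(course_id)
        except ExtractorError:
            # Old API fallback
            return download_course_old(course_id)

    print(download_course('some-course'))  # {'name': 'some-course', 'modules': []}
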
index f20946a2bd0616d8d90cebf360570a3f0bc40e94..25fcebf9fa6a06a6bf955468ddf0740b648593a5 100644 (file)
@@ -9,39 +9,46 @@ from ..utils import int_or_none
 
 class PodomaticIE(InfoExtractor):
     IE_NAME = 'podomatic'
-    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
+    _VALID_URL = r'''(?x)
+                    (?P<proto>https?)://
+                        (?:
+                            (?P<channel>[^.]+)\.podomatic\.com/entry|
+                            (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
+                        )/
+                        (?P<id>[^/?#&]+)
+                '''
 
-    _TESTS = [
-        {
-            'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
-            'md5': '84bb855fcf3429e6bf72460e1eed782d',
-            'info_dict': {
-                'id': '2009-01-02T16_03_35-08_00',
-                'ext': 'mp3',
-                'uploader': 'Science Teaching Tips',
-                'uploader_id': 'scienceteachingtips',
-                'title': '64.  When the Moon Hits Your Eye',
-                'duration': 446,
-            }
-        },
-        {
-            'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
-            'md5': 'd2cf443931b6148e27638650e2638297',
-            'info_dict': {
-                'id': '2013-11-15T16_31_21-08_00',
-                'ext': 'mp3',
-                'uploader': 'Ostbahnhof / Techno Mix',
-                'uploader_id': 'ostbahnhof',
-                'title': 'Einunddreizig',
-                'duration': 3799,
-            }
-        },
-    ]
+    _TESTS = [{
+        'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+        'md5': '84bb855fcf3429e6bf72460e1eed782d',
+        'info_dict': {
+            'id': '2009-01-02T16_03_35-08_00',
+            'ext': 'mp3',
+            'uploader': 'Science Teaching Tips',
+            'uploader_id': 'scienceteachingtips',
+            'title': '64.  When the Moon Hits Your Eye',
+            'duration': 446,
+        }
+    }, {
+        'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+        'md5': 'd2cf443931b6148e27638650e2638297',
+        'info_dict': {
+            'id': '2013-11-15T16_31_21-08_00',
+            'ext': 'mp3',
+            'uploader': 'Ostbahnhof / Techno Mix',
+            'uploader_id': 'ostbahnhof',
+            'title': 'Einunddreizig',
+            'duration': 3799,
+        }
+    }, {
+        'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        channel = mobj.group('channel')
+        channel = mobj.group('channel') or mobj.group('channel_2')
 
         json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
                      '?permalink=true&rtmp=0') %
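
Whichever _VALID_URL alternative matches leaves the other channel group empty, which is why the extraction falls back with `or`. Checking both URL styles against the new pattern:

    import re

    VALID_URL = r'''(?x)
        (?P<proto>https?)://
            (?:
                (?P<channel>[^.]+)\.podomatic\.com/entry|
                (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
            )/
            (?P<id>[^/?#&]+)
    '''

    for url in (
            'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
            'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00'):
        m = re.match(VALID_URL, url)
        print(m.group('channel') or m.group('channel_2'), m.group('id'))
    # scienceteachingtips 2009-01-02T16_03_35-08_00 (printed twice)
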
index 842317e6c9cc2312064fae4e61e5703352aa0096..36761788dd9ca0c3f1a0a0e15493b2a812642357 100644 (file)
@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
 
         sources = self._parse_json(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
             webpage, 'sources', default='{}')), video_id)
 
         if not sources:
index e032817f2c18634561dda71f786782373c0b38f8..3428458afa987fb3eb8c3be061742cefce6e2734 100644 (file)
@@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor):
             title, thumbnail, duration = [None] * 3
 
         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
 
         view_count = self._extract_count(
@@ -227,13 +227,20 @@ class PornHubIE(InfoExtractor):
 
 class PornHubPlaylistBaseIE(InfoExtractor):
     def _extract_entries(self, webpage):
+        # Only process the container div with the main playlist content,
+        # skipping the drop-down menu that uses a similar pattern for videos
+        # (see https://github.com/rg3/youtube-dl/issues/11594).
+        container = self._search_regex(
+            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+            'container', default=webpage)
+
         return [
             self.url_result(
                 'http://www.pornhub.com/%s' % video_url,
                 PornHubIE.ie_key(), video_title=title)
             for video_url, title in orderedSet(re.findall(
                 r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
-                webpage))
+                container))
         ]
 
     def _real_extract(self, url):
@@ -241,14 +248,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        # Only process container div with main playlist content skipping
-        # drop-down menu that uses similar pattern for videos (see
-        # https://github.com/rg3/youtube-dl/issues/11594).
-        container = self._search_regex(
-            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
-            'container', default=webpage)
-
-        entries = self._extract_entries(container)
+        entries = self._extract_entries(webpage)
 
         playlist = self._parse_json(
             self._search_regex(
index 17c27da46da7576205afba3c53254728f711d974..084308aeb8922a1632eafe5a8395381710952c60 100644 (file)
@@ -2,38 +2,37 @@
 from __future__ import unicode_literals
 
 import random
-import time
 import re
+import time
 
 from .common import InfoExtractor
 from ..utils import (
-    sanitized_Request,
-    strip_jsonp,
-    unescapeHTML,
     clean_html,
     ExtractorError,
+    strip_jsonp,
+    unescapeHTML,
 )
 
 
 class QQMusicIE(InfoExtractor):
     IE_NAME = 'qqmusic'
     IE_DESC = 'QQ音乐'
-    _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
-        'md5': '9ce1c1c8445f561506d2e3cfb0255705',
+        'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
+        'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
         'info_dict': {
             'id': '004295Et37taLD',
             'ext': 'mp3',
             'title': '可惜没如果',
             'release_date': '20141227',
             'creator': '林俊杰',
-            'description': 'md5:d327722d0361576fde558f1ac68a7065',
+            'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
             'thumbnail': r're:^https?://.*\.jpg$',
         }
     }, {
         'note': 'There is no mp3-320 version of this song.',
-        'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
+        'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
         'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
         'info_dict': {
             'id': '004MsGEo3DdNxV',
@@ -46,14 +45,14 @@ class QQMusicIE(InfoExtractor):
         }
     }, {
         'note': 'lyrics not in .lrc format',
-        'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
+        'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
         'info_dict': {
             'id': '001JyApY11tIp6',
             'ext': 'mp3',
             'title': 'Shadows Over Transylvania',
             'release_date': '19970225',
             'creator': 'Dark Funeral',
-            'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
+            'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
             'thumbnail': r're:^https?://.*\.jpg$',
         },
         'params': {
@@ -105,7 +104,7 @@ class QQMusicIE(InfoExtractor):
             [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
             detail_info_page, 'album mid', default=None)
         if albummid:
-            thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
+            thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
                             % (albummid[-2:-1], albummid[-1], albummid)
 
         guid = self.m_r_get_ruin()
@@ -156,15 +155,39 @@ class QQPlaylistBaseIE(InfoExtractor):
     def qq_static_url(category, mid):
         return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
 
-    @classmethod
-    def get_entries_from_page(cls, page):
+    def get_singer_all_songs(self, singmid, num):
+        return self._download_webpage(
+            'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+            query={
+                'format': 'json',
+                'inCharset': 'utf8',
+                'outCharset': 'utf-8',
+                'platform': 'yqq',
+                'needNewCode': 0,
+                'singermid': singmid,
+                'order': 'listen',
+                'begin': 0,
+                'num': num,
+                'songstatus': 1,
+            })
+
+    def get_entries_from_page(self, singmid):
         entries = []
 
-        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
-            song_mid = unescapeHTML(item).split('|')[-5]
-            entries.append(cls.url_result(
-                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
-                song_mid))
+        default_num = 1
+        json_text = self.get_singer_all_songs(singmid, default_num)
+        json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        if json_obj_all_songs['code'] == 0:
+            total = json_obj_all_songs['data']['total']
+            json_text = self.get_singer_all_songs(singmid, total)
+            json_obj_all_songs = self._parse_json(json_text, singmid)
+
+        for item in json_obj_all_songs['data']['list']:
+            if item['musicData'].get('songmid') is not None:
+                songmid = item['musicData']['songmid']
+                entries.append(self.url_result(
+                    'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
 
         return entries
 
@@ -172,42 +195,32 @@ class QQPlaylistBaseIE(InfoExtractor):
 class QQMusicSingerIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:singer'
     IE_DESC = 'QQ音乐 - 歌手'
-    _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
     _TEST = {
-        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
         'info_dict': {
             'id': '001BLpXF2DyJe2',
             'title': '林俊杰',
             'description': 'md5:870ec08f7d8547c29c93010899103751',
         },
-        'playlist_count': 12,
+        'playlist_mincount': 12,
     }
 
     def _real_extract(self, url):
         mid = self._match_id(url)
 
-        singer_page = self._download_webpage(
-            self.qq_static_url('singer', mid), mid, 'Download singer page')
-
-        entries = self.get_entries_from_page(singer_page)
-
+        entries = self.get_entries_from_page(mid)
+        singer_page = self._download_webpage(url, mid, 'Download singer page')
         singer_name = self._html_search_regex(
-            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
-            default=None)
-
-        singer_id = self._html_search_regex(
-            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
-            default=None)
-
+            r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
         singer_desc = None
 
-        if singer_id:
-            req = sanitized_Request(
-                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
-            req.add_header(
-                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+        if mid:
             singer_desc_page = self._download_xml(
-                req, mid, 'Donwload singer description XML')
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+                'Download singer description XML',
+                query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+                headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
 
             singer_desc = singer_desc_page.find('./data/info/desc').text
 
@@ -217,10 +230,10 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
 class QQMusicAlbumIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:album'
     IE_DESC = 'QQ音乐 - 专辑'
-    _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
 
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
+        'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
         'info_dict': {
             'id': '000gXCTb2AhRR1',
             'title': '我们都是这样长大的',
@@ -228,7 +241,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
         },
         'playlist_count': 4,
     }, {
-        'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
+        'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
         'info_dict': {
             'id': '002Y5a3b3AlCu3',
             'title': '그리고...',
@@ -246,7 +259,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
 
         entries = [
             self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+                'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
             ) for song in album['list']
         ]
         album_name = album.get('name')
@@ -260,31 +273,30 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
 class QQMusicToplistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:toplist'
     IE_DESC = 'QQ音乐 - 排行榜'
-    _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
 
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=toplist&p=global_123',
+        'url': 'https://y.qq.com/n/yqq/toplist/123.html',
         'info_dict': {
-            'id': 'global_123',
+            'id': '123',
             'title': '美国iTunes榜',
+            'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
         },
-        'playlist_count': 10,
+        'playlist_count': 100,
     }, {
-        'url': 'http://y.qq.com/#type=toplist&p=top_3',
+        'url': 'https://y.qq.com/n/yqq/toplist/3.html',
         'info_dict': {
-            'id': 'top_3',
+            'id': '3',
             'title': '巅峰榜·欧美',
-            'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'
-                           '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'
-                           '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:'
-                           '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放'
+            'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
         },
         'playlist_count': 100,
     }, {
-        'url': 'http://y.qq.com/#type=toplist&p=global_106',
+        'url': 'https://y.qq.com/n/yqq/toplist/106.html',
         'info_dict': {
-            'id': 'global_106',
+            'id': '106',
             'title': '韩国Mnet榜',
+            'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
         },
         'playlist_count': 50,
     }]
@@ -292,18 +304,15 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
     def _real_extract(self, url):
         list_id = self._match_id(url)
 
-        list_type, num_id = list_id.split("_")
-
         toplist_json = self._download_json(
-            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
-            % (list_type, num_id),
-            list_id, 'Download toplist page')
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+            note='Download toplist page',
+            query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
 
-        entries = [
-            self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
-            ) for song in toplist_json['songlist']
-        ]
+        entries = [self.url_result(
+            'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+            song['data']['songmid'])
+            for song in toplist_json['songlist']]
 
         topinfo = toplist_json.get('topinfo', {})
         list_name = topinfo.get('ListName')
@@ -314,10 +323,10 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
 class QQMusicPlaylistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:playlist'
     IE_DESC = 'QQ音乐 - 歌单'
-    _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
 
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=taoge&id=3462654915',
+        'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
         'info_dict': {
             'id': '3462654915',
             'title': '韩国5月新歌精选下旬',
@@ -326,7 +335,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
         'playlist_count': 40,
         'skip': 'playlist gone',
     }, {
-        'url': 'http://y.qq.com/#type=taoge&id=1374105607',
+        'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
         'info_dict': {
             'id': '1374105607',
             'title': '易入人心的华语民谣',
@@ -339,8 +348,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
         list_id = self._match_id(url)
 
         list_json = self._download_json(
-            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
-            % list_id, list_id, 'Download list page',
+            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
+            list_id, 'Download list page',
+            query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
             transform_source=strip_jsonp)
         if not len(list_json.get('cdlist', [])):
             if list_json.get('code'):
@@ -350,11 +360,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
             raise ExtractorError('Unable to get playlist info')
 
         cdlist = list_json['cdlist'][0]
-        entries = [
-            self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
-            ) for song in cdlist['songlist']
-        ]
+        entries = [self.url_result(
+            'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
+            for song in cdlist['songlist']]
 
         list_name = cdlist.get('dissname')
         list_description = clean_html(unescapeHTML(cdlist.get('desc')))
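
The rewritten playlist code above replaces HTML scraping with a two-phase JSON fetch: probe the singer track endpoint with num=1 just to read data.total, then re-request with num=total so every song arrives in a single page. A minimal standalone sketch of that probe-then-fetch pattern (Python 3 stdlib only; it assumes the fcg_v8_singer_track_cp.fcg endpoint still answers plain GET requests with the {'code': 0, 'data': {'total': N, 'list': [...]}} shape the extractor relies on):

    import json
    import urllib.parse
    import urllib.request

    API = 'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg'

    def fetch_songs(singermid, num):
        # Same query string the extractor sends; 'num' caps the page size.
        query = urllib.parse.urlencode({
            'format': 'json', 'inCharset': 'utf8', 'outCharset': 'utf-8',
            'platform': 'yqq', 'needNewCode': 0, 'singermid': singermid,
            'order': 'listen', 'begin': 0, 'num': num, 'songstatus': 1,
        })
        with urllib.request.urlopen(API + '?' + query) as resp:
            return json.loads(resp.read().decode('utf-8'))

    def all_song_mids(singermid):
        probe = fetch_songs(singermid, 1)  # phase 1: learn the total
        if probe.get('code') != 0:
            return []
        full = fetch_songs(singermid, probe['data']['total'])  # phase 2: fetch all
        return [item['musicData']['songmid']
                for item in full['data']['list']
                if item.get('musicData', {}).get('songmid')]
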
diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py
new file mode 100644 (file)
index 0000000..01c85ee
--- /dev/null
@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+    _TEST = {
+        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+        'url': 'https://v.redd.it/zv89llsvexdz',
+        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'zv89llsvexdz',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = self._extract_m3u8_formats(
+            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+        formats.extend(self._extract_mpd_formats(
+            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+            mpd_id='dash', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class RedditRIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'That small heart attack.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1501941939,
+            'upload_date': '20170805',
+            'uploader': 'Antw87',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+        'only_matching': True,
+    }, {
+        # imgur
+        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+        'only_matching': True,
+    }, {
+        # streamable
+        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+        'only_matching': True,
+    }, {
+        # youtube
+        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            url + '.json', video_id)[0]['data']['children'][0]['data']
+
+        video_url = data['url']
+
+        # Avoid recursing into the same reddit URL
+        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+            raise ExtractorError('No media found', expected=True)
+
+        over_18 = data.get('over_18')
+        if over_18 is True:
+            age_limit = 18
+        elif over_18 is False:
+            age_limit = 0
+        else:
+            age_limit = None
+
+        return {
+            '_type': 'url_transparent',
+            'url': video_url,
+            'title': data.get('title'),
+            'thumbnail': data.get('thumbnail'),
+            'timestamp': float_or_none(data.get('created_utc')),
+            'uploader': data.get('author'),
+            'like_count': int_or_none(data.get('ups')),
+            'dislike_count': int_or_none(data.get('downs')),
+            'comment_count': int_or_none(data.get('num_comments')),
+            'age_limit': age_limit,
+        }
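
The two new extractors above split the work: RedditRIE resolves a comments page to its media URL via reddit's '.json' suffix convention, and RedditIE derives both manifests directly from the v.redd.it id. A hedged stdlib sketch of that flow (the custom User-Agent string is an assumption; reddit tends to throttle default client strings):

    import json
    import urllib.request

    def reddit_media(post_url):
        # Appending '.json' to a comments permalink returns the post data.
        req = urllib.request.Request(
            post_url.rstrip('/') + '.json',
            headers={'User-Agent': 'media-probe/0.1'})  # hypothetical UA
        with urllib.request.urlopen(req) as resp:
            post = json.loads(resp.read().decode('utf-8'))[0]['data']['children'][0]['data']
        video_url = post['url']  # e.g. https://v.redd.it/zv89llsvexdz
        video_id = video_url.rstrip('/').rpartition('/')[2]
        # The two manifests RedditIE probes, built purely from the id:
        return {
            'title': post.get('title'),
            'hls': 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id,
            'dash': 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id,
        }

    print(reddit_media('https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/'))
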
index 3f1a46bb2c2ce9a5e03bd0222e897fbc9248e171..2e52e092b0d1193c0f55a718262b1d308408888b 100644 (file)
@@ -31,6 +31,7 @@ class SoundcloudIE(InfoExtractor):
 
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
+                            (?!stations/track)
                             (?P<uploader>[\w\d-]+)/
                             (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
@@ -121,7 +122,7 @@ class SoundcloudIE(InfoExtractor):
         },
     ]
 
-    _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
+    _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
@@ -330,7 +331,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
         }
 
 
-class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+    _API_BASE = 'https://api.soundcloud.com'
+    _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+    def _extract_playlist(self, base_url, playlist_id, playlist_title):
+        COMMON_QUERY = {
+            'limit': 50,
+            'client_id': self._CLIENT_ID,
+            'linked_partitioning': '1',
+        }
+
+        query = COMMON_QUERY.copy()
+        query['offset'] = 0
+
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+
+        entries = []
+        for i in itertools.count():
+            response = self._download_json(
+                next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+
+            collection = response['collection']
+            if not collection:
+                break
+
+            def resolve_permalink_url(candidates):
+                for cand in candidates:
+                    if isinstance(cand, dict):
+                        permalink_url = cand.get('permalink_url')
+                        entry_id = self._extract_id(cand)
+                        if permalink_url and permalink_url.startswith('http'):
+                            return permalink_url, entry_id
+                return None, None
+
+            for e in collection:
+                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+                if permalink_url:
+                    entries.append(self.url_result(permalink_url, video_id=entry_id))
+
+            next_href = response.get('next_href')
+            if not next_href:
+                break
+
+            parsed_next_href = compat_urlparse.urlparse(next_href)
+            qs = compat_urlparse.parse_qs(parsed_next_href.query)
+            qs.update(COMMON_QUERY)
+            next_href = compat_urlparse.urlunparse(
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'entries': entries,
+        }
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                         https?://
                             (?:(?:www|m)\.)?soundcloud\.com/
@@ -385,16 +442,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
         'playlist_mincount': 1,
     }]
 
-    _API_BASE = 'https://api.soundcloud.com'
-    _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
     _BASE_URL_MAP = {
-        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
-        'tracks': '%s/users/%%s/tracks' % _API_BASE,
-        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
-        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
-        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
-        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
+        'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+        'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
     }
 
     _TITLE_MAP = {
@@ -416,57 +470,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
             resolv_url, uploader, 'Downloading user info')
 
         resource = mobj.group('rsrc') or 'all'
-        base_url = self._BASE_URL_MAP[resource] % user['id']
 
-        COMMON_QUERY = {
-            'limit': 50,
-            'client_id': self._CLIENT_ID,
-            'linked_partitioning': '1',
-        }
+        return self._extract_playlist(
+            self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
+            '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
 
-        query = COMMON_QUERY.copy()
-        query['offset'] = 0
 
-        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
-
-        entries = []
-        for i in itertools.count():
-            response = self._download_json(
-                next_href, uploader, 'Downloading track page %s' % (i + 1))
-
-            collection = response['collection']
-            if not collection:
-                break
-
-            def resolve_permalink_url(candidates):
-                for cand in candidates:
-                    if isinstance(cand, dict):
-                        permalink_url = cand.get('permalink_url')
-                        entry_id = self._extract_id(cand)
-                        if permalink_url and permalink_url.startswith('http'):
-                            return permalink_url, entry_id
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+    IE_NAME = 'soundcloud:trackstation'
+    _TESTS = [{
+        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+        'info_dict': {
+            'id': '286017854',
+            'title': 'Track station: your-text',
+        },
+        'playlist_mincount': 47,
+    }]
 
-            for e in collection:
-                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
-                if permalink_url:
-                    entries.append(self.url_result(permalink_url, video_id=entry_id))
+    def _real_extract(self, url):
+        track_name = self._match_id(url)
 
-            next_href = response.get('next_href')
-            if not next_href:
-                break
+        webpage = self._download_webpage(url, track_name)
 
-            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
-            qs = compat_urlparse.parse_qs(parsed_next_href.query)
-            qs.update(COMMON_QUERY)
-            next_href = compat_urlparse.urlunparse(
-                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+        track_id = self._search_regex(
+            r'soundcloud:track-stations:(\d+)', webpage, 'track id')
 
-        return {
-            '_type': 'playlist',
-            'id': compat_str(user['id']),
-            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
-            'entries': entries,
-        }
+        return self._extract_playlist(
+            '%s/stations/soundcloud:track-stations:%s/tracks'
+            % (self._API_V2_BASE, track_id),
+            track_id, 'Track station: %s' % track_name)
 
 
 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
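
The pagination logic factored into SoundcloudPagedPlaylistBaseIE follows the API's linked_partitioning cursor: fetch a page, drain its collection, then chase next_href until it disappears, re-applying the common query because the cursor URL comes back without it. The same loop as a standalone generator (a sketch assuming the caller holds a client_id the API accepts):

    import json
    import urllib.parse
    import urllib.request

    def paged_collection(base_url, client_id, limit=50):
        # linked_partitioning=1 makes the API include a 'next_href'
        # cursor alongside each 'collection' page.
        common = {'limit': limit, 'client_id': client_id,
                  'linked_partitioning': '1'}
        next_href = base_url + '?' + urllib.parse.urlencode(dict(common, offset=0))
        while next_href:
            with urllib.request.urlopen(next_href) as resp:
                page = json.loads(resp.read().decode('utf-8'))
            collection = page.get('collection') or []
            if not collection:
                break
            for item in collection:
                yield item
            next_href = page.get('next_href')
            if next_href:
                # The cursor URL lacks client_id; merge the common
                # query back in before following it.
                parts = urllib.parse.urlparse(next_href)
                qs = urllib.parse.parse_qs(parts.query)
                qs.update(common)
                next_href = parts._replace(
                    query=urllib.parse.urlencode(qs, doseq=True)).geturl()
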
index 8598377b0148a447fb5ef31ca9c1f881b8d9a347..84298fee4279bb1484f5e52edbfa03f0bc148b01 100644 (file)
@@ -122,6 +122,26 @@ class SpiegelArticleIE(InfoExtractor):
 
         },
         'playlist_count': 6,
+    }, {
+        # Nexx iFrame embed
+        'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
+        'info_dict': {
+            'id': '161464',
+            'ext': 'mp4',
+            'title': 'Nervenkitzel Achterbahn',
+            'alt_title': 'Karussellbauer in Deutschland',
+            'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+            'release_year': 2005,
+            'creator': 'SPIEGEL TV',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 2761,
+            'timestamp': 1394021479,
+            'upload_date': '20140305',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
index e7bd5bf91921752757e1bc9beb390798b86a9c8a..54497c880ec2cc9cbfc5ff20756cbc19d4ee6c65 100644 (file)
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    js_to_json,
+)
 
 
 class SportBoxEmbedIE(InfoExtractor):
@@ -14,8 +18,10 @@ class SportBoxEmbedIE(InfoExtractor):
         'info_dict': {
             'id': '211355',
             'ext': 'mp4',
-            'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+            'title': '211355',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 292,
+            'view_count': int,
         },
         'params': {
             # m3u8 download
@@ -24,6 +30,9 @@ class SportBoxEmbedIE(InfoExtractor):
     }, {
         'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
         'only_matching': True,
+    }, {
+        'url': 'https://news.sportbox.ru/vdl/player/media/193095',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -37,36 +46,34 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        formats = []
-
-        def cleanup_js(code):
-            # desktop_advert_config contains complex Javascripts and we don't need it
-            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
-
-        jwplayer_data = self._parse_json(self._search_regex(
-            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
-            transform_source=cleanup_js)
-
-        hls_url = jwplayer_data.get('hls_url')
-        if hls_url:
-            formats.extend(self._extract_m3u8_formats(
-                hls_url, video_id, ext='mp4', m3u8_id='hls'))
-
-        rtsp_url = jwplayer_data.get('rtsp_url')
-        if rtsp_url:
-            formats.append({
-                'url': rtsp_url,
-                'format_id': 'rtsp',
-            })
+        wjplayer_data = self._parse_json(
+            self._search_regex(
+                r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
+            video_id, transform_source=js_to_json)
 
+        formats = []
+        for source in wjplayer_data['sources']:
+            src = source.get('src')
+            if not src:
+                continue
+            if determine_ext(src) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': src,
+                })
         self._sort_formats(formats)
 
-        title = jwplayer_data['node_title']
-        thumbnail = jwplayer_data.get('image_url')
+        view_count = int_or_none(self._search_regex(
+            r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
 
         return {
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
+            'title': video_id,
+            'thumbnail': wjplayer_data.get('poster'),
+            'duration': int_or_none(wjplayer_data.get('duration')),
+            'view_count': view_count,
             'formats': formats,
         }
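
The SportBox rewrite drops the bespoke jwplayer cleanup in favor of the usual inline-config pattern: locate the wjplayer({...}) call, run the JS object literal through js_to_json, and read sources/poster/duration from the result. A self-contained illustration of that step (the HTML snippet is fabricated; only the regex and the js_to_json call mirror the extractor):

    import json
    import re

    from youtube_dl.utils import js_to_json

    webpage = '''
    <script>
    wjplayer({
        sources: [{src: '/video/211355.m3u8'}],
        poster: '/thumbs/211355.jpg',
        duration: 292,
    });
    </script>
    '''

    raw = re.search(r'(?s)wjplayer\((\{.+?\})\);', webpage).group(1)
    # js_to_json normalizes single quotes, unquoted keys and trailing
    # commas into strict JSON.
    config = json.loads(js_to_json(raw))
    print(config['sources'][0]['src'], config['duration'])
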
index 1b5afb73ee473b23e78c27caba6708ce9ceb348f..48bc4529e6ae8a265a672c066ac59b388ea3a5d5 100644 (file)
@@ -181,7 +181,8 @@ class SVTPlayIE(SVTBaseIE):
 
         if video_id:
             data = self._download_json(
-                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+                'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+                video_id, headers=self.geo_verification_headers())
             info_dict = self._extract_video(data, video_id)
             if not info_dict.get('title'):
                 info_dict['title'] = re.sub(
index bf93eb8682e1f7424216ebd08438dbb6a3a9b955..e9474533f4dc66072539f75adf7abc7b20723240 100644 (file)
@@ -8,6 +8,9 @@ from ..utils import extract_attributes
 
 
 class TBSIE(TurnerBaseIE):
+    # https://github.com/rg3/youtube-dl/issues/13658
+    _WORKING = False
+
     _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
     _TESTS = [{
         'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
@@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE):
             'ext': 'mp4',
             'title': 'Theatrical Trailer',
             'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
-        }
+        },
+        'skip': 'TBS videos are deleted after a while',
     }, {
         'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
         'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
@@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE):
             'ext': 'mp4',
             'title': 'You Better Run',
             'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
-        }
+        },
+        'skip': 'TBS videos are deleted after a while',
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py
deleted file mode 100644 (file)
index a8c6ed7..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from ..utils import unified_strdate
-
-
-class TeamFourStarIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
-    _TEST = {
-        'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
-        'info_dict': {
-            'id': '0WdZO31W',
-            'title': 'TFS Abridged Parody Episode 1',
-            'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
-            'ext': 'mp4',
-            'timestamp': 1394168400,
-            'upload_date': '20080508',
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        jwplatform_url = JWPlatformIE._extract_url(webpage)
-
-        video_title = self._html_search_regex(
-            r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
-            webpage, 'title')
-        video_date = unified_strdate(self._html_search_regex(
-            r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
-            webpage, 'date', fatal=False))
-        video_description = self._html_search_regex(
-            r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
-            webpage, 'description', fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            '_type': 'url_transparent',
-            'display_id': display_id,
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            'url': jwplatform_url,
-        }
index 4fd1aa4bfbdaea2ec5abbac2161f6aea25e5fbbd..a42977f397e03ab71f70c585e1f3c34113b78eda 100644 (file)
@@ -50,7 +50,7 @@ class TwentyMinutenIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
         return [m.group('url') for m in re.finditer(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
             webpage)]
 
     def _real_extract(self, url):
index 160be1b1b913f3b1013ab4d1296239fd110f976a..207c4a6a7ee8131c3e2e5d5823aefb336ad47c47 100644 (file)
@@ -15,6 +15,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    js_to_json,
     sanitized_Request,
     unescapeHTML,
     urlencode_postdata,
@@ -73,7 +74,7 @@ class UdemyIE(InfoExtractor):
             return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
 
         checkout_url = unescapeHTML(self._search_regex(
-            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
+            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
             webpage, 'checkout url', group='url', default=None))
         if checkout_url:
             raise ExtractorError(
@@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor):
                     f = add_output_format_meta(f, format_id)
                 formats.append(f)
 
+        def extract_subtitles(track_list):
+            if not isinstance(track_list, list):
+                return
+            for track in track_list:
+                if not isinstance(track, dict):
+                    continue
+                if track.get('kind') != 'captions':
+                    continue
+                src = track.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                lang = track.get('language') or track.get(
+                    'srclang') or track.get('label')
+                sub_dict = automatic_captions if track.get(
+                    'autogenerated') is True else subtitles
+                sub_dict.setdefault(lang, []).append({
+                    'url': src,
+                })
+
         download_urls = asset.get('download_urls')
         if isinstance(download_urls, dict):
             extract_formats(download_urls.get('Video'))
@@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor):
                 extract_formats(data.get('sources'))
                 if not duration:
                     duration = int_or_none(data.get('duration'))
-                tracks = data.get('tracks')
-                if isinstance(tracks, list):
-                    for track in tracks:
-                        if not isinstance(track, dict):
-                            continue
-                        if track.get('kind') != 'captions':
-                            continue
-                        src = track.get('src')
-                        if not src or not isinstance(src, compat_str):
-                            continue
-                        lang = track.get('language') or track.get(
-                            'srclang') or track.get('label')
-                        sub_dict = automatic_captions if track.get(
-                            'autogenerated') is True else subtitles
-                        sub_dict.setdefault(lang, []).append({
-                            'url': src,
-                        })
+                extract_subtitles(data.get('tracks'))
+
+            if not subtitles and not automatic_captions:
+                text_tracks = self._parse_json(
+                    self._search_regex(
+                        r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+                        'text tracks', default='{}', group='data'), video_id,
+                    transform_source=lambda s: js_to_json(unescapeHTML(s)),
+                    fatal=False)
+                extract_subtitles(text_tracks)
 
         self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
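
The subtitle handling is now shared: the track loop moved into extract_subtitles() so it can serve both the player JSON and the new fallback, which pulls an HTML-escaped JS array out of the page's text-tracks attribute. A small self-contained demo of that unescape-then-normalize step (the attribute value is fabricated; the regex and helper calls mirror the extractor):

    import json
    import re

    from youtube_dl.utils import js_to_json, unescapeHTML

    view_html = (
        '<video-player text-tracks="[{'
        '&quot;kind&quot;: &quot;captions&quot;, &quot;autogenerated&quot;: false, '
        '&quot;language&quot;: &quot;en&quot;, '
        '&quot;src&quot;: &quot;https://example.com/en.vtt&quot;}]">'
    )

    raw = re.search(
        r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html).group('data')
    # The attribute holds HTML-escaped JS: unescape first, then make it
    # strict JSON before parsing.
    for track in json.loads(js_to_json(unescapeHTML(raw))):
        if track.get('kind') == 'captions':
            print(track.get('language'), track['src'])
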
 
index 6be3774b7aa7367e9a067b417d7a749fece55d05..570fa45ea7d7e492ac924882f04708d4ecbf5c59 100644 (file)
@@ -121,7 +121,11 @@ class VH1IE(MTVIE):
         idoc = self._download_xml(
             doc_url, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
-        return self.playlist_result(
-            [self._get_video_info(item) for item in idoc.findall('.//item')],
-            playlist_id=video_id,
-        )
+
+        entries = []
+        for item in idoc.findall('.//item'):
+            info = self._get_video_info(item)
+            if info:
+                entries.append(info)
+
+        return self.playlist_result(entries, playlist_id=video_id)
index 701bb1d01c646b75091b769701566d92853ec365..01da32f1cdd05505a6d53eff4f9bc6691aaae512 100644 (file)
@@ -56,7 +56,8 @@ class VidioIE(InfoExtractor):
         self._sort_formats(formats)
 
         duration = int_or_none(duration or self._search_regex(
-            r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))
+            r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
+            'duration', fatal=False, group='duration'))
         thumbnail = thumbnail or self._og_search_thumbnail(webpage)
 
         like_count = int_or_none(self._search_regex(
index e9ff336c4f5cb2e5a4b08fe5a97aa9993bdf87e0..a7971d72ee8479c843dde552d88bc0c324db0562 100644 (file)
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 import itertools
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -161,13 +164,28 @@ class VidmeIE(InfoExtractor):
                 'or for violating the terms of use.',
                 expected=True)
 
-        formats = [{
-            'format_id': f.get('type'),
-            'url': f['uri'],
-            'width': int_or_none(f.get('width')),
-            'height': int_or_none(f.get('height')),
-            'preference': 0 if f.get('type', '').endswith('clip') else 1,
-        } for f in video.get('formats', []) if f.get('uri')]
+        formats = []
+        for f in video.get('formats', []):
+            format_url = f.get('uri')
+            if not format_url or not isinstance(format_url, compat_str):
+                continue
+            format_type = f.get('type')
+            if format_type == 'dash':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False))
+            elif format_type == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'format_id': f.get('type'),
+                    'url': format_url,
+                    'width': int_or_none(f.get('width')),
+                    'height': int_or_none(f.get('height')),
+                    'preference': 0 if f.get('type', '').endswith(
+                        'clip') else 1,
+                })
 
         if not formats and video.get('complete_url'):
             formats.append({
index 77c120a57d45fda4bf49aedc704413313592e3d5..64d0224e6f92779c68e2170564c2b1ad459269ed 100644 (file)
@@ -236,7 +236,12 @@ class VLiveChannelIE(InfoExtractor):
                 query={
                     'app_id': app_id,
                     'channelSeq': channel_seq,
-                    'maxNumOfRows': 1000,
+                    # Large values of maxNumOfRows (~300 or above) may cause
+                    # empty responses (see [1]), e.g. this happens for [2] that
+                    # has more than 300 videos.
+                    # 1. https://github.com/rg3/youtube-dl/issues/13830
+                    # 2. http://channels.vlive.tv/EDBF.
+                    'maxNumOfRows': 100,
                     '_': int(time.time()),
                     'pageNo': page_num
                 }
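
The hunk above only caps maxNumOfRows, but the capped value matters because the surrounding code pages through the channel list with pageNo. A sketch of such a loop shape under the 100-row cap (fetch_page stands in for the extractor's _download_json call; the short-page stop condition is an assumption, not code from this hunk):

    import itertools

    def iter_channel_videos(fetch_page, max_rows=100):
        # Keep max_rows at 100: values around 300 or above make the API
        # return empty responses (see issue #13830 / channel EDBF).
        for page_no in itertools.count(1):
            videos = fetch_page(page_no, max_rows)
            if not videos:
                break
            for video in videos:
                yield video
            if len(videos) < max_rows:
                break  # short page: nothing left to fetch
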
diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py
new file mode 100644 (file)
index 0000000..5de3deb
--- /dev/null
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+    unified_timestamp,
+)
+
+
+class VootIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)'
+    _GEO_COUNTRIES = ['IN']
+    _TESTS = [{
+        'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
+        'info_dict': {
+            'id': '0_8ledb18o',
+            'ext': 'mp4',
+            'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
+            'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
+            'uploader_id': 'batchUser',
+            'timestamp': 1472162937,
+            'upload_date': '20160825',
+            'duration': 1146,
+            'series': 'Ishq Ka Rang Safed',
+            'season_number': 1,
+            'episode': 'Is this the end of Kamini?',
+            'episode_number': 340,
+            'view_count': int,
+            'like_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'expected_warnings': ['Failed to download m3u8 information'],
+    }, {
+        'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.voot.com/movies/pandavas-5/424627',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        media_info = self._download_json(
+            'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
+            query={
+                'platform': 'Web',
+                'pId': 2,
+                'mediaId': video_id,
+            })
+
+        status_code = try_get(media_info, lambda x: x['status']['code'], int)
+        if status_code != 0:
+            raise ExtractorError(media_info['status']['message'], expected=True)
+
+        media = media_info['assets']
+
+        entry_id = media['EntryId']
+        title = media['MediaName']
+
+        description, series, season_number, episode, episode_number = [None] * 5
+
+        for meta in try_get(media, lambda x: x['Metas'], list) or []:
+            key, value = meta.get('Key'), meta.get('Value')
+            if not key or not value:
+                continue
+            if key == 'ContentSynopsis':
+                description = value
+            elif key == 'RefSeriesTitle':
+                series = value
+            elif key == 'RefSeriesSeason':
+                season_number = int_or_none(value)
+            elif key == 'EpisodeMainTitle':
+                episode = value
+            elif key == 'EpisodeNo':
+                episode_number = int_or_none(value)
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'kaltura:1982551:%s' % entry_id,
+            'ie_key': KalturaIE.ie_key(),
+            'title': title,
+            'description': description,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+            'timestamp': unified_timestamp(media.get('CreationDate')),
+            'duration': int_or_none(media.get('Duration')),
+            'view_count': int_or_none(media.get('ViewCounter')),
+            'like_count': int_or_none(media.get('like_counter')),
+        }
index b270f08d1e8796889ac54e4885c2234e4e7eb46b..02fcd52c74c268ea8274359af46810a6beeb0636 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor):
         },
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+            webpage)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py
new file mode 100644 (file)
index 0000000..b382338
--- /dev/null
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    strip_or_none,
+    try_get,
+    unified_timestamp,
+)
+
+
+class WatchBoxIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
+    _TESTS = [{
+        # film
+        'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
+        'info_dict': {
+            'id': '341368',
+            'ext': 'mp4',
+            'title': 'Free Jimmy',
+            'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 4890,
+            'age_limit': 16,
+            'release_year': 2009,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+        'expected_warnings': ['Failed to download m3u8 information'],
+    }, {
+        # episode
+        'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
+        'info_dict': {
+            'id': '328286',
+            'ext': 'mp4',
+            'title': 'S01 E01 - Date in der Hölle',
+            'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1291,
+            'age_limit': 12,
+            'release_year': 2010,
+            'series': 'Ugly Americans',
+            'season_number': 1,
+            'episode': 'Date in der Hölle',
+            'episode_number': 1,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+        'expected_warnings': ['Failed to download m3u8 information'],
+    }, {
+        'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        kind, video_id = mobj.group('kind', 'id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        source = self._parse_json(
+            self._search_regex(
+                r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source',
+                default='{}'),
+            video_id, transform_source=js_to_json, fatal=False) or {}
+
+        video_id = compat_str(source.get('videoId') or video_id)
+
+        devapi = self._download_json(
+            'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
+                'format': 'json',
+                'apikey': 'hbbtv',
+            }, fatal=False)
+
+        item = try_get(devapi, lambda x: x['items'][0], dict) or {}
+
+        title = item.get('title') or try_get(
+            item, lambda x: x['movie']['headline_movie'],
+            compat_str) or source['title']
+
+        formats = []
+        hls_url = item.get('media_videourl_hls') or source.get('hls')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False))
+        dash_url = item.get('media_videourl_wv') or source.get('dash')
+        if dash_url:
+            formats.extend(self._extract_mpd_formats(
+                dash_url, video_id, mpd_id='dash', fatal=False))
+        mp4_url = item.get('media_videourl')
+        if mp4_url:
+            formats.append({
+                'url': mp4_url,
+                'format_id': 'mp4',
+                'width': int_or_none(item.get('width')),
+                'height': int_or_none(item.get('height')),
+                'tbr': int_or_none(item.get('bitrate')),
+            })
+        self._sort_formats(formats)
+
+        description = strip_or_none(item.get('descr'))
+        thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
+        duration = int_or_none(item.get('media_length') or source.get('length'))
+        timestamp = unified_timestamp(item.get('pubDate'))
+        view_count = int_or_none(item.get('media_views'))
+        age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk']))
+        release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year']))
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'age_limit': age_limit,
+            'release_year': release_year,
+            'formats': formats,
+        }
+
+        if kind.lower() == 'serien':
+            series = try_get(
+                item, lambda x: x['special']['title'],
+                compat_str) or source.get('format')
+            season_number = int_or_none(self._search_regex(
+                r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number',
+                default=None) or self._search_regex(
+                    r'/staffel-(\d+)/', url, 'season number', default=None))
+            episode = source.get('title')
+            episode_number = int_or_none(self._search_regex(
+                r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number',
+                default=None))
+            info.update({
+                'series': series,
+                'season_number': season_number,
+                'episode': episode,
+                'episode_number': episode_number,
+            })
+
+        return info
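
For series pages the extractor recovers the season and episode numbers from the 'S01 E01 - …' title prefix, with the /staffel-N/ path segment as a season fallback. The regexes are easy to sanity-check in isolation:

    import re

    def season_episode(title, url):
        m = re.search(r'^S(\d{1,2})\s*E\d{1,2}', title)
        if m:
            season_number = int(m.group(1))
        else:
            # Fall back to the /staffel-N/ segment of the page URL.
            m = re.search(r'/staffel-(\d+)/', url)
            season_number = int(m.group(1)) if m else None
        m = re.search(r'^S\d{1,2}\s*E(\d{1,2})', title)
        episode_number = int(m.group(1)) if m else None
        return season_number, episode_number

    print(season_episode(
        'S01 E01 - Date in der Hölle',
        'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html'))
    # -> (1, 1)
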
index 5c8f17eb2fa1c3bb12924ca189699181eb2b598e..e34ebe3a6612264815295dfe043f0eca1f5b2dcc 100644 (file)
@@ -39,8 +39,8 @@ class XXXYMoviesIE(InfoExtractor):
             r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
 
         title = self._html_search_regex(
-            [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
-             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
+            [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+             r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
             webpage, 'title')
 
         thumbnail = self._search_regex(
diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py
new file mode 100644 (file)
index 0000000..e8f6ae1
--- /dev/null
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    try_get,
+    urlencode_postdata,
+)
+
+
+class YandexDiskIE(InfoExtractor):
+    _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
+        'md5': '33955d7ae052f15853dc41f35f17581c',
+        'info_dict': {
+            'id': 'VdOeDou8eZs6Y',
+            'ext': 'mp4',
+            'title': '4.mp4',
+            'duration': 168.6,
+            'uploader': 'y.botova',
+            'uploader_id': '300043621',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        status = self._download_webpage(
+            'https://disk.yandex.com/auth/status', video_id, query={
+                'urlOrigin': url,
+                'source': 'public',
+                'md5': 'false',
+            })
+
+        sk = self._search_regex(
+            r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
+            status, 'sk', group='value')
+
+        webpage = self._download_webpage(url, video_id)
+
+        models = self._parse_json(
+            self._search_regex(
+                r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
+                webpage, 'video JSON'),
+            video_id)
+
+        data = next(
+            model['data'] for model in models
+            if model.get('model') == 'resource')
+
+        video_hash = data['id']
+        title = data['name']
+
+        models = self._download_json(
+            'https://disk.yandex.com/models/', video_id,
+            data=urlencode_postdata({
+                '_model.0': 'videoInfo',
+                'id.0': video_hash,
+                '_model.1': 'do-get-resource-url',
+                'id.1': video_hash,
+                'version': '13.6',
+                'sk': sk,
+            }), query={'_m': 'videoInfo'})['models']
+
+        videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
+        source_url = try_get(
+            models, lambda x: x[1]['data']['file'], compat_str)
+
+        formats = []
+        if source_url:
+            formats.append({
+                'url': source_url,
+                'format_id': 'source',
+                'ext': determine_ext(title, 'mp4'),
+                'quality': 1,
+            })
+        for video in videos:
+            format_url = video.get('url')
+            if not format_url:
+                continue
+            if determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
+
+        duration = float_or_none(try_get(
+            models, lambda x: x[0]['data']['duration']), 1000)
+        uploader = try_get(
+            data, lambda x: x['user']['display_name'], compat_str)
+        uploader_id = try_get(
+            data, lambda x: x['user']['uid'], compat_str)
+        view_count = int_or_none(try_get(
+            data, lambda x: x['meta']['views_counter']))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'formats': formats,
+        }
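
The interesting part of the new Yandex.Disk extractor is the batched models request: one POST resolves both the transcoded renditions ('videoInfo') and the original file URL ('do-get-resource-url'), authorized by the sk token scraped from /auth/status. A stdlib sketch of that call, assuming the endpoint keeps accepting this form-encoded shape:

    import json
    import urllib.parse
    import urllib.request

    def yadisk_video_models(video_hash, sk):
        # One batched POST: model 0 lists the adaptive renditions,
        # model 1 resolves the original file URL; 'sk' comes from the
        # /auth/status page.
        payload = urllib.parse.urlencode({
            '_model.0': 'videoInfo', 'id.0': video_hash,
            '_model.1': 'do-get-resource-url', 'id.1': video_hash,
            'version': '13.6', 'sk': sk,
        }).encode('utf-8')
        req = urllib.request.Request(
            'https://disk.yandex.com/models/?_m=videoInfo', data=payload)
        with urllib.request.urlopen(req) as resp:
            models = json.loads(resp.read().decode('utf-8'))['models']
        videos = models[0]['data'].get('videos') or []
        source_url = models[1]['data'].get('file')
        return videos, source_url
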
index b50f34e9bb30e47c679940ca1577ea8cc6683934..f33fabe194daceb9ac6ffaf838536e7c9d53cd34 100644 (file)
@@ -1,39 +1,95 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+)
 
 
 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
+    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
     _TESTS = [{
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
-        'md5': '78fc1901148284c69af12640e01c6310',
+        'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
         'info_dict': {
             'id': '2189178',
             'ext': 'mp4',
             'title': 'Zeichentrick 1',
             'age_limit': 18,
+            'duration': 2874,
         }
     }, {
         'url': 'http://www.youjizz.com/videos/-2189178.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youjizz.com/videos/embed/31991001',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('embed_id')
+
         webpage = self._download_webpage(url, video_id)
-        # YouJizz's HTML5 player has invalid HTML
-        webpage = webpage.replace('"controls', '" controls')
-        age_limit = self._rta_search(webpage)
-        video_title = self._html_search_regex(
-            r'<title>\s*(.*)\s*</title>', webpage, 'title')
 
-        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
+        title = self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title')
+
+        formats = []
+
+        encodings = self._parse_json(
+            self._search_regex(
+                r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+                default='[]'),
+            video_id, fatal=False)
+        for encoding in encodings or []:
+            if not isinstance(encoding, dict):
+                continue
+            format_url = encoding.get('filename')
+            if not isinstance(format_url, compat_str):
+                continue
+            if determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                format_id = encoding.get('name') or encoding.get('quality')
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]', format_id or '', 'height', default=None))
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'height': height,
+                })
+
+        if formats:
+            info_dict = {
+                'formats': formats,
+            }
+        else:
+            # YouJizz's HTML5 player has invalid HTML
+            webpage = webpage.replace('"controls', '" controls')
+            info_dict = self._parse_html5_media_entries(
+                url, webpage, video_id)[0]
+
+        duration = parse_duration(self._search_regex(
+            r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
+            default=None))
+        uploader = self._search_regex(
+            r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
+            default=None)
 
         info_dict.update({
             'id': video_id,
-            'title': video_title,
-            'age_limit': age_limit,
+            'title': title,
+            'age_limit': self._rta_search(webpage),
+            'duration': duration,
+            'uploader': uploader,
         })
 
         return info_dict
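
For context on the new format extraction: the code assumes the page embeds a
JavaScript array named `encodings`. The keys below are the ones the loop actually
reads; the values are made up:

    # Hypothetical value of the page's `encodings` array (URLs invented)
    encodings = [
        {'filename': 'https://cdn.example.com/2189178/720p.mp4', 'name': '720p'},
        {'filename': 'https://cdn.example.com/2189178/master.m3u8'},
    ]
    # The first entry becomes a direct format with format_id '720p' and
    # height 720 (parsed from the name); the second has extension m3u8 and
    # is expanded through _extract_m3u8_formats instead.
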
index dcce15d779eeb7b715d9cbc48e628a48f5daa9ed..0c4bc2edab616cceff7cf68f7f64d12010c6e16d 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import itertools
 import random
 import re
 import string
@@ -13,8 +12,8 @@ from ..utils import (
     ExtractorError,
+    get_element_by_class,
     js_to_json,
     str_or_none,
     strip_jsonp,
-    urljoin,
 )
 
 
@@ -222,17 +220,42 @@ class YoukuShowIE(InfoExtractor):
     _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
     IE_NAME = 'youku:show'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
         'info_dict': {
             'id': 'zc7c670be07ff11e48b3f',
-            'title': '花千骨 未删减版',
+            'title': '花千骨 DVD版',
             'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
         },
         'playlist_count': 50,
-    }
+    }, {
+        # Episode number not starting from 1
+        'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
+        'info_dict': {
+            'id': 'zefbfbd70efbfbd780bef',
+            'title': '超级飞侠3',
+            'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
+        },
+        'playlist_count': 24,
+    }, {
+        # Ongoing playlist. The initial page is the last one
+        'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
+        'only_matching': True,
+    }]
 
-    _PAGE_SIZE = 40
+    def _extract_entries(self, playlist_data_url, show_id, note, query):
+        query['callback'] = 'cb'
+        playlist_data = self._download_json(
+            playlist_data_url, show_id, query=query, note=note,
+            transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
+        drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
+                      get_element_by_class('p-drama-half-row', playlist_data))
+        if drama_list is None:
+            raise ExtractorError('No episodes found')
+        video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
+        return playlist_data, [
+            self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
+            for video_url in video_urls]
 
     def _real_extract(self, url):
         show_id = self._match_id(url)
@@ -242,30 +265,29 @@ class YoukuShowIE(InfoExtractor):
         page_config = self._parse_json(self._search_regex(
             r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
             show_id, transform_source=js_to_json)
-        for idx in itertools.count(0):
-            if idx == 0:
-                playlist_data_url = 'http://list.youku.com/show/module'
-                query = {'id': page_config['showid'], 'tab': 'point'}
-            else:
-                playlist_data_url = 'http://list.youku.com/show/point'
-                query = {
-                    'id': page_config['showid'],
-                    'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
-                }
-            query['callback'] = 'cb'
-            playlist_data = self._download_json(
-                playlist_data_url, show_id, query=query,
+        first_page, initial_entries = self._extract_entries(
+            'http://list.youku.com/show/module', show_id,
+            note='Downloading initial playlist data page',
+            query={
+                'id': page_config['showid'],
+                'tab': 'showInfo',
+            })
+        first_page_reload_id = self._html_search_regex(
+            r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
+        # The first reload_id has the same items as first_page
+        reload_ids = re.findall(r'<li[^>]+data-id="([^"]+)">', first_page)
+        for idx, reload_id in enumerate(reload_ids):
+            if reload_id == first_page_reload_id:
+                entries.extend(initial_entries)
+                continue
+            _, new_entries = self._extract_entries(
+                'http://list.youku.com/show/episode', show_id,
                 note='Downloading playlist data page %d' % (idx + 1),
-                transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
-            video_urls = re.findall(
-                r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
-                playlist_data)
-            new_entries = [
-                self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
-                for video_url in video_urls]
+                query={
+                    'id': page_config['showid'],
+                    'stage': reload_id,
+                })
             entries.extend(new_entries)
-            if len(new_entries) < self._PAGE_SIZE:
-                break
 
         desc = self._html_search_meta('description', webpage, fatal=False)
         playlist_title = desc.split(',')[0] if desc else None
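
The transform_source chain in _extract_entries is worth unpacking: the playlist
endpoints answer with JSONP wrapped in a cb(...) callback, and the payload is a
JavaScript object literal rather than strict JSON, so both helpers are needed
before the JSON decoder can run. A rough illustration with a made-up response body:

    from youtube_dl.utils import js_to_json, strip_jsonp

    raw = 'cb({html: "<ul>...</ul>"})'     # hypothetical JSONP response
    unwrapped = strip_jsonp(raw)           # '{html: "<ul>...</ul>"}'
    print(js_to_json(unwrapped))           # '{"html": "<ul>...</ul>"}' - valid JSON
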
index 79e9fd12cd0c3be0a95f427ee01cd9bd5899bd90..38439c97165e18e37d9bf4cd3ebf906f099c9b30 100644 (file)
@@ -20,6 +20,24 @@ from .utils import (
 from .version import __version__
 
 
+def _hide_login_info(opts):
+    PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+    eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+    def _scrub_eq(o):
+        m = eqre.match(o)
+        if m:
+            return m.group('key') + '=PRIVATE'
+        else:
+            return o
+
+    opts = list(map(_scrub_eq, opts))
+    for idx, opt in enumerate(opts):
+        if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+            opts[idx + 1] = 'PRIVATE'
+    return opts
+
+
 def parseOpts(overrideArguments=None):
     def _readOptions(filename_bytes, default=[]):
         try:
@@ -93,26 +111,6 @@ def parseOpts(overrideArguments=None):
     def _comma_separated_values_options_callback(option, opt_str, value, parser):
         setattr(parser.values, option.dest, value.split(','))
 
-    def _hide_login_info(opts):
-        PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']
-        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
-
-        def _scrub_eq(o):
-            m = eqre.match(o)
-            if m:
-                return m.group('key') + '=PRIVATE'
-            else:
-                return o
-
-        opts = list(map(_scrub_eq, opts))
-        for private_opt in PRIVATE_OPTS:
-            try:
-                i = opts.index(private_opt)
-                opts[i + 1] = 'PRIVATE'
-            except ValueError:
-                pass
-        return opts
-
     # No need to wrap help messages if we're on a wide console
     columns = compat_get_terminal_size().columns
     max_width = columns if columns else 80
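
Hoisting _hide_login_info to module level leaves its behavior unchanged while
making it reachable from outside parseOpts. A quick sketch of what it does to a
command line (credentials hypothetical):

    from youtube_dl.options import _hide_login_info

    print(_hide_login_info(['-u', 'alice', '-p', 'hunter2', 'URL']))
    # ['-u', 'PRIVATE', '-p', 'PRIVATE', 'URL']
    print(_hide_login_info(['--username=alice', '--verbose']))
    # ['--username=PRIVATE', '--verbose']
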
index fdf5e29e7d417b94ba7f83129a8420f93ed31217..2554a2abd7e86eeac1c92dddf6e90a307f8b9c9f 100644 (file)
@@ -596,7 +596,7 @@ def unescapeHTML(s):
     assert type(s) == compat_str
 
     return re.sub(
-        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
 def get_subprocess_encoding():
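
The one-character change to unescapeHTML above is easy to miss: excluding '&'
from the entity character class keeps a stray, unescaped ampersand from
swallowing a real entity that follows it. A standalone comparison of the two
patterns:

    import re

    s = 'Tom &amp Jerry &quot;cartoon&quot;'
    # Old pattern: the bare '&' before 'amp Jerry' opens a match that runs to
    # the first ';', so ' Jerry &quot' is consumed as one bogus entity name
    print(re.findall(r'&([^;]+;)', s))   # ['amp Jerry &quot;', 'quot;']
    # New pattern: '&' terminates the class, so both real entities are found
    print(re.findall(r'&([^&;]+;)', s))  # ['quot;', 'quot;']
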
@@ -2733,6 +2733,8 @@ def cli_option(params, command_option, param):
 
 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
     param = params.get(param)
+    if param is None:
+        return []
     assert isinstance(param, bool)
     if separator:
         return [command_option + separator + (true_value if param else false_value)]
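
With the new guard, cli_bool_option degrades to an empty argument list when the
option was never set, instead of tripping the isinstance assert on None. A
sketch of the three cases (parameter names illustrative):

    from youtube_dl.utils import cli_bool_option

    params = {'nocheckcertificate': True}
    print(cli_bool_option(params, '--no-check-certificate', 'nocheckcertificate'))
    # ['--no-check-certificate', 'true']
    print(cli_bool_option(params, '--no-check-certificate', 'nocheckcertificate',
                          separator='='))
    # ['--no-check-certificate=true']
    print(cli_bool_option(params, '--continue', 'continuedl'))
    # [] - absent key, which previously raised AssertionError
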
index 82e166fef8f6edad1b89da56e3b7c46c20c3ff7c..4358cd3f215c17b9241c95773325bbbcf054b5fb 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.07.15'
+__version__ = '2017.08.18'