Merge pull request #10819 from raleeper/adobepass
authorremitamine <remitamine@gmail.com>
Wed, 19 Oct 2016 19:16:24 +0000 (20:16 +0100)
committerGitHub <noreply@github.com>
Wed, 19 Oct 2016 19:16:24 +0000 (20:16 +0100)
[adobepass] Add Comcast

162 files changed:
.github/ISSUE_TEMPLATE.md
AUTHORS
CONTRIBUTING.md
ChangeLog
Makefile
README.md
devscripts/lazy_load_template.py
docs/conf.py
docs/supportedsites.md
setup.py
test/test_http.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/http.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/adobepass.py
youtube_dl/extractor/allocine.py
youtube_dl/extractor/amcnetworks.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/beatport.py [new file with mode: 0644]
youtube_dl/extractor/beatportpro.py [deleted file]
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/carambatv.py
youtube_dl/extractor/cbsinteractive.py
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/chirbit.py
youtube_dl/extractor/clipfish.py
youtube_dl/extractor/clubic.py
youtube_dl/extractor/cmt.py
youtube_dl/extractor/comcarcoff.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonprotocols.py
youtube_dl/extractor/criterion.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/eitb.py
youtube_dl/extractor/embedly.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/faz.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/folketinget.py
youtube_dl/extractor/footyroom.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/hark.py
youtube_dl/extractor/hbo.py
youtube_dl/extractor/helsinki.py
youtube_dl/extractor/huajiao.py [new file with mode: 0644]
youtube_dl/extractor/ina.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/jpopsukitv.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kickstarter.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/krasview.py
youtube_dl/extractor/lego.py [new file with mode: 0644]
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/m6.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/moviezine.py
youtube_dl/extractor/musicplayon.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/newstube.py
youtube_dl/extractor/nextmedia.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/nowness.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ntvru.py
youtube_dl/extractor/nuevo.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/nzz.py [new file with mode: 0644]
youtube_dl/extractor/oktoberfesttv.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/pandoratv.py
youtube_dl/extractor/parliamentliveuk.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/periscope.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/porn91.py
youtube_dl/extractor/pornoxo.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/puls4.py
youtube_dl/extractor/radiobremen.py
youtube_dl/extractor/reverbnation.py
youtube_dl/extractor/rmcdecouverte.py
youtube_dl/extractor/rtl2.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rudo.py
youtube_dl/extractor/ruhd.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sapo.py
youtube_dl/extractor/sbs.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/screenwavemedia.py
youtube_dl/extractor/slutload.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/southpark.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/srmediathek.py
youtube_dl/extractor/streamable.py
youtube_dl/extractor/streamcz.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/tass.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/techtalks.py
youtube_dl/extractor/theintercept.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/theweatherchannel.py [new file with mode: 0644]
youtube_dl/extractor/thisoldhouse.py [new file with mode: 0644]
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tonline.py [new file with mode: 0644]
youtube_dl/extractor/toypics.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tv2.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/urplay.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/vesti.py
youtube_dl/extractor/videomore.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/wrzuta.py
youtube_dl/extractor/wsj.py
youtube_dl/extractor/xboxclips.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xuite.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zingmp3.py
youtube_dl/postprocessor/embedthumbnail.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index 273eb8c0b11b3b05031ada446745b54da4951269..2c9e1f3706bf74aed23a3c587830fff6076b0071 100644 (file)
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.19**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.09.27
+[debug] youtube-dl version 2016.10.19
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
diff --git a/AUTHORS b/AUTHORS
index 937742c5dc8aea8108cfcc75dede9a3805227352..e6562b4176645c5f824da3d4479248d54cd07340 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -26,7 +26,7 @@ Albert Kim
 Pierre Rudloff
 Huarong Huo
 Ismael Mejía
-Steffan 'Ruirize' James
+Steffan Donal
 Andras Elso
 Jelle van der Waa
 Marcin Cieślak
@@ -185,3 +185,5 @@ Aleksander Nitecki
 Sebastian Blunt
 Matěj Cepl
 Xie Yanbo
+Philip Xu
+John Hawkinson
index 95392030ea2fc7f78cbb23b368ee37a9fd5ee010..29f52cbe8eb6fea94c1d108afe91232e4fa581e7 100644 (file)
@@ -12,7 +12,7 @@ $ youtube-dl -v <your command line>
 [debug] Proxy map: {}
 ...
 ```
-**Do not post screenshots of verbose log only plain text is acceptable.**
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
@@ -66,7 +66,7 @@ Only post features that you (or an incapacitated friend you can personally talk
 
 ###  Is your question about youtube-dl?
 
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
 
 # DEVELOPER INSTRUCTIONS
 
@@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make (both GNU make and BSD make are supported)
+* make (only GNU make is supported)
 * pandoc
 * zip
 * nosetests
@@ -167,19 +167,19 @@ In any case, thank you very much for your contributions!
 
 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
 
  - `id` (media identifier)
  - `title` (media title)
  - `url` (media download URL) or `formats`
 
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
+In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
 
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is:
 }
 ```
 
-Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
 
 ```python
 description = meta.get('summary')  # correct
@@ -211,7 +211,7 @@ and not like:
 description = meta['summary']  # incorrect
 ```
 
-The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data). 
+The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
 
 Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
 
@@ -231,21 +231,21 @@ description = self._search_regex(
     webpage, 'description', default=None)
 ```
 
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
  
 ### Provide fallbacks
 
-When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
+When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
 
 #### Example
 
-Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
 
 ```python
 title = meta['title']
 ```
 
-If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in the future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
 
 Assume that you have another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
 
@@ -282,7 +282,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: 
 
 The code definitely should not look like:
 
index 70da55c903570e892acefdff9aae839633389e27..3e53bcd54abf64bd855c356275ea867e26161800 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,108 @@
 version <unreleased>
 
+
+version 2016.10.19
+
+Core
++ [utils] Expose PACKED_CODES_RE
++ [extractor/common] Extract non smil wowza mpd manifests
++ [extractor/common] Detect f4m audio-only formats
+
+Extractors
+* [vidzi] Fix extraction (#10908, #10952)
+* [urplay] Fix subtitles extraction
++ [urplay] Add support for urskola.se (#10915)
++ [orf] Add subtitles support (#10939)
+* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
+* [nrk] Relax URL regular expression (#10928)
++ [nytimes] Add support for podcasts (#10926)
+* [pluralsight] Relax URL regular expression (#10941)
+
+
+version 2016.10.16
+
+Core
+* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
+  in FFmpegExtractAudioPP (#10879)
+
+Extractors
++ [ruutu] Add support for supla.fi (#10849)
++ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
+* [lynda] Fix height for prioritized streams
++ [lynda] Add fallback extraction scenario
+* [lynda] Switch to https (#10916)
++ [huajiao] New extractor (#10917)
+* [cmt] Fix mgid extraction (#10813)
++ [safari:course] Add support for techbus.safaribooksonline.com
+* [orf:tvthek] Fix extraction and modernize (#10898)
+* [chirbit] Fix extraction of user profile pages
+* [carambatv] Fix extraction
+* [canalplus] Fix extraction for some videos
+* [cbsinteractive] Fix extraction for cnet.com
+* [parliamentliveuk] Lower case URLs are now recognized (#10912)
+
+
+version 2016.10.12
+
+Core
++ Support HTML media elements without child nodes
+* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
+
+Extractors
+* [dailymotion] Fix extraction (#10901)
+* [vimeo:review] Fix extraction (#10900)
+* [nhl] Correctly handle invalid formats (#10713)
+* [footyroom] Fix extraction (#10810)
+* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
++ [hbo] Add support for episode pages (#10892)
+* [allocine] Fix extraction (#10860)
++ [nextmedia] Recognize action news on AppleDaily
+* [lego] Improve info extraction and bypass geo restriction (#10872)
+
+
+version 2016.10.07
+
+Extractors
++ [iprima] Detect geo restriction
+* [facebook] Fix video extraction (#10846)
++ [commonprotocols] Support direct MMS links (#10838)
++ [generic] Add support for multiple vimeo embeds (#10862)
++ [nzz] Add support for nzz.ch (#4407)
++ [npo] Detect geo restriction
++ [npo] Add support for 2doc.nl (#10842)
++ [lego] Add support for lego.com (#10369)
++ [tonline] Add support for t-online.de (#10376)
+* [techtalks] Relax URL regular expression (#10840)
+* [youtube:live] Extend URL regular expression (#10839)
++ [theweatherchannel] Add support for weather.com (#7188)
++ [thisoldhouse] Add support for thisoldhouse.com (#10837)
++ [nhl] Add support for wch2016.com (#10833)
+* [pornoxo] Use JWPlatform to improve metadata extraction
+
+
+version 2016.10.02
+
+Core
+* Fix possibly lost extended attributes during post-processing
++ Support pyxattr as well as python-xattr for --xattrs and
+  --xattr-set-filesize (#9054)
+
 Extractors
++ [jwplatform] Support DASH streams in JWPlayer
++ [jwplatform] Support old-style JWPlayer playlists
++ [byutv:event] Add extractor
+* [periscope:user] Fix extraction (#10820)
+* [dctp] Fix extraction (#10734)
++ [instagram] Extract video dimensions (#10790)
++ [tvland] Extend URL regular expression (#10812)
++ [vgtv] Add support for tv.aftonbladet.se (#10800)
+- [aftonbladet] Remove extractor
+* [vk] Fix timestamp and view count extraction (#10760)
++ [vk] Add support for running and finished live streams (#10799)
 + [leeco] Recognize more Le Sports URLs (#10794)
++ [instagram] Extract comments (#10788)
++ [ketnet] Extract mzsource formats (#10770)
+* [limelight:media] Improve HTTP formats extraction
 
 
 version 2016.09.27
index a2763a664188102662cc4f2c5b69518cc6664693..8d66e48c9328f9262c8d7bc493d0d460293d0ad9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
+SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
 
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        install -d $(DESTDIR)$(BINDIR)
@@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish
 
 lazy-extractors: youtube_dl/extractor/lazy_extractors.py
 
-_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
+_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
 youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
        $(PYTHON) devscripts/make_lazy_extractors.py $@
 
index 4debe15feccd2614bd7841f04583ff1b2660cd62..0fbf325452d33a759d2d52207ee18c37a0954fa3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -449,12 +449,12 @@ You can use `--ignore-config` if you want to disable the configuration file for
 
 ### Authentication with `.netrc` file
 
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
 ```
-After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
 ```
 machine <extractor> login <login> password <password>
 ```
@@ -550,13 +550,13 @@ Available for the media that is a track or a part of a music album:
  - `disc_number`: Number of the disc or other physical medium the track belongs to
  - `release_year`: Year (YYYY) when the album was released
 
-Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
 
-For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
+For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 
-Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
+Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
 
-To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
 
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
@@ -564,7 +564,7 @@ In some cases, you don't want special characters such as 中, spaces, or &, such
 
 #### Output template and Windows batch files
 
-If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
 
 #### Output template examples
 
@@ -597,7 +597,7 @@ $ youtube-dl -o - BaW_jenozKc
 
 By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
 
-But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
+But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
 
 The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
 
@@ -605,21 +605,21 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
 
 The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. 
 
-You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
 
-You can also use special names to select particular edge case format:
- - `best`: Select best quality format represented by single file with video and audio
- - `worst`: Select worst quality format represented by single file with video and audio
- - `bestvideo`: Select best quality video only format (e.g. DASH video), may not be available
- - `worstvideo`: Select worst quality video only format, may not be available
- - `bestaudio`: Select best quality audio only format, may not be available
- - `worstaudio`: Select worst quality audio only format, may not be available
+You can also use special names to select particular edge case formats:
+ - `best`: Select the best quality format represented by a single file with video and audio.
+ - `worst`: Select the worst quality format represented by a single file with video and audio.
+ - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
+ - `worstvideo`: Select the worst quality video-only format. May not be available.
+ - `bestaudio`: Select the best quality audio-only format. May not be available.
+ - `worstaudio`: Select the worst quality audio-only format. May not be available.
 
-For example, to download worst quality video only format you can use `-f worstvideo`.
+For example, to download the worst quality video-only format you can use `-f worstvideo`.
 
 If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
 
-If you want to download several formats of the same video use comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or more sophisticated example combined with precedence feature `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
 
 You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
 
@@ -641,15 +641,15 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
  - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
  - `format_id`: A short description of the format
 
-Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster.
+Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the video hoster.
 
 Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
 
-You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download best video only format, best audio only format and mux them together with ffmpeg/avconv.
+You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
 
 Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
 
-Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
 
 If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
 
@@ -728,7 +728,7 @@ Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
 
 YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
 
-If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
+If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
 
 ### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
 
@@ -902,7 +902,7 @@ If you want to find out whether a given URL is supported, simply call youtube-dl
 
 # Why do I need to go through that much red tape when filing bugs?
 
-Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
+Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
 
 youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
 
@@ -923,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make (both GNU make and BSD make are supported)
+* make (only GNU make is supported)
 * pandoc
 * zip
 * nosetests
@@ -1005,19 +1005,19 @@ In any case, thank you very much for your contributions!
 
 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
 
  - `id` (media identifier)
  - `title` (media title)
  - `url` (media download URL) or `formats`
 
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
+In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
 
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -1037,7 +1037,7 @@ Assume at this point `meta`'s layout is:
 }
 ```
 
-Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:
 
 ```python
 description = meta.get('summary')  # correct
@@ -1049,7 +1049,7 @@ and not like:
 description = meta['summary']  # incorrect
 ```
 
-The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data). 
+The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
 
 Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
 
@@ -1069,21 +1069,21 @@ description = self._search_regex(
     webpage, 'description', default=None)
 ```
 
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
  
 ### Provide fallbacks
 
-When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
+When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
 
 #### Example
 
-Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
 
 ```python
 title = meta['title']
 ```
 
-If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in the future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
 
 Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
 
@@ -1120,7 +1120,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for the `class` attribute:
 
 The code definitely should not look like:
 
@@ -1190,7 +1190,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
@@ -1206,7 +1206,7 @@ $ youtube-dl -v <your command line>
 [debug] Proxy map: {}
 ...
 ```
-**Do not post screenshots of verbose log only plain text is acceptable.**
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
@@ -1260,7 +1260,7 @@ Only post features that you (or an incapacitated friend you can personally talk
 
 ###  Is your question about youtube-dl?
 
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
 
 # COPYRIGHT
 
index 2e6e6641b8e385beba9a9b742125eb1a4d1e0cc3..c4e5fc1f40e72f987ddedd5c91d3367a8692150c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 594ca61a6bf984d173620a3e95eaca28b22cda5a..0aaf1b8fcf8220301d63250e83cb1587b618388c 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 #
 # youtube-dl documentation build configuration file, created by
 # sphinx-quickstart on Fri Mar 14 21:05:43 2014.
index 26f27557713ce6ccfbee28cf3b0d92dc1000beff..0a518881ab7aba035d6ae5ab5ed9e932b0bf38d7 100644 (file)
@@ -34,7 +34,6 @@
  - **AdultSwim**
  - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
  - **AfreecaTV**: afreecatv.com
- - **Aftonbladet**
  - **AirMozilla**
  - **AlJazeera**
  - **Allocine**
@@ -87,7 +86,7 @@
  - **bbc.co.uk:article**: BBC articles
  - **bbc.co.uk:iplayer:playlist**
  - **bbc.co.uk:playlist**
- - **BeatportPro**
+ - **Beatport**
  - **Beeg**
  - **BehindKink**
  - **BellMedia**
  - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
  - **BuzzFeed**
  - **BYUtv**
+ - **BYUtvEvent**
  - **Camdemy**
  - **CamdemyFolder**
  - **CamWithHer**
  - **Groupon**
  - **Hark**
  - **HBO**
+ - **HBOEpisode**
  - **HearThisAt**
  - **Heise**
  - **HellPorno**
  - **HowStuffWorks**
  - **HRTi**
  - **HRTiPlaylist**
+ - **Huajiao**: 花椒直播
  - **HuffPost**: Huffington Post
  - **Hypem**
  - **Iconosquare**
  - **Le**: 乐视网
  - **Learnr**
  - **Lecture2Go**
+ - **LEGO**
  - **Lemonde**
  - **LePlaylist**
  - **LetvCloud**: 乐视云
  - **Nuvid**
  - **NYTimes**
  - **NYTimesArticle**
+ - **NZZ**
  - **ocw.mit.edu**
  - **OdaTV**
  - **Odnoklassniki**
  - **SWRMediathek**
  - **Syfy**
  - **SztvHu**
+ - **t-online.de**
  - **Tagesschau**
  - **tagesschau:player**
  - **Tass**
  - **TF1**
  - **TFO**
  - **TheIntercept**
+ - **theoperaplatform**
  - **ThePlatform**
  - **ThePlatformFeed**
  - **TheScene**
  - **TheSixtyOne**
  - **TheStar**
+ - **TheWeatherChannel**
  - **ThisAmericanLife**
  - **ThisAV**
+ - **ThisOldHouse**
  - **tinypic**: tinypic.com videos
  - **tlc.de**
  - **TMZ**
index 508b27f3707898d07d303cd1ce44b7e4d54b152f..ce6dd1870bc52951d268f96aa4dc68ea6f92e04d 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import print_function
 
index fdc68ccb42c85410788ecb7bcb1eafd802b3a794..bb0a098e48f5cae93cade9b4bc99c4a4d1a545ed 100644 (file)
@@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase):
 
         ydl = YoutubeDL({'logger': FakeLogger()})
         r = ydl.extract_info('http://localhost:%d/302' % self.port)
-        self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
 
 
 class TestHTTPS(unittest.TestCase):
@@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase):
 
         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
         r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
-        self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
 
 
 def _build_proxy_handler(name):
index 442aa663bf4bfa181ef32c9bde148448d0b9a189..99825e343e1f731c0d3534c7a65fe6dfd6964b8c 100755 (executable)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import absolute_import, unicode_literals
 
index 1cf3140a038cdcc27737c6ae018cb0db3eb9014b..643393558b2d1c19c71fcbd25f3c60f5465912aa 100644 (file)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -266,8 +266,6 @@ def _real_main(argv=None):
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
         })
-    if opts.xattrs:
-        postprocessors.append({'key': 'XAttrMetadata'})
     if opts.embedthumbnail:
         already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
@@ -276,6 +274,10 @@ def _real_main(argv=None):
         })
         if not already_have_thumbnail:
             opts.writethumbnail = True
+    # XAttrMetadataPP should be run after post-processors that may change file
+    # contents
+    if opts.xattrs:
+        postprocessors.append({'key': 'XAttrMetadata'})
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
@@ -283,12 +285,6 @@ def _real_main(argv=None):
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
         })
-    if opts.xattr_set_filesize:
-        try:
-            import xattr
-            xattr  # Confuse flake8
-        except ImportError:
-            parser.error('setting filesize xattr requested but python-xattr is not available')
     external_downloader_args = None
     if opts.external_downloader_args:
         external_downloader_args = compat_shlex_split(opts.external_downloader_args)
index 8482cbd8423dae254db7efff873588cd8fb10b8a..3dc144b4e19f208d4075d6423ce3278b3a614330 100644 (file)
@@ -346,7 +346,6 @@ class FileDownloader(object):
         min_sleep_interval = self.params.get('sleep_interval')
         if min_sleep_interval:
             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
-            print(min_sleep_interval, max_sleep_interval)
             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
             time.sleep(sleep_interval)
index f8b69d186ac5ee93c8402f85bc66e7ed59570118..af405b9509572bfd42bb11bd48bec5300d8105b3 100644 (file)
@@ -13,6 +13,9 @@ from ..utils import (
     encodeFilename,
     sanitize_open,
     sanitized_Request,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
 
                 if self.params.get('xattr_set_filesize', False) and data_len is not None:
                     try:
-                        import xattr
-                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
-                    except(OSError, IOError, ImportError) as err:
+                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                    except (XAttrUnavailableError, XAttrMetadataError) as err:
                         self.report_error('unable to set filesize xattr: %s' % str(err))
 
             try:
index 465249bbf6bdf92357944b065174076b60567295..0247cabf9df8a6c61602085dcabe5f139b53420a 100644 (file)
@@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor):
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
-        'md5': '979d10b2939101f0d27a06b79edad536',
+        'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+        'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'FA1505V024S00',
+            'id': 'ZX9735A001S00',
             'ext': 'mp4',
-            'title': 'Series 27 Ep 24',
-            'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
-            'upload_date': '20160820',
-            'uploader_id': 'abc1',
-            'timestamp': 1471719600,
+            'title': 'Diaries Of A Broken Mind',
+            'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
+            'upload_date': '20161010',
+            'uploader_id': 'abc2',
+            'timestamp': 1476064920,
         },
         'skip': 'Video gone',
     }]
@@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         video_params = self._parse_json(self._search_regex(
             r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
-        title = video_params['title']
+        title = video_params.get('title') or video_params['seriesTitle']
         stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
 
         formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
@@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor):
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
             'series': video_params.get('seriesTitle'),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
-            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
-            'episode': self._html_search_meta('episode_title', webpage),
+            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
+            'episode': self._html_search_meta('episode_title', webpage, default=None),
             'uploader_id': video_params.get('channel'),
             'formats': formats,
             'subtitles': subtitles,
index 76ab69d8c842677a24ca5088293022c6ce41002a..b6d215a554350ca7b7e21f34e5f207b2af4f468f 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 190bc2cc8730853a23b9025f1849bf234a32e001..517b06def4d2ff690628eece4b1e85e647aea267 100644 (file)
@@ -1,29 +1,26 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
+    remove_end,
     qualities,
-    unescapeHTML,
-    xpath_element,
+    url_basename,
 )
 
 
 class AllocineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
 
     _TESTS = [{
         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
         'md5': '0c9fcf59a841f65635fa300ac43d8269',
         'info_dict': {
             'id': '19546517',
+            'display_id': '18635087',
             'ext': 'mp4',
             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
-            'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
+            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
@@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
         'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
         'info_dict': {
             'id': '19540403',
+            'display_id': '19540403',
             'ext': 'mp4',
             'title': 'Planes 2 Bande-annonce VF',
             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
-        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
         'md5': '101250fb127ef9ca3d73186ff22a47ce',
         'info_dict': {
             'id': '19544709',
+            'display_id': '19544709',
             'ext': 'mp4',
             'title': 'Dragons 2 - Bande annonce finale VF',
-            'description': 'md5:601d15393ac40f249648ef000720e7e3',
+            'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
         'url': 'http://www.allocine.fr/video/video-19550147/',
-        'only_matching': True,
+        'md5': '3566c0668c0235e2d224fd8edb389f67',
+        'info_dict': {
+            'id': '19550147',
+            'ext': 'mp4',
+            'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
+            'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        typ = mobj.group('typ')
-        display_id = mobj.group('id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
-        if typ == 'film':
-            video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
-        else:
-            player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
-            if player:
-                player_data = json.loads(player)
-                video_id = compat_str(player_data['refMedia'])
-            else:
-                model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
-                model_data = self._parse_json(unescapeHTML(model), display_id)
-                video_id = compat_str(model_data['id'])
+        formats = []
+        quality = qualities(['ld', 'md', 'hd'])
 
-        xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+        model = self._html_search_regex(
+            r'data-model="([^"]+)"', webpage, 'data model', default=None)
+        if model:
+            model_data = self._parse_json(model, display_id)
 
-        video = xpath_element(xml, './/AcVisionVideo').attrib
-        quality = qualities(['ld', 'md', 'hd'])
+            for video_url in model_data['sources'].values():
+                video_id, format_id = url_basename(video_url).split('_')[:2]
+                formats.append({
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                    'url': video_url,
+                })
 
-        formats = []
-        for k, v in video.items():
-            if re.match(r'.+_path', k):
-                format_id = k.split('_')[0]
+            title = model_data['title']
+        else:
+            video_id = display_id
+            media_data = self._download_json(
+                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+            for key, value in media_data['video'].items():
+                if not key.endswith('Path'):
+                    continue
+
+                format_id = key[:-len('Path')]
                 formats.append({
                     'format_id': format_id,
                     'quality': quality(format_id),
-                    'url': v,
+                    'url': value,
                 })
+
+            title = remove_end(self._html_search_regex(
+                r'(?s)<title>(.+?)</title>', webpage, 'title'
+            ).strip(), ' - AlloCiné')
+
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video['videoTitle'],
+            'display_id': display_id,
+            'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'description': self._og_search_description(webpage),
index c739d2c99167dbd27e44dee326fcb49222029187..d2b03b177c1fd46c88552d0355365d2fae7772c9 100644 (file)
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
             # m3u8 download
             'skip_download': True,
         },
+        'skip': 'Requires TV provider accounts',
     }, {
         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
         'only_matching': True,
index e0c5c18045312a064d8663a025a9fdaabb7a28df..69a23e88c5b08738a3ce66cce47215fe58e4fcc0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -410,6 +410,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
         return self._extract_from_json_url(json_url, video_id, lang)
 
 
+class TheOperaPlatformIE(ArteTVPlus7IE):
+    IE_NAME = 'theoperaplatform'
+    _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
+        'md5': '970655901fa2e82e04c00b955e9afe7b',
+        'info_dict': {
+            'id': '060338-009-A',
+            'ext': 'mp4',
+            'title': 'Verdi - OTELLO',
+            'upload_date': '20160927',
+        },
+    }]
+
+
 class ArteTVPlaylistIE(ArteTVBaseIE):
     IE_NAME = 'arte.tv:playlist'
     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
diff --git a/youtube_dl/extractor/beatport.py b/youtube_dl/extractor/beatport.py
new file mode 100644 (file)
index 0000000..e607094
--- /dev/null
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
+        'md5': 'b3c34d8639a2f6a7f734382358478887',
+        'info_dict': {
+            'id': '5379371',
+            'display_id': 'synesthesia-original-mix',
+            'ext': 'mp4',
+            'title': 'Froxic - Synesthesia (Original Mix)',
+        },
+    }, {
+        'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
+        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+        'info_dict': {
+            'id': '3756896',
+            'display_id': 'love-and-war-original-mix',
+            'ext': 'mp3',
+            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+        },
+    }, {
+        'url': 'https://beatport.com/track/birds-original-mix/4991738',
+        'md5': 'a1fd8e8046de3950fd039304c186c05f',
+        'info_dict': {
+            'id': '4991738',
+            'display_id': 'birds-original-mix',
+            'ext': 'mp4',
+            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        track_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playables = self._parse_json(
+            self._search_regex(
+                r'window\.Playables\s*=\s*({.+?});', webpage,
+                'playables info', flags=re.DOTALL),
+            track_id)
+
+        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+        if track['mix']:
+            title += ' (' + track['mix'] + ')'
+
+        formats = []
+        for ext, info in track['preview'].items():
+            if not info['url']:
+                continue
+            fmt = {
+                'url': info['url'],
+                'ext': ext,
+                'format_id': ext,
+                'vcodec': 'none',
+            }
+            if ext == 'mp3':
+                fmt['preference'] = 0
+                fmt['acodec'] = 'mp3'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            elif ext == 'mp4':
+                fmt['preference'] = 1
+                fmt['acodec'] = 'aac'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        images = []
+        for name, info in track['images'].items():
+            image_url = info.get('url')
+            if name == 'dynamic' or not image_url:
+                continue
+            image = {
+                'id': name,
+                'url': image_url,
+                'height': int_or_none(info.get('height')),
+                'width': int_or_none(info.get('width')),
+            }
+            images.append(image)
+
+        return {
+            'id': compat_str(track.get('id')) or track_id,
+            'display_id': track.get('slug') or display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': images,
+        }
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py
deleted file mode 100644 (file)
index 3c7775d..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none
-
-
-class BeatportProIE(InfoExtractor):
-    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
-        'md5': 'b3c34d8639a2f6a7f734382358478887',
-        'info_dict': {
-            'id': '5379371',
-            'display_id': 'synesthesia-original-mix',
-            'ext': 'mp4',
-            'title': 'Froxic - Synesthesia (Original Mix)',
-        },
-    }, {
-        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
-        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
-        'info_dict': {
-            'id': '3756896',
-            'display_id': 'love-and-war-original-mix',
-            'ext': 'mp3',
-            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
-        },
-    }, {
-        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
-        'md5': 'a1fd8e8046de3950fd039304c186c05f',
-        'info_dict': {
-            'id': '4991738',
-            'display_id': 'birds-original-mix',
-            'ext': 'mp4',
-            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
-        }
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        track_id = mobj.group('id')
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-
-        playables = self._parse_json(
-            self._search_regex(
-                r'window\.Playables\s*=\s*({.+?});', webpage,
-                'playables info', flags=re.DOTALL),
-            track_id)
-
-        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
-
-        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
-        if track['mix']:
-            title += ' (' + track['mix'] + ')'
-
-        formats = []
-        for ext, info in track['preview'].items():
-            if not info['url']:
-                continue
-            fmt = {
-                'url': info['url'],
-                'ext': ext,
-                'format_id': ext,
-                'vcodec': 'none',
-            }
-            if ext == 'mp3':
-                fmt['preference'] = 0
-                fmt['acodec'] = 'mp3'
-                fmt['abr'] = 96
-                fmt['asr'] = 44100
-            elif ext == 'mp4':
-                fmt['preference'] = 1
-                fmt['acodec'] = 'aac'
-                fmt['abr'] = 96
-                fmt['asr'] = 44100
-            formats.append(fmt)
-        self._sort_formats(formats)
-
-        images = []
-        for name, info in track['images'].items():
-            image_url = info.get('url')
-            if name == 'dynamic' or not image_url:
-                continue
-            image = {
-                'id': name,
-                'url': image_url,
-                'height': int_or_none(info.get('height')),
-                'width': int_or_none(info.get('width')),
-            }
-            images.append(image)
-
-        return {
-            'id': compat_str(track.get('id')) or track_id,
-            'display_id': track.get('slug') or display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnails': images,
-        }
index 2ec55b185509da92eeb39577b601d61fa304a887..945cf19e8bce0f1f9576d26abc455c9795a250d3 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 3aec601f8e7179570088e1ea5ad1f7b6d30f219d..4be175d7039dd845f7c961af552bc1153b73598e 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
 
 
 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+    _TESTS = [{
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
-        'md5': '05850eb8c749e2ee05ad5a1c34668493',
         'info_dict': {
-            'id': 'studio-c-season-5-episode-5',
+            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+            'display_id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
-            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
             'title': 'Season 5 Episode 5',
+            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 1486.486,
         },
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': ['Ooyala'],
-    }
+    }, {
+        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)
         episode_code = self._search_regex(
             r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-        episode_json = re.sub(
-            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
-        ep = json.loads(episode_json)
-
-        if ep['providerType'] == 'Ooyala':
-            return {
-                '_type': 'url_transparent',
-                'ie_key': 'Ooyala',
-                'url': 'ooyala:%s' % ep['providerId'],
-                'id': video_id,
-                'title': ep['title'],
-                'description': ep.get('description'),
-                'thumbnail': ep.get('imageThumbnail'),
-            }
-        else:
+
+        ep = self._parse_json(
+            episode_code, display_id, transform_source=lambda s:
+            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
+
+        if ep['providerType'] != 'Ooyala':
             raise ExtractorError('Unsupported provider %s' % ep['provider'])
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % ep['providerId'],
+            'id': video_id,
+            'display_id': display_id,
+            'title': ep['title'],
+            'description': ep.get('description'),
+            'thumbnail': ep.get('imageThumbnail'),
+        }
+
+
+class BYUtvEventIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
+    _TEST = {
+        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+        'info_dict': {
+            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+            'ext': 'mp4',
+            'title': 'Toledo vs. BYU (9/30/16)',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Ooyala'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        ooyala_id = self._search_regex(
+            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+            webpage, 'ooyala id', group='id')
+
+        title = self._search_regex(
+            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
+            'title').strip()
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % ooyala_id,
+            'id': video_id,
+            'title': title,
+        }
index 69e8f4f5704602972cecd0d3ab48d8aa382a8920..1c3c41d26619ec2fa347c4a75093b2a1cf7003a2 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -6,11 +6,13 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
+    dict_get,
     ExtractorError,
     HEADRequest,
-    unified_strdate,
-    qualities,
     int_or_none,
+    qualities,
+    remove_end,
+    unified_strdate,
 )
 
 
@@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
-        'md5': '41f438a4904f7664b91b4ed0dec969dc',
         'info_dict': {
-            'id': '1192814',
+            'id': '1405510',
+            'display_id': 'pid1830-c-zapping',
             'ext': 'mp4',
-            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
-            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
-            'upload_date': '20150105',
+            'title': 'Zapping - 02/07/2016',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours',
+            'upload_date': '20160702',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
         'info_dict': {
             'id': '1108190',
-            'ext': 'flv',
-            'title': 'Le labyrinthe - Boing super ranger',
+            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+            'ext': 'mp4',
+            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
             'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
             'upload_date': '20140724',
         },
         'skip': 'Only works from France',
     }, {
-        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
+        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
+        'md5': '4b47b12b4ee43002626b97fad8fb1de5',
         'info_dict': {
-            'id': '1390231',
+            'id': '1420213',
+            'display_id': 'pid6318-videos-integrales',
             'ext': 'mp4',
-            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
-            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
-            'upload_date': '20160512',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
+            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
+            'upload_date': '20161014',
         },
+        'skip': 'Only works from France',
     }, {
-        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
+        'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
         'info_dict': {
-            'id': '1398334',
+            'id': '1420176',
+            'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
             'ext': 'mp4',
-            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
-            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
-            'upload_date': '20160607',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
+            'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
+            'upload_date': '20161014',
         },
     }, {
         'url': 'http://m.canalplus.fr/?vid=1398231',
@@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 
         site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 
         # Beware, some subclasses do not define an id group
-        display_id = mobj.group('display_id') or video_id
+        display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
 
-        if video_id is None:
-            webpage = self._download_webpage(url, display_id)
-            video_id = self._search_regex(
-                [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
-                webpage, 'video id', group='id')
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
+             r'id=["\']canal_video_player(?P<id>\d+)'],
+            webpage, 'video id', group='id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
         video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
index 5797fb95142f556bade163051f4f51a40e32cda7..66c0f900a402664653a846e9b39fc44c1da2853e 100644 (file)
@@ -9,6 +9,8 @@ from ..utils import (
     try_get,
 )
 
+from .videomore import VideomoreIE
+
 
 class CarambaTVIE(InfoExtractor):
     _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
@@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
     _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
-        'md5': '',
+        'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
         'info_dict': {
-            'id': '191910501',
-            'ext': 'mp4',
+            'id': '475222',
+            'ext': 'flv',
             'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 2678.31,
+            'thumbnail': 're:^https?://.*\.jpg',
+            # duration reported by videomore is incorrect
+            'duration': int,
         },
+        'add_ie': [VideomoreIE.ie_key()],
     }
 
     def _real_extract(self, url):
@@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        videomore_url = VideomoreIE._extract_url(webpage)
+        if videomore_url:
+            title = self._og_search_title(webpage)
+            return {
+                '_type': 'url_transparent',
+                'url': videomore_url,
+                'ie_key': VideomoreIE.ie_key(),
+                'title': title,
+            }
+
         video_url = self._og_search_property('video:iframe', webpage, default=None)
 
         if not video_url:
index 821db20b23052ca71d594c6c05ad705a400129a3..57b18e81d412b20162f60e8d8e44699b76f2e3af 100644 (file)
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]
index 216989230c3a20046045c2c0b658af23385c464b..91b0f5fa94c7ba919e01fd097cbdfc71fe6992b4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 87c2e7089f4567bc30ec0c54e43fe7f25c607cc0..4ec79d19dd9db6402752ee65d462631985009cbf 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 61aed016753b28ceac34974c76aca0f9e1639f35..f35df143a604695c0b1fe7b0e33d7384192d1d98 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import re
 
 from .common import InfoExtractor
 from ..utils import parse_duration
@@ -70,7 +71,6 @@ class ChirbitProfileIE(InfoExtractor):
         'url': 'http://chirbit.com/ScarletBeauty',
         'info_dict': {
             'id': 'ScarletBeauty',
-            'title': 'Chirbits by ScarletBeauty',
         },
         'playlist_mincount': 3,
     }
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
     def _real_extract(self, url):
         profile_id = self._match_id(url)
 
-        rss = self._download_xml(
-            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+        webpage = self._download_webpage(url, profile_id)
 
         entries = [
-            self.url_result(audio_url.text, 'Chirbit')
-            for audio_url in rss.findall('./channel/item/link')]
+            self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+            for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
 
-        title = rss.find('./channel/title').text
-
-        return self.playlist_result(entries, profile_id, title)
+        return self.playlist_result(entries, profile_id)
index 3a47f6fa4e1cdf734670ff64abb9aa4c02c94a6e..bb52e0c6ff75178626f83cd0a6d2de6607e861ad 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -10,15 +11,15 @@ from ..utils import (
 class ClipfishIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        'md5': '79bc922f3e8a9097b3d68a93780fd475',
+        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
+        'md5': '720563e467b86374c194bdead08d207d',
         'info_dict': {
-            'id': '3966754',
+            'id': '4343170',
             'ext': 'mp4',
-            'title': 'FIFA 14 - E3 2013 Trailer',
-            'description': 'Video zu FIFA 14: E3 2013 Trailer',
-            'upload_date': '20130611',
-            'duration': 82,
+            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
+            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
+            'upload_date': '20161005',
+            'duration': 1291,
             'view_count': int,
         }
     }
@@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
                 'tbr': int_or_none(video_info.get('bitrate')),
             })
 
+        descr = video_info.get('descr')
+        if descr:
+            descr = descr.strip()
+
         return {
             'id': video_id,
             'title': video_info['title'],
-            'description': video_info.get('descr'),
+            'description': descr,
             'formats': formats,
             'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
             'duration': int_or_none(video_info.get('media_length')),
index 2fba93543474cd7ebd53848aca62848c32bf7164..f7ee3a8f8ebe4715b2d2a5f4634bc50836cc33f7 100644 (file)
@@ -1,9 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
         player_page = self._download_webpage(player_url, video_id)
 
-        config_json = self._search_regex(
+        config = self._parse_json(self._search_regex(
             r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
-            'configuration')
-        config = json.loads(config_json)
+            'configuration'), video_id)
 
         video_info = config['videoInfo']
         sources = config['sources']
index ac3bdfe8f2fcaf2344d02c00f53dcfc9cb12af32..7d3e9b0c9ce89fff9b8094f2d86beaa5fb35e7e0 100644 (file)
@@ -26,7 +26,7 @@ class CMTIE(MTVIE):
             'id': '1504699',
             'ext': 'mp4',
             'title': 'Still The King Ep. 109 in 3 Minutes',
-            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
+            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
             'timestamp': 1469421000.0,
             'upload_date': '20160725',
         },
@@ -42,3 +42,8 @@ class CMTIE(MTVIE):
                 '%s said: video is not available' % cls.IE_NAME, expected=True)
 
         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
+
+    def _extract_mgid(self, webpage):
+        return self._search_regex(
+            r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+            webpage, 'mgid', group='mgid')
index 747c245c844171958637213b37daec3dd03f3a7e..588aad0d911038229a4a3a97e5c74284f7bafc56 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 1076b46da773b5c90cf0c898202f9a8fc5279dbf..415dc84c82d537fad9493faf38d5348143a7e521 100644 (file)
@@ -21,6 +21,7 @@ from ..compat import (
     compat_os_name,
     compat_str,
     compat_urllib_error,
+    compat_urllib_parse_unquote,
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
@@ -234,7 +235,7 @@ class InfoExtractor(object):
     chapter_id:     Id of the chapter the video belongs to, as a unicode string.
 
     The following fields should only be used when the video is an episode of some
-    series or programme:
+    series, programme or podcast:
 
     series:         Title of the series or programme the video episode belongs to.
     season:         Title of the season the video episode belongs to.
@@ -1099,6 +1100,13 @@ class InfoExtractor(object):
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
             'bootstrap info', default=None)
 
+        vcodec = None
+        mime_type = xpath_text(
+            manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+            'base URL', default=None)
+        if mime_type and mime_type.startswith('audio/'):
+            vcodec = 'none'
+
         for i, media_el in enumerate(media_nodes):
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             width = int_or_none(media_el.attrib.get('width'))
@@ -1139,6 +1147,7 @@ class InfoExtractor(object):
                             'width': f.get('width') or width,
                             'height': f.get('height') or height,
                             'format_id': f.get('format_id') if not tbr else format_id,
+                            'vcodec': vcodec,
                         })
                     formats.extend(f4m_formats)
                     continue
@@ -1155,6 +1164,7 @@ class InfoExtractor(object):
                 'tbr': tbr,
                 'width': width,
                 'height': height,
+                'vcodec': vcodec,
                 'preference': preference,
             })
         return formats
@@ -1801,7 +1811,11 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},
@@ -1870,11 +1884,11 @@ class InfoExtractor(object):
             formats.extend(self._extract_f4m_formats(
                 http_base_url + '/manifest.f4m',
                 video_id, f4m_id='hds', fatal=False))
+        if 'dash' not in skip_protocols:
+            formats.extend(self._extract_mpd_formats(
+                http_base_url + '/manifest.mpd',
+                video_id, mpd_id='dash', fatal=False))
         if re.search(r'(?:/smil:|\.smil)', url_base):
-            if 'dash' not in skip_protocols:
-                formats.extend(self._extract_mpd_formats(
-                    http_base_url + '/manifest.mpd',
-                    video_id, mpd_id='dash', fatal=False))
             if 'smil' not in skip_protocols:
                 rtmp_formats = self._extract_smil_formats(
                     http_base_url + '/jwplayer.smil',
@@ -2020,6 +2034,12 @@ class InfoExtractor(object):
             headers['Ytdl-request-proxy'] = geo_verification_proxy
         return headers
 
+    def _generic_id(self, url):
+        return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+    def _generic_title(self, url):
+        return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 5d130a170ed79454e05087cae24ab0d132448b32..d98331a4e400b23389bee55e4b648b31d464b8d6 100644 (file)
@@ -1,13 +1,9 @@
 from __future__ import unicode_literals
 
-import os
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_unquote,
     compat_urlparse,
 )
-from ..utils import url_basename
 
 
 class RtmpIE(InfoExtractor):
@@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
-        title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+        video_id = self._generic_id(url)
+        title = self._generic_title(url)
         return {
             'id': video_id,
             'title': title,
@@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
                 'format_id': compat_urlparse.urlparse(url).scheme,
             }],
         }
+
+
+class MmsIE(InfoExtractor):
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'(?i)mms://.+'
+
+    _TEST = {
+        # Direct MMS link
+        'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+        'info_dict': {
+            'id': 'MilesReid(0709)',
+            'ext': 'wmv',
+            'title': 'MilesReid(0709)',
+        },
+        'params': {
+            'skip_download': True,  # rtsp downloads, requiring mplayer or mpv
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._generic_id(url)
+        title = self._generic_title(url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+        }
index ad32673a812f8bc529993c25feba87ffcf21b3e1..cf6a5d6cbe906443b1db592616cd89926860bbdd 100644 (file)
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Le Samouraï',
             'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         final_url = self._search_regex(
-            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
         title = self._og_search_title(webpage)
         description = self._html_search_meta('description', webpage)
         thumbnail = self._search_regex(
-            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
             webpage, 'thumbnail url')
 
         return {
index e4c10ad24de1f5aa31b73baaf76a72d18dc1aae0..cc141f68ec52f4d3b7f795a099b5b1ccf310fdbb 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -150,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # rtmp
             'skip_download': True,
         },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
         'info_dict': {
index 62b0747a5c5a73f7628a3e500cbffada661f3212..4a3314ea7d4fc2df95543cda554d32a8caf586ac 100644 (file)
@@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                 'uploader': 'HotWaves1012',
                 'age_limit': 18,
-            }
+            },
+            'skip': 'video gone',
         },
         # geo-restricted, player v5
         {
@@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         player_v5 = self._search_regex(
             [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
              r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
-             r'buildPlayer\(({.+?})\);'],
+             r'buildPlayer\(({.+?})\);',
+             r'var\s+config\s*=\s*({.+?});'],
             webpage, 'player v5', default=None)
         if player_v5:
             player = self._parse_json(player_v5, video_id)
index b5c310ccb8042c7bfa44c6a909ead398fc679dd4..732b4362a96488e67f4b1858f83429a85e877555 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index a47e0499346b978aeb9172c8353c18c94ecf867c..14ba88715887caeb9144e68384417b2e7b518b07 100644 (file)
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
 
 
 class DctpTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
-            'id': '1324',
+            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'flv',
-            'title': 'Videoinstallation für eine Kaufhausfassade'
+            'ext': 'mp4',
+            'title': 'Videoinstallation für eine Kaufhausfassade',
+            'description': 'Kurzfilm',
+            'upload_date': '20110407',
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
-        version_json = self._download_json(
-            base_url + 'version.json',
-            video_id, note='Determining file version')
-        version = version_json['version_name']
-        info_json = self._download_json(
-            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
-            video_id, note='Fetching object ID')
-        object_id = compat_str(info_json['object_id'])
-        meta_json = self._download_json(
-            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
-            video_id, note='Downloading metadata')
-        uuid = meta_json['uuid']
-        title = meta_json['title']
-        wide = meta_json['is_wide']
-        if wide:
-            ratio = '16x9'
-        else:
-            ratio = '4x3'
-        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+        webpage = self._download_webpage(url, video_id)
+
+        object_id = self._html_search_meta('DC.identifier', webpage)
 
         servers_json = self._download_json(
-            'http://www.dctp.tv/streaming_servers/',
+            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
             video_id, note='Downloading server list')
-        url = servers_json[0]['endpoint']
+        server = servers_json[0]['server']
+        m3u8_path = self._search_regex(
+            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+        formats = self._extract_m3u8_formats(
+            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+            entry_protocol='m3u8_native')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('DC.description', webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('DC.date.created', webpage))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': object_id,
             'title': title,
-            'format': 'rtmp',
-            'url': url,
-            'play_path': play_path,
-            'rtmp_real_time': True,
-            'ext': 'flv',
-            'display_id': video_id
+            'formats': formats,
+            'display_id': video_id,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
         }
index 3b6529f4b108052e3019c8e400bbc2cd0eb5a9a1..c115956121a242920ec8016e8c9f3558c34060c6 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
index 713cb7b329208d3c761b12858cc265b401c16dd0..ee5ead18b0834b7c2e27258b4fc6950fa93ad960 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 1cdb11e34804186e05cdca81d978ab944d49b4db..a5820b21e05a721fd654ff8c1d1313eb80239a73 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 09b3b49420307f3d9a2febf6be97bcff74a5af3e..a693f8c565609327e09281a8116d76fb2dff1033 100644 (file)
@@ -66,6 +66,7 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVMagazineIE,
     ArteTVEmbedIE,
+    TheOperaPlatformIE,
     ArteTVPlaylistIE,
 )
 from .atresplayer import AtresPlayerIE
@@ -93,7 +94,7 @@ from .bbc import (
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .bellmedia import BellMediaIE
-from .beatportpro import BeatportProIE
+from .beatport import BeatportIE
 from .bet import BetIE
 from .bigflix import BigflixIE
 from .bild import BildIE
@@ -116,7 +117,10 @@ from .brightcove import (
     BrightcoveNewIE,
 )
 from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
+from .byutv import (
+    BYUtvIE,
+    BYUtvEventIE,
+)
 from .c56 import C56IE
 from .camdemy import (
     CamdemyIE,
@@ -183,7 +187,10 @@ from .comedycentral import (
 )
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import RtmpIE
+from .commonprotocols import (
+    MmsIE,
+    RtmpIE,
+)
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@@ -342,7 +349,10 @@ from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hark import HarkIE
-from .hbo import HBOIE
+from .hbo import (
+    HBOIE,
+    HBOEpisodeIE,
+)
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
@@ -363,6 +373,7 @@ from .hrti import (
     HRTiIE,
     HRTiPlaylistIE,
 )
+from .huajiao import HuajiaoIE
 from .huffpost import HuffPostIE
 from .hypem import HypemIE
 from .iconosquare import IconosquareIE
@@ -434,6 +445,7 @@ from .lcp import (
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
+from .lego import LEGOIE
 from .lemonde import LemondeIE
 from .leeco import (
     LeIE,
@@ -634,6 +646,7 @@ from .nytimes import (
     NYTimesArticleIE,
 )
 from .nuvid import NuvidIE
+from .nzz import NZZIE
 from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
@@ -887,8 +900,10 @@ from .theplatform import (
 from .thescene import TheSceneIE
 from .thesixtyone import TheSixtyOneIE
 from .thestar import TheStarIE
+from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
 from .tinypic import TinyPicIE
 from .tlc import TlcDeIE
@@ -903,6 +918,7 @@ from .tnaflix import (
     MovieFapIE,
 )
 from .toggle import ToggleIE
+from .tonline import TOnlineIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
index 3a220e9959f034224b42d199666b0523598bd8d9..801573459d21118d08852b1c9745f34d65711d7d 100644 (file)
@@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor):
 
         if not video_data:
             server_js_data = self._parse_json(self._search_regex(
-                r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
+                r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
             for item in server_js_data.get('instances', []):
                 if item[1][0] == 'VideoConfig':
                     video_data = video_data_list2dict(item[2][0]['videoData'])
index fd535457dc56a589eaf9e062dc40fe5374735020..4bc8fc5127010e1b3ced207da04f8926716cc94d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 332d12020d2c58118b5724b7ad84de7ec37a49c6..6b662cc3cd78e4acf661af473f2374b5ec2af05c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 75399fa7d2a3164c67f2d72c24628a861ed77806..b3df93f28fc6471b1c5fe7303415c223042261bc 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index d2503ae2eff3d2e46497bbcba356af11db665452..118325b6d5cd6f29645f94c0d5cc6c719715e400 100644 (file)
@@ -2,25 +2,27 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .streamable import StreamableIE
 
 
 class FootyRoomIE(InfoExtractor):
-    _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+        'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
         'info_dict': {
-            'id': 'schalke-04-0-2-real-madrid-2015-02',
-            'title': 'Schalke 04 0 – 2 Real Madrid',
+            'id': '79922154',
+            'title': 'VIDEO Hull City 0 - 2 Chelsea',
         },
-        'playlist_count': 3,
-        'skip': 'Video for this match is not available',
+        'playlist_count': 2,
+        'add_ie': [StreamableIE.ie_key()],
     }, {
-        'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
+        'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
         'info_dict': {
-            'id': 'georgia-0-2-germany-2015-03',
-            'title': 'Georgia 0 – 2 Germany',
+            'id': '75817984',
+            'title': 'VIDEO Georgia 0 - 2 Germany',
         },
         'playlist_count': 1,
+        'add_ie': ['Playwire']
     }]
 
     def _real_extract(self, url):
@@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        playlist = self._parse_json(
-            self._search_regex(
-                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+        playlist = self._parse_json(self._search_regex(
+            r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
             playlist_id)
 
         playlist_title = self._og_search_title(webpage)
@@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor):
             payload = video.get('payload')
             if not payload:
                 continue
-            playwire_url = self._search_regex(
+            playwire_url = self._html_search_regex(
                 r'data-config="([^"]+)"', payload,
                 'playwire url', default=None)
             if playwire_url:
                 entries.append(self.url_result(self._proto_relative_url(
                     playwire_url, 'http:'), 'Playwire'))
 
+            streamable_url = StreamableIE._extract_url(payload)
+            if streamable_url:
+                entries.append(self.url_result(
+                    streamable_url, StreamableIE.ie_key()))
+
         return self.playlist_result(entries, playlist_id, playlist_title)
index 3233f66d5fe2efd8381b125948e4eed3d0e446ce..e7068d1aed9573199211a29a91486bd72e9aecd0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index c1792c5348f3e4aea120bc873eb7670b15fccf11..7b8a9cf9a9baa1c1441b686342f9c481b0eca64d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -27,7 +27,6 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
-    url_basename,
     xpath_text,
 )
 from .brightcove import (
@@ -1549,7 +1548,7 @@ class GenericIE(InfoExtractor):
             force_videoid = smuggled_data['force_videoid']
             video_id = force_videoid
         else:
-            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+            video_id = self._generic_id(url)
 
         self.to_screen('%s: Requesting header' % video_id)
 
@@ -1578,7 +1577,7 @@ class GenericIE(InfoExtractor):
 
         info_dict = {
             'id': video_id,
-            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'title': self._generic_title(url),
             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
         }
 
@@ -1754,9 +1753,9 @@ class GenericIE(InfoExtractor):
         if matches:
             return _playlist_from_matches(matches, ie='RtlNl')
 
-        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
-        if vimeo_url is not None:
-            return self.url_result(vimeo_url)
+        vimeo_urls = VimeoIE._extract_urls(url, webpage)
+        if vimeo_urls:
+            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
 
         vid_me_embed_url = self._search_regex(
             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -2332,12 +2331,23 @@ class GenericIE(InfoExtractor):
             info_dict.update(json_ld)
             return info_dict
 
+        # Look for HTML5 media
+        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+        if entries:
+            for entry in entries:
+                entry.update({
+                    'id': video_id,
+                    'title': video_title,
+                })
+                self._sort_formats(entry['formats'])
+            return self.playlist_result(entries)
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
             vpath = compat_urlparse.urlparse(vurl).path
             vext = determine_ext(vpath)
-            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
 
         def filter_video(urls):
             return list(filter(check_video, urls))
@@ -2387,9 +2397,6 @@ class GenericIE(InfoExtractor):
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             if m_video_type is not None:
                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
-        if not found:
-            # HTML5 video
-            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
index a43abd154e8a73c6d8494880afdef1796ac94fe7..74e1720ee325da8fb4c011eddec342fe2de62d9b 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 749e9154f86358693cbac648d299ed1df51d7fb4..342a6130ea10325d4b7e7ecee7ee86b130e90173 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index dad0f3994c93cd0a38a2be52741d7c55ce0e6749..cbf774377b7261c326bd71f5db2d5de8216be5f4 100644 (file)
@@ -12,17 +12,7 @@ from ..utils import (
 )
 
 
-class HBOIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
-        'md5': '1c33253f0c7782142c993c0ba62a8753',
-        'info_dict': {
-            'id': '1437839',
-            'ext': 'mp4',
-            'title': 'Ep. 64 Clip: Encryption',
-        }
-    }
+class HBOBaseIE(InfoExtractor):
     _FORMATS_INFO = {
         '1920': {
             'width': 1280,
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    def _extract_from_id(self, video_id):
         video_data = self._download_xml(
             'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
         title = xpath_text(video_data, 'title', 'title', True)
@@ -116,7 +105,60 @@ class HBOIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+
+class HBOIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 1072,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self._extract_from_id(video_id)
+
+
+class HBOEpisodeIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
+        'md5': '689132b253cc0ab7434237fc3a293210',
+        'info_dict': {
+            'id': '1439518',
+            'display_id': 'ep-52-inside-the-episode',
+            'ext': 'mp4',
+            'title': 'Ep. 52: Inside the Episode',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 240,
+        },
+    }, {
+        'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
+            webpage, 'video ID', group='video_id')
+
+        info_dict = self._extract_from_id(video_id)
+        info_dict['display_id'] = display_id
+
+        return info_dict
index 93107b3064ebfba513b3aa208556b5822f6cf979..575fb332a055465446fc5db9448313ec793d3258 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py
new file mode 100644 (file)
index 0000000..cec0df0
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class HuajiaoIE(InfoExtractor):
+    IE_DESC = '花椒直播'
+    _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.huajiao.com/l/38941232',
+        'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
+        'info_dict': {
+            'id': '38941232',
+            'ext': 'mp4',
+            'title': '#新人求关注#',
+            'description': 're:.*',
+            'duration': 2424.0,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1475866459,
+            'upload_date': '20161007',
+            'uploader': 'Penny_余姿昀',
+            'uploader_id': '75206005',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        feed_json = self._search_regex(
+            r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
+        feed = self._parse_json(feed_json, video_id)
+
+        description = self._html_search_meta(
+            'description', webpage, 'description', fatal=False)
+
+        def get(section, field):
+            return feed.get(section, {}).get(field)
+
+        return {
+            'id': video_id,
+            'title': feed['feed']['formated_title'],
+            'description': description,
+            'duration': parse_duration(get('feed', 'duration')),
+            'thumbnail': get('feed', 'image'),
+            'timestamp': parse_iso8601(feed.get('creatime'), ' '),
+            'uploader': get('author', 'nickname'),
+            'uploader_id': get('author', 'uid'),
+            'formats': self._extract_m3u8_formats(
+                feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
+        }
index 65712abc28c3cc68cab7052ab709b2c1e6500cb5..9544ff9d469c52a932cc3a8fed0dafbed9f4ae83 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 788bbe0d5c44177b5a943da9f9c3c3adf46a77b1..da2cdc656ac90f15a575eceabf33309b084c8f28 100644 (file)
@@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor):
             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                 extract_formats(src)
 
+        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+            self.raise_geo_restricted()
+
         self._sort_formats(formats)
 
         return {
index 122e2dd8cad8c9fba6d861a80d77752e1b508301..4b5f346d1ef909e286b5c0555ab07a6e20bc11d4 100644 (file)
@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 38199fcd0b5c46ba4e47b8fc8e448b3471dd17ea..5d56e0a28bd55b93153a92446834ba440ad59572 100644 (file)
@@ -32,13 +32,20 @@ class JWPlatformBaseIE(InfoExtractor):
         return self._parse_jwplayer_data(
             jwplayer_data, video_id, *args, **kwargs)
 
-    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
         # JWPlayer backward compatibility: flattened playlists
         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
         if 'playlist' not in jwplayer_data:
             jwplayer_data = {'playlist': [jwplayer_data]}
 
         entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
         for video_data in jwplayer_data['playlist']:
             # JWPlayer backward compatibility: flattened sources
             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
@@ -57,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
                 if source_type == 'hls' or ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
                     formats.append({
index fbe4994970a19b33f3f37cdab1e6c56930a69bb6..d4da8f48462f61358c649537cb1a41d47d9e82b1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 704bd7b34554af60dfec9b811251f5270cbd1f55..1fda451075e4e0638e0799fc2bb976f21a4bcf8e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 0ae8ebd687034343c364dbc968d90d84f5bc37df..cf8876fa1f2321e7b020e2e773452f82df1bd2f1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
new file mode 100644 (file)
index 0000000..d3bca64
--- /dev/null
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    unescapeHTML,
+    parse_duration,
+    get_element_by_class,
+)
+
+
+class LEGOIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
+    _TESTS = [{
+        'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+        'md5': 'f34468f176cfd76488767fc162c405fa',
+        'info_dict': {
+            'id': '55492d823b1b4d5e985787fa8c2973b1',
+            'ext': 'mp4',
+            'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+            'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+        },
+    }, {
+        # geo-restricted but the contentUrl contain a valid url
+        'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+        'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
+        'info_dict': {
+            'id': '13bdc2299ab24d9685701a915b3d71e7',
+            'ext': 'mp4',
+            'title': 'Aflevering 20 - Helden van het koninkrijk',
+            'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+        },
+    }, {
+        # special characters in title
+        'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
+        'info_dict': {
+            'id': '9685ee9d12e84ff38e84b4e3d0db533d',
+            'ext': 'mp4',
+            'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
+            'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+    _BITRATES = [256, 512, 1024, 1536, 2560]
+
+    def _real_extract(self, url):
+        locale, video_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, video_id)
+        title = get_element_by_class('video-header', webpage).strip()
+        progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
+        streaming_base = 'http://legoprod-f.akamaihd.net/'
+        content_url = self._html_search_meta('contentUrl', webpage)
+        path = self._search_regex(
+            r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
+            content_url, 'video path', default=None)
+        if not path:
+            player_url = self._proto_relative_url(self._search_regex(
+                r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
+                webpage, 'player url', default=None))
+            if not player_url:
+                base_url = self._proto_relative_url(self._search_regex(
+                    r'data-baseurl="([^"]+)"', webpage, 'base url',
+                    default='http://www.lego.com/%s/mediaplayer/video/' % locale))
+                player_url = base_url + video_id
+            player_webpage = self._download_webpage(player_url, video_id)
+            video_data = self._parse_json(unescapeHTML(self._search_regex(
+                r"video='([^']+)'", player_webpage, 'video data')), video_id)
+            progressive_base = self._search_regex(
+                r'data-video-progressive-url="([^"]+)"',
+                player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
+            streaming_base = self._search_regex(
+                r'data-video-streaming-url="([^"]+)"',
+                player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
+            item_id = video_data['ItemId']
+
+            net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
+            base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
+            path = '/'.join([net_storage_path, base_path])
+        streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES))
+
+        formats = self._extract_akamai_formats(
+            '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if len(m3u8_formats) == len(self._BITRATES):
+            self._sort_formats(m3u8_formats)
+            for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
+                progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
+                mp4_f = m3u8_format.copy()
+                mp4_f.update({
+                    'url': progressive_base_url + 'mp4',
+                    'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
+                    'protocol': 'http',
+                })
+                web_f = {
+                    'url': progressive_base_url + 'webm',
+                    'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
+                    'width': m3u8_format['width'],
+                    'height': m3u8_format['height'],
+                    'tbr': m3u8_format.get('tbr'),
+                    'ext': 'webm',
+                }
+                formats.extend([web_f, mp4_f])
+        else:
+            for bitrate in self._BITRATES:
+                for ext in ('web', 'mp4'):
+                    formats.append({
+                        'format_id': '%s-%s' % (ext, bitrate),
+                        'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
+                        'tbr': bitrate,
+                        'ext': ext,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta('description', webpage),
+            'thumbnail': self._html_search_meta('thumbnail', webpage),
+            'duration': parse_duration(self._html_search_meta('duration', webpage)),
+            'formats': formats,
+        }
index 87120ecd1f40c8011269a5e80b6ab158d3d94df3..afce2010eafadc3ceaab1eaa7d846e5e6360d547 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 299873ecc3171a431c098e50d6c6a2ecaa40e2b3..f4dcfd93fa760878566568636d9c2b864b6c7556 100644 (file)
@@ -94,12 +94,12 @@ class LyndaBaseIE(InfoExtractor):
 class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
 
     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 
     _TESTS = [{
-        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+        'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
         # md5 is unstable
         'info_dict': {
             'id': '114408',
@@ -112,19 +112,71 @@ class LyndaIE(LyndaBaseIE):
         'only_matching': True,
     }]
 
+    def _raise_unavailable(self, video_id):
+        self.raise_login_required(
+            'Video %s is only available for members' % video_id)
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        course_id = mobj.group('course_id')
+
+        query = {
+            'videoId': video_id,
+            'type': 'video',
+        }
 
         video = self._download_json(
-            'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
-            video_id, 'Downloading video JSON')
+            'https://www.lynda.com/ajax/player', video_id,
+            'Downloading video JSON', fatal=False, query=query)
+
+        # Fallback scenario
+        if not video:
+            query['courseId'] = course_id
+
+            play = self._download_json(
+                'https://www.lynda.com/ajax/course/%s/%s/play'
+                % (course_id, video_id), video_id, 'Downloading play JSON')
+
+            if not play:
+                self._raise_unavailable(video_id)
+
+            formats = []
+            for formats_dict in play:
+                urls = formats_dict.get('urls')
+                if not isinstance(urls, dict):
+                    continue
+                cdn = formats_dict.get('name')
+                for format_id, format_url in urls.items():
+                    if not format_url:
+                        continue
+                    formats.append({
+                        'url': format_url,
+                        'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
+                        'height': int_or_none(format_id),
+                    })
+            self._sort_formats(formats)
+
+            conviva = self._download_json(
+                'https://www.lynda.com/ajax/player/conviva', video_id,
+                'Downloading conviva JSON', query=query)
+
+            return {
+                'id': video_id,
+                'title': conviva['VideoTitle'],
+                'description': conviva.get('VideoDescription'),
+                'release_year': int_or_none(conviva.get('ReleaseYear')),
+                'duration': int_or_none(conviva.get('Duration')),
+                'creator': conviva.get('Author'),
+                'formats': formats,
+            }
 
         if 'Status' in video:
             raise ExtractorError(
                 'lynda returned error: %s' % video['Message'], expected=True)
 
         if video.get('HasAccess') is False:
-            self.raise_login_required('Video %s is only available for members' % video_id)
+            self._raise_unavailable(video_id)
 
         video_id = compat_str(video.get('ID') or video_id)
         duration = int_or_none(video.get('DurationInSeconds'))
@@ -148,7 +200,7 @@ class LyndaIE(LyndaBaseIE):
             for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
                 formats.extend([{
                     'url': video_url,
-                    'width': int_or_none(format_id),
+                    'height': int_or_none(format_id),
                     'format_id': '%s-%s' % (prioritized_stream_id, format_id),
                 } for format_id, video_url in prioritized_stream.items()])
 
@@ -187,7 +239,7 @@ class LyndaIE(LyndaBaseIE):
             return srt
 
     def _get_subtitles(self, video_id):
-        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
+        url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
         subs = self._download_json(url, None, False)
         if subs:
             return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
@@ -209,7 +261,7 @@ class LyndaCourseIE(LyndaBaseIE):
         course_id = mobj.group('courseid')
 
         course = self._download_json(
-            'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+            'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
             course_id, 'Downloading course JSON')
 
         if course.get('Status') == 'NotFound':
@@ -231,7 +283,7 @@ class LyndaCourseIE(LyndaBaseIE):
                 if video_id:
                     entries.append({
                         '_type': 'url_transparent',
-                        'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                        'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
                         'ie_key': LyndaIE.ie_key(),
                         'chapter': chapter.get('Title'),
                         'chapter_number': int_or_none(chapter.get('ChapterIndex')),
index 39d2742c89282c2773ee1aca44ca14f047393bc5..9806875e8d87f2a75e7689fbe0e0fabd6d7eeafe 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 9a7098c43c600a3cc3ed697252bc784d9a9cf5b7..f7cc3c83289f1101207c385d5bfed2055c7b7f67 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index aa091a62ca915f3742dc42302913c0105cb8c0ec..478e3996743d1eca8434a786b58c4bd799a7dc55 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 2174e5665778b590055c06255a91c030cb579d29..1854d59a5307a5b22f2efdda08a2b6c944aa8c50 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 0d5238d777ad00ab13e84a69474d42b360cdecc1..ab32e632e34375561980f168834443754f606383 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 1dcf27afef331ceb3b09d6ade66cde65e92e3cd6..b91d865286e47affdc66c138dde9507963d62733 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
+from .theplatform import ThePlatformIE
 from ..utils import (
     smuggle_url,
     url_basename,
@@ -65,7 +66,7 @@ class NationalGeographicVideoIE(InfoExtractor):
         }
 
 
-class NationalGeographicIE(AdobePassIE):
+class NationalGeographicIE(ThePlatformIE, AdobePassIE):
     IE_NAME = 'natgeo'
     _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
 
@@ -110,25 +111,39 @@ class NationalGeographicIE(AdobePassIE):
         release_url = self._search_regex(
             r'video_auth_playlist_url\s*=\s*"([^"]+)"',
             webpage, 'release url')
+        theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
+        video_id = theplatform_path.split('/')[-1]
         query = {
             'mbr': 'true',
-            'switch': 'http',
         }
         is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
         if is_auth == 'auth':
             auth_resource_id = self._search_regex(
                 r"video_auth_resourceId\s*=\s*'([^']+)'",
                 webpage, 'auth resource id')
-            query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
-
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                update_url_query(release_url, query),
-                {'force_smil_url': True}),
+            query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
+
+        formats = []
+        subtitles = {}
+        for key, value in (('switch', 'http'), ('manifest', 'm3u')):
+            tp_query = query.copy()
+            tp_query.update({
+                key: value,
+            })
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
+            formats.extend(tp_formats)
+            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        self._sort_formats(formats)
+
+        info = self._extract_theplatform_metadata(theplatform_path, display_id)
+        info.update({
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
             'display_id': display_id,
-        }
+        })
+        return info
 
 
 class NationalGeographicEpisodeGuideIE(InfoExtractor):
index 0891d2772cd53f9c686fed9e9cd50c77ddc80bb1..055070ff54fd8990c2e58ab1d6df037b19f3a029 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 0092b85ceaa27e9b190a05ea1d4dff299351b6c7..e3f35f1d8b6d526d13bf7b301e57c9570bff3af4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index a08e48c4ba80d79bc011fb28fadbe2fdd7d89adc..dee9056d39e9bb0076d390054006c6dd4246afae 100644 (file)
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
 
 class AppleDailyIE(NextMediaIE):
     IE_DESC = '臺灣蘋果日報'
-    _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
         'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
         'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE):
             'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
             'upload_date': '20140417',
         },
+    }, {
+        'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
+        'only_matching': True,
     }]
 
     _URL_PATTERN = r'\{url: \'(.+)\'\}'
index b04d2111312d5a9e956762b10a30422ae5a8cd64..62ce800c072d2a316a0c6b8b7479cc89dc29b90d 100644 (file)
@@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
 
 class NHLIE(InfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
+    _SITES_MAP = {
+        'nhl': 'nhl',
+        'wch2016': 'wch',
+    }
     _TESTS = [{
         # type=video
         'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@@ -270,13 +274,32 @@ class NHLIE(InfoExtractor):
             'upload_date': '20160204',
             'timestamp': 1454544904,
         },
+    }, {
+        # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
+        'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
+        'md5': '50b2bb47f405121484dda3ccbea25459',
+        'info_dict': {
+            'id': '44315003',
+            'ext': 'mp4',
+            'title': 'Poile, Laviolette on Subban trade',
+            'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
+            'timestamp': 1467242866,
+            'upload_date': '20160629',
+        },
+    }, {
+        'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        tmp_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        tmp_id, site = mobj.group('id'), mobj.group('site')
         video_data = self._download_json(
-            'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
-            tmp_id)
+            'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
+            % (self._SITES_MAP[site], tmp_id), tmp_id)
         if video_data.get('type') == 'article':
             video_data = video_data['media']
 
@@ -290,9 +313,11 @@ class NHLIE(InfoExtractor):
                 continue
             ext = determine_ext(playback_url)
             if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_formats = self._extract_m3u8_formats(
                     playback_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=playback.get('name', 'hls'), fatal=False))
+                    m3u8_id=playback.get('name', 'hls'), fatal=False)
+                self._check_formats(m3u8_formats, video_id)
+                formats.extend(m3u8_formats)
             else:
                 height = int_or_none(playback.get('height'))
                 formats.append({
index 6eaaa8416c7f4fb7e1c738fe2a6c13da6c66c503..a104e33f8bdea73540779e41db45d92c1249668a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 06f2bda07dd5db2c54e1e0492f244dbf0fc5a526..70ff2ab3653525664b4f1ae590393ee680a2f6e5 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index af44c3bb5714bc0079e3d1307782a8ff1fe5ba84..6aa0895b82e5949657a62b009addc8e93885936e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 17671ad398b9e9a8148bceff74db678969d26d3f..103952345aa98ed186515452baf2f945409ffdaa 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 74860eb2054e4f685b4b52c89149e49563ffe230..7e53463164b281e84a349a6fc382f5e203f278a4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .brightcove import (
index 9c7cc777b4297051628cc1f0cee78f84c59dff83..c91f5846171be2a720523a4531313703d18920fd 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     fix_xml_ampersands,
     orderedSet,
@@ -10,6 +11,7 @@ from ..utils import (
     qualities,
     strip_jsonp,
     unified_strdate,
+    ExtractorError,
 )
 
 
@@ -181,9 +183,16 @@ class NPOIE(NPOBaseIE):
                     continue
                 streams = format_info.get('streams')
                 if streams:
-                    video_info = self._download_json(
-                        streams[0] + '&type=json',
-                        video_id, 'Downloading %s stream JSON' % format_id)
+                    try:
+                        video_info = self._download_json(
+                            streams[0] + '&type=json',
+                            video_id, 'Downloading %s stream JSON' % format_id)
+                    except ExtractorError as ee:
+                        if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                            error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
+                            if error:
+                                raise ExtractorError(error, expected=True)
+                        raise
                 else:
                     video_info = format_info
                 video_url = video_info.get('url')
@@ -459,8 +468,9 @@ class NPOPlaylistBaseIE(NPOIE):
 
 class VPROIE(NPOPlaylistBaseIE):
     IE_NAME = 'vpro'
-    _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
-    _PLAYLIST_TITLE_RE = r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+                          r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
     _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
 
     _TESTS = [
@@ -492,6 +502,27 @@ class VPROIE(NPOPlaylistBaseIE):
                 'title': 'education education',
             },
             'playlist_count': 2,
+        },
+        {
+            'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
+            'info_dict': {
+                'id': 'de-tegenprestatie',
+                'title': 'De Tegenprestatie',
+            },
+            'playlist_count': 2,
+        }, {
+            'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
+            'info_dict': {
+                'id': 'VARA_101375237',
+                'ext': 'm4v',
+                'title': 'MH17: Het verdriet van Nederland',
+                'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
+                'upload_date': '20150716',
+            },
+            'params': {
+                # Skip because of m3u8 download
+                'skip_download': True
+            },
         }
     ]
 
index ed42eb301040bc9ed4a5e28add4209c8bb9e505b..3700b7ab2eec4070fee53258bb0707d79aa37728 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -113,7 +113,17 @@ class NRKBaseIE(InfoExtractor):
 
 
 class NRKIE(NRKBaseIE):
-    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                        (?:
+                            nrk:|
+                            https?://
+                                (?:
+                                    (?:www\.)?nrk\.no/video/PS\*|
+                                    v8-psapi\.nrk\.no/mediaelement/
+                                )
+                            )
+                            (?P<id>[^/?#&]+)
+                        '''
     _API_HOST = 'v8.psapi.nrk.no'
     _TESTS = [{
         # video
@@ -137,6 +147,12 @@ class NRKIE(NRKBaseIE):
             'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
             'duration': 20,
         }
+    }, {
+        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
+    }, {
+        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
     }]
 
 
index e8702ebcd72633f82a9bc15c55057d67d14d2401..7d7a785ab10e7b71ceb4729a012ebb574c7752d5 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index ef093dec2201afb7cb24384d7120f6ec00158de4..87fb94d1f583f5b174fe8d9ace84e4791f3afa4e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 142c34256f20fea317dfa91736f38fdeb6ad5126..2bb77ab249239163d8318a57e8fd0fdb57d2e32a 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import hmac
@@ -6,11 +7,13 @@ import base64
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     float_or_none,
     int_or_none,
-    parse_iso8601,
+    js_to_json,
     mimetype2ext,
-    determine_ext,
+    parse_iso8601,
+    remove_start,
 )
 
 
@@ -138,16 +141,83 @@ class NYTimesArticleIE(NYTimesBaseIE):
             'upload_date': '20150414',
             'uploader': 'Matthew Williams',
         }
+    }, {
+        'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
+        'md5': 'e0d52040cafb07662acf3c9132db3575',
+        'info_dict': {
+            'id': '100000004709062',
+            'title': 'The Run-Up: ‘He Was Like an Octopus’',
+            'ext': 'mp3',
+            'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
+            'series': 'The Run-Up',
+            'episode': '‘He Was Like an Octopus’',
+            'episode_number': 20,
+            'duration': 2130,
+        }
+    }, {
+        'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
+        'info_dict': {
+            'id': '100000004709479',
+            'title': 'The Rise of Hitler',
+            'ext': 'mp3',
+            'description': 'md5:bce877fd9e3444990cb141875fab0028',
+            'creator': 'Pamela Paul',
+            'duration': 3475,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
         'only_matching': True,
     }]
 
+    def _extract_podcast_from_json(self, json, page_id, webpage):
+        podcast_audio = self._parse_json(
+            json, page_id, transform_source=js_to_json)
+
+        audio_data = podcast_audio['data']
+        track = audio_data['track']
+
+        episode_title = track['title']
+        video_url = track['source']
+
+        description = track.get('description') or self._html_search_meta(
+            ['og:description', 'twitter:description'], webpage)
+
+        podcast_title = audio_data.get('podcast', {}).get('title')
+        title = ('%s: %s' % (podcast_title, episode_title)
+                 if podcast_title else episode_title)
+
+        episode = audio_data.get('podcast', {}).get('episode') or ''
+        episode_number = int_or_none(self._search_regex(
+            r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
+
+        return {
+            'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'creator': track.get('credit'),
+            'series': podcast_title,
+            'episode': episode_title,
+            'episode_number': episode_number,
+            'duration': int_or_none(track.get('duration')),
+        }
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        page_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, page_id)
 
-        video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
+        video_id = self._search_regex(
+            r'data-videoid=["\'](\d+)', webpage, 'video id',
+            default=None, fatal=False)
+        if video_id is not None:
+            return self._extract_video_from_id(video_id)
 
-        return self._extract_video_from_id(video_id)
+        podcast_data = self._search_regex(
+            (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
+             r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
+            webpage, 'podcast data')
+        return self._extract_podcast_from_json(podcast_data, page_id, webpage)
diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py
new file mode 100644 (file)
index 0000000..2d352f5
--- /dev/null
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+)
+
+
+class NZZIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
+        'info_dict': {
+            'id': '9153',
+        },
+        'playlist_mincount': 6,
+    }
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        entries = []
+        for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+            player_params = extract_attributes(player_element)
+            if player_params.get('data-type') not in ('kaltura_singleArticle',):
+                self.report_warning('Unsupported player type')
+                continue
+            entry_id = player_params['data-id']
+            entries.append(self.url_result(
+                'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
+
+        return self.playlist_result(entries, page_id)
index f2ccc53dc6be3e3030bd05608976ea5c4e456808..50fbbc79c12761449adc70e74a58f0442f5b9cfa 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 6ae30679a0a226b0d242b2ef773fbab9a90920c5..b4cce7ea9334c7bbaf9e617932189504dcd25121 100644 (file)
@@ -1,28 +1,28 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 import calendar
 import datetime
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     HEADRequest,
     unified_strdate,
-    ExtractorError,
     strip_jsonp,
     int_or_none,
     float_or_none,
     determine_ext,
     remove_end,
+    unescapeHTML,
 )
 
 
 class ORFTVthekIE(InfoExtractor):
     IE_NAME = 'orf:tvthek'
     IE_DESC = 'ORF TVthek'
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
@@ -51,26 +51,23 @@ class ORFTVthekIE(InfoExtractor):
             'skip_download': True,  # rtsp downloads
         },
         '_skip': 'Blocked outside of Austria / Germany',
+    }, {
+        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+        'skip_download': True,
+    }, {
+        'url': 'http://tvthek.orf.at/profile/Universum/35429',
+        'skip_download': True,
     }]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
 
-        data_json = self._search_regex(
-            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
-        all_data = json.loads(data_json)
-
-        def get_segments(all_data):
-            for data in all_data:
-                if data['name'] in (
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
-                    return data['values']['segments']
-
-        sdata = get_segments(all_data)
-        if not sdata:
-            raise ExtractorError('Unable to extract segments')
+        data_jsb = self._parse_json(
+            self._search_regex(
+                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+                webpage, 'playlist', group='json'),
+            playlist_id, transform_source=unescapeHTML)['playlist']['videos']
 
         def quality_to_int(s):
             m = re.search('([0-9]+)', s)
@@ -79,8 +76,11 @@ class ORFTVthekIE(InfoExtractor):
             return int(m.group(1))
 
         entries = []
-        for sd in sdata:
-            video_id = sd['id']
+        for sd in data_jsb:
+            video_id, title = sd.get('id'), sd.get('title')
+            if not video_id or not title:
+                continue
+            video_id = compat_str(video_id)
             formats = [{
                 'preference': -10 if fd['delivery'] == 'hls' else None,
                 'format_id': '%s-%s-%s' % (
@@ -88,7 +88,7 @@ class ORFTVthekIE(InfoExtractor):
                 'url': fd['src'],
                 'protocol': fd['protocol'],
                 'quality': quality_to_int(fd['quality']),
-            } for fd in sd['playlist_item_array']['sources']]
+            } for fd in sd['sources']]
 
             # Check for geoblocking.
             # There is a property is_geoprotection, but that's always false
@@ -115,14 +115,24 @@ class ORFTVthekIE(InfoExtractor):
             self._check_formats(formats, video_id)
             self._sort_formats(formats)
 
-            upload_date = unified_strdate(sd['created_date'])
+            subtitles = {}
+            for sub in sd.get('subtitles', []):
+                sub_src = sub.get('src')
+                if not sub_src:
+                    continue
+                subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+                    'url': sub_src,
+                })
+
+            upload_date = unified_strdate(sd.get('created_date'))
             entries.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': sd['header'],
+                'title': title,
                 'formats': formats,
+                'subtitles': subtitles,
                 'description': sd.get('description'),
-                'duration': int(sd['duration_in_seconds']),
+                'duration': int_or_none(sd.get('duration_in_seconds')),
                 'upload_date': upload_date,
                 'thumbnail': sd.get('image_full_url'),
             })
index 8d49f5c4aff04954e773b9eb575af912130a8401..2b07958bb1f5815a162dadadff4f450f7ea0e97d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 874aacc55253e69190361f63f8e165cbf1342764..ebdab8db9faa0c8911c53c5764a18456926b6a55 100644 (file)
@@ -6,9 +6,9 @@ from .common import InfoExtractor
 class ParliamentLiveUKIE(InfoExtractor):
     IE_NAME = 'parliamentlive.tv'
     IE_DESC = 'UK parliament videos'
-    _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
         'info_dict': {
             'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
@@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor):
             'timestamp': 1422696664,
             'upload_date': '20150131',
         },
-    }
+    }, {
+        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 22975066516a0d37e74c9c520dd4faf0a68305a6..a6a2c273f240db52c967a12a96484261bd37664a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 61043cad5c23880abba1666842cdf7a6490c44b3..0e362302425cbe504b33b90aa1937dc68b9e288a 100644 (file)
@@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):
 
         user = list(data_store['UserCache']['users'].values())[0]['user']
         user_id = user['id']
-        session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id']
+        session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
 
         broadcasts = self._call_api(
             'getUserBroadcastsPublic',
index ea5caefa90da858f143d1f512d036698f6726b16..b66adfc00bfa01cf84e58b4741a9e856fab63e50 100644 (file)
@@ -28,7 +28,7 @@ class PluralsightBaseIE(InfoExtractor):
 
 class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
-    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
     _LOGIN_URL = 'https://app.pluralsight.com/id/'
 
     _NETRC_MACHINE = 'pluralsight'
@@ -50,6 +50,9 @@ class PluralsightIE(PluralsightBaseIE):
         # available without pluralsight account
         'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
         'only_matching': True,
+    }, {
+        'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
index 9894f32620c1692830df023423ae02a6199121b1..073fc3e21db07f05deef1a337aca7685f62b4079 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from ..compat import (
index 202f58673ae4f1dd77caee159f37dc24be9aad64..3c9087f2dfe3caa30c879f4905e857a046fd789c 100644 (file)
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .jwplatform import JWPlatformBaseIE
 from ..utils import (
     str_to_int,
 )
 
 
-class PornoXOIE(InfoExtractor):
+class PornoXOIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
@@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor):
             'id': '7564',
             'ext': 'flv',
             'title': 'Striptease From Sexy Secretary!',
-            'description': 'Striptease From Sexy Secretary!',
+            'display_id': 'striptease-from-sexy-secretary',
+            'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
             'categories': list,  # NSFW
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
@@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id, display_id = mobj.groups()
 
         webpage = self._download_webpage(url, video_id)
-
-        video_url = self._html_search_regex(
-            r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')
+        video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
 
         title = self._html_search_regex(
             r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
 
-        description = self._html_search_regex(
-            r'<meta name="description" content="([^"]+)\s*featuring',
-            webpage, 'description', fatal=False)
-
-        thumbnail = self._html_search_regex(
-            r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
-
         view_count = str_to_int(self._html_search_regex(
             r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
 
@@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor):
             None if categories_str is None
             else categories_str.split(','))
 
-        return {
+        video_data.update({
             'id': video_id,
-            'url': video_url,
             'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'display_id': display_id,
+            'description': self._html_search_meta('description', webpage),
             'categories': categories,
             'view_count': view_count,
             'age_limit': 18,
-        }
+        })
+
+        return video_data
index 873d4f981d90303dd06ded3d114bad7714c7b4ae..7cc07a2ad5b88c51aa9f5d339839fd743727e17e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 9c2ccbe2de38fe779ec2e857c761a3989027a0ee..1c54af0022f087788d6bb11a25639f1a184b42b8 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .prosiebensat1 import ProSiebenSat1BaseIE
index 19a751da08058abe748fc3bbfb596541e8412aa3..0aa8d059bf81dffd28df727650b20aafc49302eb 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index 3c6725aeb42945ce7f4e07b49bcd0d629248fcac..4875009e5cafd68867b67393d36d90625e5f29c8 100644 (file)
@@ -1,29 +1,29 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import str_or_none
+from ..utils import (
+    qualities,
+    str_or_none,
+)
 
 
 class ReverbNationIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
         'info_dict': {
             'id': '16965047',
             'ext': 'mp3',
             'title': 'MONA LISA',
             'uploader': 'ALKILADOS',
             'uploader_id': '216429',
-            'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
+            'thumbnail': 're:^https?://.*\.jpg',
         },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
+        song_id = self._match_id(url)
 
         api_res = self._download_json(
             'https://api.reverbnation.com/song/%s' % song_id,
@@ -31,14 +31,23 @@ class ReverbNationIE(InfoExtractor):
             note='Downloading information of song %s' % song_id
         )
 
+        THUMBNAILS = ('thumbnail', 'image')
+        quality = qualities(THUMBNAILS)
+        thumbnails = []
+        for thumb_key in THUMBNAILS:
+            if api_res.get(thumb_key):
+                thumbnails.append({
+                    'url': api_res[thumb_key],
+                    'preference': quality(thumb_key)
+                })
+
         return {
             'id': song_id,
-            'title': api_res.get('name'),
-            'url': api_res.get('url'),
+            'title': api_res['name'],
+            'url': api_res['url'],
             'uploader': api_res.get('artist', {}).get('name'),
             'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnail': self._proto_relative_url(
-                api_res.get('image', api_res.get('thumbnail'))),
+            'thumbnails': thumbnails,
             'ext': 'mp3',
             'vcodec': 'none',
         }
index f3bb4fa661404ee77b57573e487c6b0a4607f4bf..2340dae535b3ed719ffd363283a1c542a2da80b1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index de004671d564eb455e45361666fd304f8ca040a6..cb4ee88033ba1d761faac452de724a6c44f08503 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index f1b92f6da73c70b2fdfe1b997816d983d10e8860..6a43b036e924470055aea3910d1c5ea807483fdb 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
index 38366b784e7fbedce2b2d7de3669db752a6528d0..9a330c1961b75f662caa457fabe231e6aa4bcb8a 100644 (file)
@@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE):
             transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
 
         info_dict = self._parse_jwplayer_data(
-            jwplayer_data, video_id, require_title=False, m3u8_id='hls')
+            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
 
         info_dict.update({
             'title': self._og_search_title(webpage),
index 1f7c262993c8ce7e0d602f612fc6316e80052f66..ce631b46c30bcd2eda03c798d61bed616f41e0b4 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 5d0ace5bfb472acc056d599368b02bb2573eafb8..fd1df925ba46bcecf87e192d2331da5e77d0b4bc 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index a2379eb04c2e6744a49f315ebee2a0c9fb0170f6..a5e672c0a674e3461c261e0b6b2ca7ca9435ea30 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index ffea438cc4645c267c87b54a761394e0c1eca247..2fce4e81b7f44c4c70ff5e6e775a4743032a231b 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class RuutuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://www.ruutu.fi/video/2058907',
@@ -34,12 +34,24 @@ class RuutuIE(InfoExtractor):
                 'id': '2057306',
                 'ext': 'mp4',
                 'title': 'Superpesis: katso koko kausi Ruudussa',
-                'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
+                'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 40,
                 'age_limit': 0,
             },
         },
+        {
+            'url': 'http://www.supla.fi/supla/2231370',
+            'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+            'info_dict': {
+                'id': '2231370',
+                'ext': 'mp4',
+                'title': 'Osa 1: Mikael Jungner',
+                'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'age_limit': 0,
+            },
+        },
     ]
 
     def _real_extract(self, url):
index eabe41efe0b09c38ef7c26d2b2b0792210da416e..c3aec1edde5e9d02efb377fa39941ae01d2f04b4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -157,7 +157,14 @@ class SafariCourseIE(SafariBaseIE):
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
 
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+                            techbus\.safaribooksonline\.com
+                        )
+                        /(?P<id>[^/]+)/?(?:[#?]|$)
+                    '''
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
@@ -170,6 +177,9 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
         'only_matching': True,
+    }, {
+        'url': 'http://techbus.safaribooksonline.com/9780134426365',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 172cc12752d64ce326ba74ced1223628e5fff76a..49a9b313a87a5bf9b80fa8a3b8d78c104722cd5b 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 96472fbc44e9a78654ae7c136e9f7e4a31751a13..43131fb7e5ce82d69d25bf639ce6c2bffe35182a 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 3566317008712d8e378eec36c437cbc39fdc1cdc..ed9de964841e52c1e5753556d6b9e53339ba23c3 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 40333c825443f1b349e8e40c7a08faff7f261e48..7d77e8825d7420185ed1f9f707efb3462f19cab1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 7efb29f653b76b25c26d91aac16c6985255ee1d0..18cc7721e142c7493bbebdfcb59f621e3fedaf4f 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
     _TEST = {
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
-        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'md5': '868309628ba00fd488cf516a113fd717',
         'info_dict': {
             'id': 'TD73btpBqSxc',
             'ext': 'mp4',
@@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
index 1143587868c842704901df4484482e7def3e64cc..def46abda45c5d4899f3c3e5a3fb775592efdfa6 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 48e2ba2dd16b0df190956c4d5e71b9206d30e531..30760ca06be4b3fc112f3fe0200c74b665d64855 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 1a8114aa7d197ffa1da08a0560dde2736765db66..3b7ecb3c343291e3fec8af451b4bb2bc3dde9fae 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index e2a9e45acf4b2ff998738a49b1e78e1c7cb40e0c..08f8c5744a84dffda03904afd30d44cac42f2917 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
index b41d9f59fe55c827153dac1d01b947242c1e9d67..ec1b603889754af70d516e70c123be7d2604387a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 409d5030422652e26fff1102c7fee1302f2b07b9..b03272f7a273e8a3726adb03d805bd2a449849bf 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .ard import ARDMediathekIE
index 1c61437a45901b91691f1d394f92d272ffd2e333..2c26fa689003c6203399eca293c32c8998636ea5 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -48,6 +50,14 @@ class StreamableIE(InfoExtractor):
         }
     ]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
+            webpage)
+        if mobj:
+            return mobj.group('src')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
index d3d2b7eb7a6fa9db4008365e62e046b83490b064..9e533103c88b93157efdd28d7765a7e9ae961603 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import hashlib
index 58073eefeffc0f3ebc244a6087cad36662940228..6d69f7686b37bd2b39b6362373eadefedef0b932 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index f562aa6d386ee891f4ab3a724bef53e20a6cec92..cfad3314642b0412f7fd31995828ee6ba8a6a5b9 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 136e18f96cadf7bd5701e32b0a3bc7c8767e324e..8670cee28d381de6011e3187db3024bcc40519de 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index c4ef70778b8ac8d2289bbbf5da3bbae5f65c263b..5293393efc219526b61fe04ff12ff25f1d49b33c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
index 82675431f863fded8768241e2ad21c4874f8525d..df5d5556fadf82c8dc680643389fdeccf989793f 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 79a7789200e34e1e457d9cd69cdabb495e3548c3..75346393b017995098d08136df2cbffad1e1c6bb 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
index 16e945d8e624adc51e6a68eab786bdece0a29960..a5b62c717160380c873117e878017f5c3573939a 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class TechTalksIE(InfoExtractor):
-    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
+    _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
         'info_dict': {
             'id': '57758',
@@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://techtalks.tv/talks/57758',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index ec6f4ecaa9fc48c1e82d65e7eeaebe1dcfb89e4b..f23b587137a0e471ada57c8a08d2fbaf8ecc9722 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 6febf805baa1bb65109462ae951e582007870bcd..cfbf7f4e1562c78ea1d5ae44437694a5325eb70b 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py
new file mode 100644 (file)
index 0000000..c34a49d
--- /dev/null
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+    determine_ext,
+    parse_duration,
+)
+
+
+class TheWeatherChannelIE(ThePlatformIE):
+    _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
+        'md5': 'ab924ac9574e79689c24c6b95e957def',
+        'info_dict': {
+            'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
+            'ext': 'mp4',
+            'title': 'Ice Climber Is In For A Shock',
+            'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
+            'uploader': 'TWC - Digital (No Distro)',
+            'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+        video_id = drupal_settings['twc']['contexts']['node']['uuid']
+        video_data = self._download_json(
+            'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
+        seo_meta = video_data.get('seometa', {})
+        title = video_data.get('title') or seo_meta['title']
+
+        urls = []
+        thumbnails = []
+        formats = []
+        for variant_id, variant_url in video_data.get('variants', []).items():
+            variant_url = variant_url.strip()
+            if not variant_url or variant_url in urls:
+                continue
+            urls.append(variant_url)
+            ext = determine_ext(variant_url)
+            if ext == 'jpg':
+                thumbnails.append({
+                    'url': variant_url,
+                    'id': variant_id,
+                })
+            elif ThePlatformIE.suitable(variant_url):
+                tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id)
+                formats.extend(tp_formats)
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    variant_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=variant_id, fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    variant_url, video_id, f4m_id=variant_id, fatal=False))
+            else:
+                formats.append({
+                    'url': variant_url,
+                    'format_id': variant_id,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'),
+            'duration': parse_duration(video_data.get('duration')),
+            'uploader': video_data.get('providername'),
+            'uploader_id': video_data.get('providerid'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
new file mode 100644 (file)
index 0000000..7629f0d
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+        'md5': '568acf9ca25a639f0c4ff905826b662f',
+        'info_dict': {
+            'id': '2REGtUDQ',
+            'ext': 'mp4',
+            'title': 'How to Build a Storage Bench',
+            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+            'timestamp': 1442548800,
+            'upload_date': '20150918',
+        }
+    }, {
+        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+        video_id = drupal_settings['jwplatform']['video_id']
+        return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
index ce4f91f460eda4d82a1623fe5c2d21224838f927..fd145ba429fbc94ec5582b6100660f2897b25f5f 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py
new file mode 100644 (file)
index 0000000..cc11eae
--- /dev/null
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TOnlineIE(InfoExtractor):
+    IE_NAME = 't-online.de'
+    _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html',
+        'md5': '7d94dbdde5f9d77c5accc73c39632c29',
+        'info_dict': {
+            'id': '79166266',
+            'ext': 'mp4',
+            'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"',
+            'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id)
+        title = video_data['subtitle']
+
+        formats = []
+        for asset in video_data.get('assets', []):
+            asset_source = asset.get('source') or asset.get('source2')
+            if not asset_source:
+                continue
+            formats_id = []
+            for field_key in ('type', 'profile'):
+                field_value = asset.get(field_key)
+                if field_value:
+                    formats_id.append(field_value)
+            formats.append({
+                'format_id': '-'.join(formats_id),
+                'url': asset_source,
+            })
+
+        thumbnails = []
+        for image in video_data.get('images', []):
+            image_source = image.get('source')
+            if not image_source:
+                continue
+            thumbnails.append({
+                'url': image_source,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
index 2579ba8c67498c91aa117c6853b83f391ccb3ba6..938e05076313cb5b3d3284083d2cc7e699241d21 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding:utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 4d8b57111897f3c936e11f55fcea60d6a6bd30d6..ebe411e12aa5fa44e201dcaefc52e839e5b2d212 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index f225ec68448271eabbcca0b63ef367f37e7e908c..bd28267b0cb6a0154133c98f567c24f054b5459a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index ead4c00c79bb453585b4ba18c67f7535bcc69254..f3817ab288473a01e899f335821c241fe43d0e91 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index bc352391ef931ab6691400badebabfcb58047347..46c2cfe7b251beb65422238a2158b48ad0a1d795 100644 (file)
@@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):
             # m3u8 download
             'skip_download': True,
         },
+        'skip': 'HTTP Error 404: Not Found',
     }]
 
     def _real_extract(self, url):
index ce3bf6b023bced2176aa1cdf19cb75e371e290ed..8e6fd4731e38bfad40a11e035a61227736babb0f 100644 (file)
@@ -5,17 +5,20 @@ from .common import InfoExtractor
 
 
 class URPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
-        'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
+        'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
         'info_dict': {
             'id': '190031',
             'ext': 'mp4',
             'title': 'Tripp, Trapp, Träd : Sovkudde',
             'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
-        }
-    }
+        },
+    }, {
+        'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -27,30 +30,17 @@ class URPlayIE(InfoExtractor):
 
         formats = []
         for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
-            file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
-            if file_rtmp:
-                formats.append({
-                    'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
-                    'format_id': quality + '-rtmp',
-                    'ext': 'flv',
-                    'preference': preference,
-                })
             file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
             if file_http:
-                file_http_base_url = 'http://%s/%s' % (host, file_http)
-                formats.extend(self._extract_f4m_formats(
-                    file_http_base_url + 'manifest.f4m', video_id,
-                    preference, '%s-hds' % quality, fatal=False))
-                formats.extend(self._extract_m3u8_formats(
-                    file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
-                    'm3u8_native', preference, '%s-hls' % quality, fatal=False))
+                formats.extend(self._extract_wowza_formats(
+                    'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
         self._sort_formats(formats)
 
         subtitles = {}
         for subtitle in urplayer_data.get('subtitles', []):
             subtitle_url = subtitle.get('file')
             kind = subtitle.get('kind')
-            if subtitle_url or kind and kind != 'captions':
+            if not subtitle_url or (kind and kind != 'captions'):
                 continue
             subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
                 'url': subtitle_url,
index e1798857364c0d9d9ecc8ff48583880cb5cff697..a1e0851b7424e4c73cd34b72c02f16bc1905b6ce 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index cb64ae0bd07cdca051eb3aa10550840a296ded85..5ab7168808b10279932ba670165bc8190d5fceb0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 8a11ff84828a26d35566c7f5fe65c3f4cdc322b4..7f25665864c696757903deeb582a64f16eec0d85 100644 (file)
@@ -86,6 +86,11 @@ class VideomoreIE(InfoExtractor):
         mobj = re.search(
             r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
             webpage)
+        if not mobj:
+            mobj = re.search(
+                r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
+                webpage)
+
         if mobj:
             return mobj.group('url')
 
index d49cc6cbc567a8a0219f304a52707bd4129d1119..9950c62ad636ee4f03389bef627da4318f019c22 100644 (file)
@@ -1,10 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .jwplatform import JWPlatformBaseIE
 from ..utils import (
     decode_packed_codes,
     js_to_json,
+    NO_DEFAULT,
+    PACKED_CODES_RE,
 )
 
 
@@ -35,10 +39,17 @@ class VidziIE(JWPlatformBaseIE):
         title = self._html_search_regex(
             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
 
-        code = decode_packed_codes(webpage).replace('\\\'', '\'')
-        jwplayer_data = self._parse_json(
-            self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
-            video_id, transform_source=js_to_json)
+        packed_codes = [mobj.group(0) for mobj in re.finditer(
+            PACKED_CODES_RE, webpage)]
+        for num, pc in enumerate(packed_codes, 1):
+            code = decode_packed_codes(pc).replace('\\\'', '\'')
+            jwplayer_data = self._parse_json(
+                self._search_regex(
+                    r'setup\(([^)]+)\)', code, 'jwplayer data',
+                    default=NO_DEFAULT if num == len(packed_codes) else '{}'),
+                video_id, transform_source=js_to_json)
+            if jwplayer_data:
+                break
 
         info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
         info_dict['title'] = title
index dc142a245bcd765dc11200b8eb50fc342c5260f2..d26fb49b3939728e8a962b2ad3131c71fd223366 100644 (file)
@@ -49,7 +49,7 @@ class VierIE(InfoExtractor):
             webpage, 'filename')
 
         playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
-        formats = self._extract_wowza_formats(playlist_url, display_id)
+        formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage, default=display_id)
index 50aacc6ac2e3fd3ce5b08e09c804bfb6d34d9640..b566241cc31810ee8a9496b4c538dbf2505f962d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
@@ -355,23 +355,28 @@ class VimeoIE(VimeoBaseInfoExtractor):
         return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
 
     @staticmethod
-    def _extract_vimeo_url(url, webpage):
+    def _extract_urls(url, webpage):
+        urls = []
         # Look for embedded (iframe) Vimeo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
-        if mobj:
-            player_url = unescapeHTML(mobj.group('url'))
-            return VimeoIE._smuggle_referrer(player_url, url)
-        # Look for embedded (swf embed) Vimeo player
-        mobj = re.search(
-            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
-        if mobj:
-            return mobj.group(1)
-        # Look more for non-standard embedded Vimeo player
-        mobj = re.search(
-            r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
-        if mobj:
-            return mobj.group('url')
+        for mobj in re.finditer(
+                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+                webpage):
+            urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+        PLAIN_EMBED_RE = (
+            # Look for embedded (swf embed) Vimeo player
+            r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+            # Look more for non-standard embedded Vimeo player
+            r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+        )
+        for embed_re in PLAIN_EMBED_RE:
+            for mobj in re.finditer(embed_re, webpage):
+                urls.append(mobj.group('url'))
+        return urls
+
+    @staticmethod
+    def _extract_url(url, webpage):
+        urls = VimeoIE._extract_urls(url, webpage)
+        return urls[0] if urls else None
 
     def _verify_player_video_password(self, url, video_id):
         password = self._downloader.params.get('videopassword')
@@ -832,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
         'params': {
             'videopassword': 'holygrail',
         },
+        'skip': 'video gone',
     }]
 
     def _real_initialize(self):
@@ -839,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
 
     def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
         webpage = self._download_webpage(webpage_url, video_id)
-        config_url = self._html_search_regex(
-            r'data-config-url="([^"]+)"', webpage, 'config URL',
-            default=NO_DEFAULT if video_password_verified else None)
+        data = self._parse_json(self._search_regex(
+            r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+            default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+        config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
         if config_url is None:
             self._verify_video_password(webpage_url, video_id, webpage)
             config_url = self._get_config_url(
index 58799d413715d93d31f959a815ca5b8a92d835ed..df43ba86755eca4d25e5c670626550eb4bc88053 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import collections
@@ -341,7 +341,7 @@ class VKIE(VKBaseIE):
         if youtube_url:
             return self.url_result(youtube_url, 'Youtube')
 
-        vimeo_url = VimeoIE._extract_vimeo_url(url, info_page)
+        vimeo_url = VimeoIE._extract_url(url, info_page)
         if vimeo_url is not None:
             return self.url_result(vimeo_url)
 
index a938a4007ead91a25ca84b43307ce16e1787e2e6..c85b474d25fecc4e8b5acb8931b0939222882f0b 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 390f9e8302f392a25c83af2520cb30b854500632..f7e6360a33e8b6d2cc3096232bfa1d2c458ab3c7 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index bdd7097baec16afb2a3c83dbdde0e89ebc713a69..0f53f1bcb85f409a71d77712b006a57146a9d513 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index a83e68b17f53ca06fa4b76af6e3aa560fb23e107..deb7483ae51699df4670675db4622503320f1cbc 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index b113ab1c4891fdf96898d359cf38779d61b394f8..d9c277bc3cb0221cd926c54a64f95bcec928bd3d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index bcb140305559a164f56392dd33eab4d5c7b0bab5..e0a6255dc4df8f2a2bd56ffcf1363089a08e6aea 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index a66daee46ebc0324152f69eefa2b66d79bfb513d..4b9c1ee9c5222f48c5634184f703baa062cf3ae9 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
index f86823112297d40aeb9e847d182f905bcf7afb76..545246bcd74a8b94878d2b631d23029539e0438b 100644 (file)
@@ -1867,7 +1867,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'title': 'Uploads from Interstellar Movie',
             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
         },
-        'playlist_mincout': 21,
+        'playlist_mincount': 21,
     }, {
         # Playlist URL that does not actually serve a playlist
         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
@@ -1890,6 +1890,27 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'skip_download': True,
         },
         'add_ie': [YoutubeIE.ie_key()],
+    }, {
+        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+        'info_dict': {
+            'id': 'yeWKywCrFtk',
+            'ext': 'mp4',
+            'title': 'Small Scale Baler and Braiding Rugs',
+            'uploader': 'Backus-Page House Museum',
+            'uploader_id': 'backuspagemuseum',
+            'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+            'upload_date': '20161008',
+            'license': 'Standard YouTube License',
+            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+            'categories': ['Nonprofits & Activism'],
+            'tags': list,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
+        },
     }, {
         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
         'only_matching': True,
@@ -1971,8 +1992,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _check_download_just_video(self, url, playlist_id):
         # Check if it's a video-specific URL
         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        if 'v' in query_dict:
-            video_id = query_dict['v'][0]
+        video_id = query_dict.get('v', [None])[0] or self._search_regex(
+            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+            'video id', default=None)
+        if video_id:
             if self._downloader.params.get('noplaylist'):
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
@@ -2152,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE):
 
 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com live streams'
-    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
+    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live'
     IE_NAME = 'youtube:live'
 
     _TESTS = [{
@@ -2178,6 +2201,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index bd708b42c0ae95537e146d931aebfac371a99009..0f0e9d0eb9b1ac945934b11a134d143d82b19fb0 100644 (file)
@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 3bad5a266b6d51aaf0c92224a94986957da230f2..2e4789eb220b1069f51c5e68a9b3ff76b7514c6d 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 
index 8d1214ee2c75e000ab846893394baf3a85e40a4d..1881f4849e23c749d51da2e45d655ed4e6a68314 100644 (file)
@@ -279,6 +279,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
         new_path = prefix + sep + extension
 
+        information['filepath'] = new_path
+        information['ext'] = extension
+
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
         if (new_path == path or
                 (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
@@ -300,9 +303,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 new_path, time.time(), information['filetime'],
                 errnote='Cannot update utime of audio file')
 
-        information['filepath'] = new_path
-        information['ext'] = extension
-
         return [path], information
 
 
index d2dfa80139e25babab7fef073dc4cfe670ce7c50..28941673fa9bec36b24464faa79cc3b2348aca99 100644 (file)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -165,6 +165,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
     '%m/%d/%Y %H:%M:%S',
 ])
 
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+
 
 def preferredencoding():
     """Get preferred encoding.
@@ -782,6 +784,7 @@ class XAttrMetadataError(Exception):
     def __init__(self, code=None, msg='Unknown error'):
         super(XAttrMetadataError, self).__init__(msg)
         self.code = code
+        self.msg = msg
 
         # Parsing code and msg
         if (self.code in (errno.ENOSPC, errno.EDQUOT) or
@@ -3016,9 +3019,7 @@ def encode_base_n(num, n, table=None):
 
 
 def decode_packed_codes(code):
-    mobj = re.search(
-        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
-        code)
+    mobj = re.search(PACKED_CODES_RE, code)
     obfucasted_code, base, count, symbols = mobj.groups()
     base = int(base)
     count = int(count)
@@ -3161,20 +3162,25 @@ def write_xattr(path, key, value):
         # try the pyxattr module...
         import xattr
 
-        # Unicode arguments are not supported in python-pyxattr until
-        # version 0.5.0
-        # See https://github.com/rg3/youtube-dl/issues/5498
-        pyxattr_required_version = '0.5.0'
-        if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
-            # TODO: fallback to CLI tools
-            raise XAttrUnavailableError(
-                'python-pyxattr is detected but is too old. '
-                'youtube-dl requires %s or above while your version is %s. '
-                'Falling back to other xattr implementations' % (
-                    pyxattr_required_version, xattr.__version__))
+        if hasattr(xattr, 'set'):  # pyxattr
+            # Unicode arguments are not supported in python-pyxattr until
+            # version 0.5.0
+            # See https://github.com/rg3/youtube-dl/issues/5498
+            pyxattr_required_version = '0.5.0'
+            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+                # TODO: fallback to CLI tools
+                raise XAttrUnavailableError(
+                    'python-pyxattr is detected but is too old. '
+                    'youtube-dl requires %s or above while your version is %s. '
+                    'Falling back to other xattr implementations' % (
+                        pyxattr_required_version, xattr.__version__))
+
+            setxattr = xattr.set
+        else:  # xattr
+            setxattr = xattr.setxattr
 
         try:
-            xattr.set(path, key, value)
+            setxattr(path, key, value)
         except EnvironmentError as e:
             raise XAttrMetadataError(e.errno, e.strerror)
 
index af0c2cfc4e14360c526cd629e9c41db38c8ea2ea..b883dbdff64459f14fa505843fa8df43e50ee7c3 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.09.27'
+__version__ = '2016.10.19'