From: remitamine Date: Sat, 15 Apr 2017 21:48:13 +0000 (+0200) Subject: Merge pull request #12752 from triple-j/go90_improvements_pull_request X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=72950c4dce413d4274323173dc3e7a2c17d93392;hp=334f41e0d827a361b4b8d39f55b66221281d92ae;p=youtube-dl Merge pull request #12752 from triple-j/go90_improvements_pull_request [go90] Improve extraction --- diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 70f6b51ed..5d5adb199 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.04.15 +[debug] youtube-dl version 2017.04.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index cf5ee84a4..6be86a090 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version 2017.04.16 + +Core +* [YoutubeDL] Apply expand_path after output template substitution ++ [YoutubeDL] Propagate overridden meta fields to extraction results of type + url (#11163) + +Extractors ++ [generic] Extract RSS entries as url_transparent (#11163) ++ [streamango] Add support for streamango.com (#12643) ++ [wsj:article] Add support for articles (#12558) +* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds' + URLs (#9163, #12005, #12178, #12480) ++ [udemy] Add support for react rendition (#12744) + + version 2017.04.15 Extractors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b29b50c8d..afae82214 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -745,6 +745,7 @@ - **Steam** - **Stitcher** - **Streamable** + - **Streamango** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** @@ -966,6 +967,7 @@ - **wrzuta.pl** - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal + - **WSJArticle** - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8491a88bd..75945e38f 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -755,6 +755,7 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', + 'title': 'foo1 title' } class Foo2IE(InfoExtractor): @@ -771,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase): _VALID_URL = r'foo3:' def _real_extract(self, url): - return _make_result([{'url': TEST_URL}]) + return _make_result([{'url': TEST_URL}], title='foo3 title') ydl.add_info_extractor(Foo1IE(ydl)) ydl.add_info_extractor(Foo2IE(ydl)) @@ -779,6 +780,7 @@ class TestYoutubeDL(unittest.TestCase): ydl.extract_info('foo1:') downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) + self.assertEqual(downloaded['title'], 'foo1 title') if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 7953670a7..819b374ef 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -672,8 +672,7 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - tmpl = expand_path(outtmpl) - filename = tmpl % template_dict + filename = expand_path(outtmpl % template_dict) # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding # to workaround encoding issues with subprocess on python2 @ Windows @@ -851,7 +850,14 @@ class YoutubeDL(object): new_result = info.copy() new_result.update(force_properties) - assert new_result.get('_type') != 'url_transparent' + # Extracted info may not be a video result (i.e. + # info.get('_type', 'video') != video) but rather an url or + # url_transparent. In such cases outer metadata (from ie_result) + # should be propagated to inner one (info). For this to happen + # _type of info should be overridden with url_transparent. This + # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. + if new_result.get('_type') == 'url': + new_result['_type'] = 'url_transparent' return self.process_ie_result( new_result, download=download, extra_info=extra_info) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 46ef8e605..124497e95 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -17,6 +17,7 @@ from ..compat import ( from ..utils import ( determine_ext, ExtractorError, + extract_attributes, find_xpath_attr, fix_xml_ampersands, float_or_none, @@ -109,6 +110,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'upload_date': '20140827', 'uploader_id': '710858724001', }, + 'skip': 'Video gone', }, { # playlist with 'videoList' @@ -487,12 +489,13 @@ class BrightcoveNewIE(InfoExtractor): return urls[0] if urls else None @staticmethod - def _extract_urls(webpage): + def _extract_urls(ie, webpage): # Reference: # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe - # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript - # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html - # 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player + # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag + # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript + # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html + # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player entries = [] @@ -501,22 +504,48 @@ class BrightcoveNewIE(InfoExtractor): r']+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): entries.append(url if url.startswith('http') else 'http:' + url) - # Look for embed_in_page embeds [2] - for video_id, account_id, player_id, embed in re.findall( - # According to examples from [3] it's unclear whether video id - # may be optional and what to do when it is - # According to [4] data-video-id may be prefixed with ref: - r'''(?sx) - ]+ - data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*? - .*? - ]+ - src=["\'](?:https?:)?//players\.brightcove\.net/ - (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js + # Look for