Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements'
author Philipp Hagemeister <phihag@phihag.de>
Thu, 19 Feb 2015 04:16:11 +0000 (05:16 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 19 Feb 2015 04:16:11 +0000 (05:16 +0100)
Makefile
youtube_dl/extractor/generic.py
youtube_dl/extractor/netzkino.py
youtube_dl/extractor/sockshare.py
youtube_dl/extractor/theonion.py
youtube_dl/extractor/webofstories.py

index 0636fc4cbe108667d0ecb85aed68018e4e6803ee..573c826850241be4557a4bbb0b43538242db6704 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
index 36a1f65bfd6e1e07e123d22bbf5b2c87ad0c10f3..8dce96a648a953c893266c7676c992a7aa6eb2d1 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -532,7 +532,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'Mrj4DVp2zeA',
                 'ext': 'mp4',
-                'upload_date': '20150204',
+                'upload_date': '20150212',
                 'uploader': 'The National Archives UK',
                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                 'uploader_id': 'NationalArchives08',
index 93567d1e38bc7da5ea2e621cf1f3adb848ef3461..bc17e20aa9d736eb9e4ba0a39929f20db47d8465 100644 (file)
--- a/youtube_dl/extractor/netzkino.py
+++ b/youtube_dl/extractor/netzkino.py
@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
             'timestamp': 1344858571,
             'age_limit': 12,
         },
+        'params': {
+            'skip_download': 'Download only works from Germany',
+        }
     }
 
     def _real_extract(self, url):
index 7d3c0e93783afeac3d8e939e0cf317177df4ca9f..b5fa6f1da203c993873622a9ee80c923300eebb2 100644 (file)
--- a/youtube_dl/extractor/sockshare.py
+++ b/youtube_dl/extractor/sockshare.py
@@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
             'id': '437BE28B89D799D7',
             'title': 'big_buck_bunny_720p_surround.avi',
             'ext': 'avi',
-            'thumbnail': 're:^http://.*\.jpg$',
         }
     }
 
@@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
             ''', webpage, 'hash')
 
         fields = {
-            "hash": confirm_hash,
+            "hash": confirm_hash.encode('utf-8'),
             "confirm": "Continue as Free User"
         }
 
@@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
             webpage, 'title', default=None)
         thumbnail = self._html_search_regex(
             r'<img\s+src="([^"]*)".+?name="bg"',
-            webpage, 'thumbnail')
+            webpage, 'thumbnail', default=None)
 
         formats = [{
             'format_id': 'sd',
index b65d8e03f7741a712001099c601ee354830a74a1..10239c906201e460ed288386709dffc5b7f6efbc 100644 (file)
--- a/youtube_dl/extractor/theonion.py
+++ b/youtube_dl/extractor/theonion.py
@@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
     _TEST = {
         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
         'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        article_id = mobj.group('article_id')
-
-        webpage = self._download_webpage(url, article_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
             r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        if not sources:
-            raise ExtractorError(
-                'No sources found for video %s' % video_id, expected=True)
-
         formats = []
         for src, type_ in sources:
             if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                 })
             elif type_ == 'application/x-mpegURL':
                 formats.extend(
-                    self._extract_m3u8_formats(src, video_id, preference=-1))
+                    self._extract_m3u8_formats(src, display_id, preference=-1))
             else:
                 self.report_warning(
                     'Encountered unexpected format: %s' % type_)
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
index 396cf4e8312ca73f90f45b3e24f3fb3561f54fa8..73077a312549f6b883fdf549a2b364f6de35db9f 100644 (file)
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
-        story_filename = self._search_regex(
-            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
-        speaker_id = self._search_regex(
-            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
-        story_id = self._search_regex(
-            r'\.storyId\((\d+)\)', webpage, 'story ID')
-        speaker_type = self._search_regex(
-            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
-        great_life = self._search_regex(
-            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+        embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+            r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+            webpage, 'embed params').split(',')]
+
+        (
+            _, speaker_id, story_id, story_duration,
+            speaker_type, great_life, _thumbnail, _has_subtitles,
+            story_filename, _story_order) = embed_params
+
         is_great_life_series = great_life == 'true'
-        duration = int_or_none(self._search_regex(
-            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+        duration = int_or_none(story_duration)
 
         # URL building, see: http://www.webofstories.com/scripts/player.js
         ms_prefix = ''