[theonion] Modernize
author Philipp Hagemeister <phihag@phihag.de>
Thu, 19 Feb 2015 03:12:40 +0000 (04:12 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 19 Feb 2015 03:12:40 +0000 (04:12 +0100)
youtube_dl/extractor/theonion.py

index b65d8e03f7741a712001099c601ee354830a74a1..10239c906201e460ed288386709dffc5b7f6efbc 100644 (file)
@@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
     _TEST = {
         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
         'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        article_id = mobj.group('article_id')
-
-        webpage = self._download_webpage(url, article_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
         video_id = self._search_regex(
             r'"videoId":\s(\d+),', webpage, 'video ID')
@@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        if not sources:
-            raise ExtractorError(
-                'No sources found for video %s' % video_id, expected=True)
-
         formats = []
         for src, type_ in sources:
             if type_ == 'video/mp4':
@@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                 })
             elif type_ == 'application/x-mpegURL':
                 formats.extend(
-                    self._extract_m3u8_formats(src, video_id, preference=-1))
+                    self._extract_m3u8_formats(src, display_id, preference=-1))
             else:
                 self.report_warning(
                     'Encountered unexpected format: %s' % type_)
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,