git.bitcoin.ninja Git - youtube-dl/commitdiff
[screenwavemedia] Simplify (#3766)
author Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/screenwavemedia.py

index 248367039854ed99c32fdc51fe76d5bce02f7509..31531855e8e43294c155c752cda3e0c397e0921a 100755 (executable)
@@ -622,23 +622,17 @@ class YoutubeDL(object):
                 ie_result['url'], ie_key=ie_result.get('ie_key'),
                 extra_info=extra_info, download=False, process=False)
 
                 ie_result['url'], ie_key=ie_result.get('ie_key'),
                 extra_info=extra_info, download=False, process=False)
 
-            def make_result(embedded_info):
-                new_result = ie_result.copy()
-                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format',
-                          'thumbnail', 'thumbnails'):
-                    if f in new_result:
-                        del new_result[f]
-                    if f in embedded_info:
-                        new_result[f] = embedded_info[f]
-                return new_result
-            new_result = make_result(info)
+            new_result = ie_result.copy()
+            for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
+                      'entries', 'ie_key', 'duration',
+                      'subtitles', 'annotations', 'format',
+                      'thumbnail', 'thumbnails'):
+                if f in new_result:
+                    del new_result[f]
+                if f in info:
+                    new_result[f] = info[f]
 
             assert new_result.get('_type') != 'url_transparent'
 
             assert new_result.get('_type') != 'url_transparent'
-            if new_result.get('_type') == 'compat_list':
-                new_result['entries'] = [
-                    make_result(e) for e in new_result['entries']]
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
index 6b7660ab1585b998f6f8d807db6e8a0147b1526b..982a134bf534e0f649df252ddefc0b2fd6df2a45 100644 (file)
@@ -335,7 +335,7 @@ from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
-from .screenwavemedia import ScreenwaveMediaIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
index cf9b8e0de984b73c8a5230d4b522740b5ee61c84..1081904712806e4add7540c164824e3c68e26029 100644 (file)
@@ -6,109 +6,28 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    month_by_name,
     int_or_none,
     int_or_none,
+    month_by_name,
+    unified_strdate,
 )
 
 )
 
-class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?' \
-        r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
-        r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
-        r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)' \
-        r')'
-
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': 'Cinemasssacre-19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': 'Cinemasssacre-521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _cinemassacre_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('cm_display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            '_embed_url': playerdata_url,
-        }
-
-    def _teamfourstar_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('tfs_display_id')
-        webpage = self._download_webpage(url, display_id)
-
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-
-        video_title = self._html_search_regex(
-            r'<div class="heroheadingtitle">(?P<title>.+?)</div>', webpage, 'title')
-        video_date = self._html_search_regex(
-            r'<div class="heroheadingdate">(?P<date>.+?)</div>', webpage, 'date')
-        mobj = re.match('(?P<month>[A-Z][a-z]+) (?P<day>\d+), (?P<year>\d+)', video_date)
-        video_date = '%04u%02u%02u' % (int(mobj.group('year')), month_by_name(mobj.group('month')), int(mobj.group('day')))
-        video_description = self._html_search_regex(
-            r'<div class="postcontent">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL)
-        video_thumbnail = self._og_search_thumbnail(webpage)
 
 
-        return {
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            '_embed_url': playerdata_url,
-        }
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
 
-    def _screenwavemedia_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        video_id = mobj.group('video_id')
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]
 
 
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
         playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
 
         vidtitle = self._search_regex(
         playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
 
         vidtitle = self._search_regex(
-            r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/')
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
         vidurl = self._search_regex(
         vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
-        pageurl = self._search_regex(
-            r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
 
         videolist_url = None
 
 
         videolist_url = None
 
@@ -134,61 +53,128 @@ class ScreenwaveMediaIE(InfoExtractor):
                 file_ = src.partition(':')[-1]
                 width = int_or_none(video.get('width'))
                 height = int_or_none(video.get('height'))
                 file_ = src.partition(':')[-1]
                 width = int_or_none(video.get('width'))
                 height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                 format = {
                     'url': baseurl + file_,
                     'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                 }
                 if width or height:
                     format.update({
                 format = {
                     'url': baseurl + file_,
                     'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                 }
                 if width or height:
                     format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                         'width': width,
                         'height': height,
                     })
                 else:
                     format.update({
                         'width': width,
                         'height': height,
                     })
                 else:
                     format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                         'vcodec': 'none',
                     })
                 formats.append(format)
                         'vcodec': 'none',
                     })
                 formats.append(format)
-            self._sort_formats(formats)
         else:
             formats = [{
                 'url': vidurl,
             }]
         else:
             formats = [{
                 'url': vidurl,
             }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': vidtitle,
             'formats': formats,
 
         return {
             'id': video_id,
             'title': vidtitle,
             'formats': formats,
-            '_episode_page': pageurl,
         }
 
         }
 
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        }
+    ]
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
 
 
-        swm_info = None
-        site_info = None
+        webpage = self._download_webpage(url, display_id)
 
 
-        if mobj.group('generic'):
-            swm_info = self._screenwavemedia_get_info(url)
-            url = swm_info['_episode_page']
-            mobj = re.match(self._VALID_URL, url)
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
 
-        if mobj:
-            if mobj.group('cinemassacre'):
-                site_info = self._cinemassacre_get_info(url)
-            elif mobj.group('teamfourstar'):
-                site_info = self._teamfourstar_get_info(url)
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
 
 
-        if not swm_info:
-            if site_info:
-                swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])
 
 
-        if not swm_info:
-            raise ExtractorError("Failed to extract metadata for this URL")
+class TeamFourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
+    _TEST = {
+        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
+        'info_dict': {
+            'id': 'TeamFourStar-5292a02f20bfa',
+            'ext': 'mp4',
+            'upload_date': '20130401',
+            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
+            'title': 'A Moment With TFS Episode 4',
+        }
+    }
 
 
-        if site_info:
-            swm_info.update(site_info)
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+
+        video_title = self._html_search_regex(
+            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
+            webpage, 'title')
+        video_date = unified_strdate(self._html_search_regex(
+            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
+            webpage, 'date', fatal=False))
+        video_description = self._html_search_regex(
+            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
+            webpage, 'description', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
 
-        return swm_info
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }