[screenwavemedia] Simplify (#3766)

author Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 248367039854ed99c32fdc51fe76d5bce02f7509..31531855e8e43294c155c752cda3e0c397e0921a 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -622,23 +622,17 @@ class YoutubeDL(object):
                  ie_result['url'], ie_key=ie_result.get('ie_key'),
                  extra_info=extra_info, download=False, process=False)
  
                  ie_result['url'], ie_key=ie_result.get('ie_key'),
                  extra_info=extra_info, download=False, process=False)
  
-            def make_result(embedded_info):
-                new_result = ie_result.copy()
-                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format',
-                          'thumbnail', 'thumbnails'):
-                    if f in new_result:
-                        del new_result[f]
-                    if f in embedded_info:
-                        new_result[f] = embedded_info[f]
-                return new_result
-            new_result = make_result(info)
+            new_result = ie_result.copy()
+            for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats',
+                      'entries', 'ie_key', 'duration',
+                      'subtitles', 'annotations', 'format',
+                      'thumbnail', 'thumbnails'):
+                if f in new_result:
+                    del new_result[f]
+                if f in info:
+                    new_result[f] = info[f]
  
              assert new_result.get('_type') != 'url_transparent'
  
              assert new_result.get('_type') != 'url_transparent'
-            if new_result.get('_type') == 'compat_list':
-                new_result['entries'] = [
-                    make_result(e) for e in new_result['entries']]
  
              return self.process_ie_result(
                  new_result, download=download, extra_info=extra_info)
  
              return self.process_ie_result(
                  new_result, download=download, extra_info=extra_info)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 6b7660ab1585b998f6f8d807db6e8a0147b1526b..982a134bf534e0f649df252ddefc0b2fd6df2a45 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -335,7 +335,7 @@ from .savefrom import SaveFromIE
  from .sbs import SBSIE
  from .scivee import SciVeeIE
  from .screencast import ScreencastIE
  from .sbs import SBSIE
  from .scivee import SciVeeIE
  from .screencast import ScreencastIE
-from .screenwavemedia import ScreenwaveMediaIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
  from .servingsys import ServingSysIE
  from .sexu import SexuIE
  from .sexykarma import SexyKarmaIE
  from .servingsys import ServingSysIE
  from .sexu import SexuIE
  from .sexykarma import SexyKarmaIE
diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py

index cf9b8e0de984b73c8a5230d4b522740b5ee61c84..1081904712806e4add7540c164824e3c68e26029 100644 (file)
--- a/youtube_dl/extractor/screenwavemedia.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@@ -6,109 +6,28 @@ import re
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
-    month_by_name,
      int_or_none,
      int_or_none,
+    month_by_name,
+    unified_strdate,
  )
  
  )
  
-class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?' \
-        r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
-        r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
-        r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)' \
-        r')'
-
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': 'Cinemasssacre-19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': 'Cinemasssacre-521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _cinemassacre_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('cm_display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            '_embed_url': playerdata_url,
-        }
-
-    def _teamfourstar_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('tfs_display_id')
-        webpage = self._download_webpage(url, display_id)
-
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-
-        video_title = self._html_search_regex(
-            r'<div class="heroheadingtitle">(?P<title>.+?)</div>', webpage, 'title')
-        video_date = self._html_search_regex(
-            r'<div class="heroheadingdate">(?P<date>.+?)</div>', webpage, 'date')
-        mobj = re.match('(?P<month>[A-Z][a-z]+) (?P<day>\d+), (?P<year>\d+)', video_date)
-        video_date = '%04u%02u%02u' % (int(mobj.group('year')), month_by_name(mobj.group('month')), int(mobj.group('day')))
-        video_description = self._html_search_regex(
-            r'<div class="postcontent">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL)
-        video_thumbnail = self._og_search_thumbnail(webpage)
  
  
-        return {
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            '_embed_url': playerdata_url,
-        }
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
  
  
-    def _screenwavemedia_get_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        video_id = mobj.group('video_id')
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]
  
  
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
          playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
  
          vidtitle = self._search_regex(
          playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
  
          vidtitle = self._search_regex(
-            r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/')
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
          vidurl = self._search_regex(
          vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
-        pageurl = self._search_regex(
-            r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
  
          videolist_url = None
  
  
          videolist_url = None
  
@@ -134,61 +53,128 @@ class ScreenwaveMediaIE(InfoExtractor):
                  file_ = src.partition(':')[-1]
                  width = int_or_none(video.get('width'))
                  height = int_or_none(video.get('height'))
                  file_ = src.partition(':')[-1]
                  width = int_or_none(video.get('width'))
                  height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                  format = {
                      'url': baseurl + file_,
                      'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                  }
                  if width or height:
                      format.update({
                  format = {
                      'url': baseurl + file_,
                      'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                  }
                  if width or height:
                      format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                          'width': width,
                          'height': height,
                      })
                  else:
                      format.update({
                          'width': width,
                          'height': height,
                      })
                  else:
                      format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                          'vcodec': 'none',
                      })
                  formats.append(format)
                          'vcodec': 'none',
                      })
                  formats.append(format)
-            self._sort_formats(formats)
          else:
              formats = [{
                  'url': vidurl,
              }]
          else:
              formats = [{
                  'url': vidurl,
              }]
+        self._sort_formats(formats)
  
          return {
              'id': video_id,
              'title': vidtitle,
              'formats': formats,
  
          return {
              'id': video_id,
              'title': vidtitle,
              'formats': formats,
-            '_episode_page': pageurl,
          }
  
          }
  
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        }
+    ]
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
  
  
-        swm_info = None
-        site_info = None
+        webpage = self._download_webpage(url, display_id)
  
  
-        if mobj.group('generic'):
-            swm_info = self._screenwavemedia_get_info(url)
-            url = swm_info['_episode_page']
-            mobj = re.match(self._VALID_URL, url)
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
  
  
-        if mobj:
-            if mobj.group('cinemassacre'):
-                site_info = self._cinemassacre_get_info(url)
-            elif mobj.group('teamfourstar'):
-                site_info = self._teamfourstar_get_info(url)
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
  
  
-        if not swm_info:
-            if site_info:
-                swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])
  
  
-        if not swm_info:
-            raise ExtractorError("Failed to extract metadata for this URL")
+class TeamFourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
+    _TEST = {
+        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
+        'info_dict': {
+            'id': 'TeamFourStar-5292a02f20bfa',
+            'ext': 'mp4',
+            'upload_date': '20130401',
+            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
+            'title': 'A Moment With TFS Episode 4',
+        }
+    }
  
  
-        if site_info:
-            swm_info.update(site_info)
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+
+        video_title = self._html_search_regex(
+            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
+            webpage, 'title')
+        video_date = unified_strdate(self._html_search_regex(
+            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
+            webpage, 'date', fatal=False))
+        video_description = self._html_search_regex(
+            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
+            webpage, 'description', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
  
  
-        return swm_info
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
author	Philipp Hagemeister <phihag@phihag.de>
	Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Fri, 12 Dec 2014 01:11:58 +0000 (02:11 +0100)
youtube_dl/YoutubeDL.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/screenwavemedia.py		patch | blob | history