_ Git - youtube-dl/blob - youtube_dl/extractor/newgrounds.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     extract_attributes,
   8     int_or_none,
   9 )
  10
  11
  12 class NewgroundsIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
  14     _TESTS = [{
  15         'url': 'https://www.newgrounds.com/audio/listen/549479',
  16         'md5': 'fe6033d297591288fa1c1f780386f07a',
  17         'info_dict': {
  18             'id': '549479',
  19             'ext': 'mp3',
  20             'title': 'B7 - BusMode',
  21             'uploader': 'Burn7',
  22         }
  23     }, {
  24         'url': 'https://www.newgrounds.com/portal/view/673111',
  25         'md5': '3394735822aab2478c31b1004fe5e5bc',
  26         'info_dict': {
  27             'id': '673111',
  28             'ext': 'mp4',
  29             'title': 'Dancin',
  30             'uploader': 'Squirrelman82',
  31         },
  32     }, {
  33         # source format unavailable, additional mp4 formats
  34         'url': 'http://www.newgrounds.com/portal/view/689400',
  35         'info_dict': {
  36             'id': '689400',
  37             'ext': 'mp4',
  38             'title': 'ZTV News Episode 8',
  39             'uploader': 'BennettTheSage',
  40         },
  41         'params': {
  42             'skip_download': True,
  43         },
  44     }]
  45
  46     def _real_extract(self, url):
  47         media_id = self._match_id(url)
  48
  49         webpage = self._download_webpage(url, media_id)
  50
  51         title = self._html_search_regex(
  52             r'<title>([^>]+)</title>', webpage, 'title')
  53
  54         video_url = self._parse_json(self._search_regex(
  55             r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
  56
  57         formats = [{
  58             'url': video_url,
  59             'format_id': 'source',
  60             'quality': 1,
  61         }]
  62
  63         max_resolution = int_or_none(self._search_regex(
  64             r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
  65             default=None))
  66         if max_resolution:
  67             url_base = video_url.rpartition('.')[0]
  68             for resolution in (360, 720, 1080):
  69                 if resolution > max_resolution:
  70                     break
  71                 formats.append({
  72                     'url': '%s.%dp.mp4' % (url_base, resolution),
  73                     'format_id': '%dp' % resolution,
  74                     'height': resolution,
  75                 })
  76
  77         self._check_formats(formats, media_id)
  78         self._sort_formats(formats)
  79
  80         uploader = self._html_search_regex(
  81             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
  82             fatal=False)
  83
  84         return {
  85             'id': media_id,
  86             'title': title,
  87             'uploader': uploader,
  88             'formats': formats,
  89         }
  90
  91
  92 class NewgroundsPlaylistIE(InfoExtractor):
  93     _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
  94     _TESTS = [{
  95         'url': 'https://www.newgrounds.com/collection/cats',
  96         'info_dict': {
  97             'id': 'cats',
  98             'title': 'Cats',
  99         },
 100         'playlist_mincount': 46,
 101     }, {
 102         'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
 103         'info_dict': {
 104             'id': 'ZONE-SAMA',
 105             'title': 'Portal Search: ZONE-SAMA',
 106         },
 107         'playlist_mincount': 47,
 108     }, {
 109         'url': 'http://www.newgrounds.com/audio/search/title/cats',
 110         'only_matching': True,
 111     }]
 112
 113     def _real_extract(self, url):
 114         playlist_id = self._match_id(url)
 115
 116         webpage = self._download_webpage(url, playlist_id)
 117
 118         title = self._search_regex(
 119             r'<title>([^>]+)</title>', webpage, 'title', default=None)
 120
 121         # cut left menu
 122         webpage = self._search_regex(
 123             r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
 124             webpage, 'wide column', default=webpage)
 125
 126         entries = []
 127         for a, path, media_id in re.findall(
 128                 r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
 129                 webpage):
 130             a_class = extract_attributes(a).get('class')
 131             if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
 132                 continue
 133             entries.append(
 134                 self.url_result(
 135                     'https://www.newgrounds.com/%s' % path,
 136                     ie=NewgroundsIE.ie_key(), video_id=media_id))
 137
 138         return self.playlist_result(entries, playlist_id, title)