_ Git - youtube-dl/blob - youtube_dl/extractor/myspace.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_str,
  10 )
  11 from ..utils import ExtractorError
  12
  13
  14 class MySpaceIE(InfoExtractor):
  15     _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  16
  17     _TESTS = [
  18         {
  19             'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
  20             'info_dict': {
  21                 'id': '109594919',
  22                 'ext': 'flv',
  23                 'title': 'Little Big Town',
  24                 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
  25                 'uploader': 'Five Minutes to the Stage',
  26                 'uploader_id': 'fiveminutestothestage',
  27             },
  28             'params': {
  29                 # rtmp download
  30                 'skip_download': True,
  31             },
  32         },
  33         # songs
  34         {
  35             'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  36             'info_dict': {
  37                 'id': '93388656',
  38                 'ext': 'flv',
  39                 'playlist': 'The Demo',
  40                 'title': 'Of weakened soul...',
  41                 'uploader': 'Killsorrow',
  42                 'uploader_id': 'killsorrow',
  43             },
  44             'params': {
  45                 # rtmp download
  46                 'skip_download': True,
  47             },
  48         }, {
  49             'add_ie': ['Vevo'],
  50             'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  51             'info_dict': {
  52                 'id': 'USZM20600099',
  53                 'ext': 'mp4',
  54                 'title': 'Animal I Have Become',
  55                 'uploader': 'Three Days Grace',
  56                 'timestamp': int,
  57                 'upload_date': '20060502',
  58             },
  59             'skip': 'VEVO is only available in some countries',
  60         }, {
  61             'add_ie': ['Youtube'],
  62             'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  63             'info_dict': {
  64                 'id': 'ypWvQgnJrSU',
  65                 'ext': 'mp4',
  66                 'title': 'Starset - First Light',
  67                 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
  68                 'uploader': 'Jacob Soren',
  69                 'uploader_id': 'SorenPromotions',
  70                 'upload_date': '20140725',
  71             }
  72         },
  73     ]
  74
  75     def _real_extract(self, url):
  76         mobj = re.match(self._VALID_URL, url)
  77         video_id = mobj.group('id')
  78         webpage = self._download_webpage(url, video_id)
  79         player_url = self._search_regex(
  80             r'playerSwf":"([^"?]*)', webpage, 'player URL')
  81
  82         if mobj.group('mediatype').startswith('music/song'):
  83             # songs don't store any useful info in the 'context' variable
  84             song_data = self._search_regex(
  85                 r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  86                 webpage, 'song_data', default=None, group=0)
  87             if song_data is None:
  88                 self.to_screen(
  89                     '%s: No downloadable song on this page' % video_id)
  90                 return
  91             def search_data(name):
  92                 return self._search_regex(
  93                     r'''data-%s=([\'"])(.*?)\1''' % name,
  94                     song_data, name, default='', group=2)
  95             streamUrl = search_data('stream-url')
  96             if not streamUrl:
  97                 vevo_id = search_data('vevo-id')
  98                 youtube_id = search_data('youtube-id')
  99                 if vevo_id:
 100                     self.to_screen('Vevo video detected: %s' % vevo_id)
 101                     return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
 102                 elif youtube_id:
 103                     self.to_screen('Youtube video detected: %s' % youtube_id)
 104                     return self.url_result(youtube_id, ie='Youtube')
 105                 else:
 106                     raise ExtractorError(
 107                         'Found song but don\'t know how to download it')
 108             info = {
 109                 'id': video_id,
 110                 'title': self._og_search_title(webpage),
 111                 'uploader': search_data('artist-name'),
 112                 'uploader_id': search_data('artist-username'),
 113                 'playlist': search_data('album-title'),
 114                 'thumbnail': self._og_search_thumbnail(webpage),
 115             }
 116         else:
 117             context = json.loads(self._search_regex(
 118                 r'context = ({.*?});', webpage, 'context'))
 119             video = context['video']
 120             streamUrl = video['streamUrl']
 121             info = {
 122                 'id': compat_str(video['mediaId']),
 123                 'title': video['title'],
 124                 'description': video['description'],
 125                 'thumbnail': video['imageUrl'],
 126                 'uploader': video['artistName'],
 127                 'uploader_id': video['artistUsername'],
 128             }
 129
 130         rtmp_url, play_path = streamUrl.split(';', 1)
 131         info.update({
 132             'url': rtmp_url,
 133             'play_path': play_path,
 134             'player_url': player_url,
 135             'ext': 'flv',
 136         })
 137         return info
 138
 139
 140 class MySpaceAlbumIE(InfoExtractor):
 141     IE_NAME = 'MySpace:album'
 142     _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
 143
 144     _TESTS = [{
 145         'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
 146         'info_dict': {
 147             'title': 'Transmissions',
 148             'id': '19455773',
 149         },
 150         'playlist_count': 14,
 151         'skip': 'this album is only available in some countries',
 152     }, {
 153         'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
 154         'info_dict': {
 155             'title': 'The Demo',
 156             'id': '18596029',
 157         },
 158         'playlist_count': 5,
 159     }]
 160
 161     def _real_extract(self, url):
 162         mobj = re.match(self._VALID_URL, url)
 163         playlist_id = mobj.group('id')
 164         display_id = mobj.group('title') + playlist_id
 165         webpage = self._download_webpage(url, display_id)
 166         tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
 167         if not tracks_paths:
 168             self.to_screen('%s: No songs found, try using proxy' % display_id)
 169             return
 170         entries = [
 171             self.url_result(t_path, ie=MySpaceIE.ie_key())
 172             for t_path in tracks_paths]
 173         title = self._og_search_title(webpage)
 174         return {
 175             '_type': 'playlist',
 176             'id': playlist_id,
 177             'display_id': display_id,
 178             'title': title,
 179             'entries': entries,
 180         }