_ Git - youtube-dl/blob - youtube_dl/extractor/myspace.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4 import json
   5
   6 from .common import InfoExtractor
   7 from ..compat import (
   8     compat_str,
   9 )
  10 from ..utils import ExtractorError
  11
  12
  13 class MySpaceIE(InfoExtractor):
  14     _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  15
  16     _TESTS = [
  17         {
  18             'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
  19             'info_dict': {
  20                 'id': '100008689',
  21                 'ext': 'flv',
  22                 'title': 'Viva La Vida',
  23                 'description': 'The official Viva La Vida video, directed by Hype Williams',
  24                 'uploader': 'Coldplay',
  25                 'uploader_id': 'coldplay',
  26             },
  27             'params': {
  28                 # rtmp download
  29                 'skip_download': True,
  30             },
  31         },
  32         # songs
  33         {
  34             'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  35             'md5': 'f1d7323321f6b7775bf1e3754c1707dc',
  36             'info_dict': {
  37                 'id': '93388656',
  38                 'ext': 'flv',
  39                 'playlist': 'The Demo',
  40                 'title': 'Of weakened soul...',
  41                 'uploader': 'Killsorrow',
  42                 'uploader_id': 'killsorrow',
  43             },
  44             'params': {
  45                 # rtmp download
  46                 'skip_download': True,
  47             },
  48         }, {
  49             'add_ie': ['Vevo'],
  50             'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  51             'info_dict': {
  52                 'id': u'USZM20600099',
  53                 'title': u'Animal I Have Become',
  54                 'uploader': u'Three Days Grace',
  55                 'timestamp': int,
  56             },
  57             'skip': 'VEVO is only available in some countries',
  58         }, {
  59             'add_ie': ['Youtube'],
  60             'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  61             'info_dict': {
  62                 'id': 'ypWvQgnJrSU',
  63                 'title': 'Starset - First Light',
  64                 'uploader': 'Jacob Soren',
  65                 'uploader_id': 'SorenPromotions',
  66                 'upload_date': '20140725',
  67             }
  68         },
  69     ]
  70
  71     def _real_extract(self, url):
  72         mobj = re.match(self._VALID_URL, url)
  73         video_id = mobj.group('id')
  74         webpage = self._download_webpage(url, video_id)
  75         player_url = self._search_regex(
  76             r'playerSwf":"([^"?]*)', webpage, 'player URL')
  77
  78         if mobj.group('mediatype').startswith('music/song'):
  79             # songs don't store any useful info in the 'context' variable
  80             song_data = self._search_regex(
  81                 r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  82                 webpage, 'song_data', default=None, group=0)
  83             if song_data is None:
  84                 self.to_screen(
  85                     '%s: No downloadable song on this page' % video_id)
  86                 return
  87             def search_data(name):
  88                 return self._search_regex(
  89                     r'''data-%s=([\'"])(.*?)\1''' % name,
  90                     song_data, name, default='', group=2)
  91             streamUrl = search_data('stream-url')
  92             if not streamUrl:
  93                 vevo_id = search_data('vevo-id')
  94                 youtube_id = search_data('youtube-id')
  95                 if vevo_id:
  96                     self.to_screen('Vevo video detected: %s' % vevo_id)
  97                     return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
  98                 elif youtube_id:
  99                     self.to_screen('Youtube video detected: %s' % youtube_id)
 100                     return self.url_result(youtube_id, ie='Youtube')
 101                 else:
 102                     raise ExtractorError(
 103                         'Found song but don\'t know how to download it')
 104             info = {
 105                 'id': video_id,
 106                 'title': self._og_search_title(webpage),
 107                 'uploader': search_data('artist-name'),
 108                 'uploader_id': search_data('artist-username'),
 109                 'playlist': search_data('album-title'),
 110                 'thumbnail': self._og_search_thumbnail(webpage),
 111             }
 112         else:
 113             context = json.loads(self._search_regex(
 114                 r'context = ({.*?});', webpage, 'context'))
 115             video = context['video']
 116             streamUrl = video['streamUrl']
 117             info = {
 118                 'id': compat_str(video['mediaId']),
 119                 'title': video['title'],
 120                 'description': video['description'],
 121                 'thumbnail': video['imageUrl'],
 122                 'uploader': video['artistName'],
 123                 'uploader_id': video['artistUsername'],
 124             }
 125
 126         rtmp_url, play_path = streamUrl.split(';', 1)
 127         info.update({
 128             'url': rtmp_url,
 129             'play_path': play_path,
 130             'player_url': player_url,
 131             'ext': 'flv',
 132         })
 133         return info