_ Git - youtube-dl/blob - youtube_dl/extractor/freesound.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     determine_ext,
   8     float_or_none,
   9     get_element_by_class,
  10     get_element_by_id,
  11     int_or_none,
  12     parse_filesize,
  13     unified_strdate,
  14 )
  15
  16
  17 class FreesoundIE(InfoExtractor):
  18     _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
  19     _TEST = {
  20         'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
  21         'md5': '12280ceb42c81f19a515c745eae07650',
  22         'info_dict': {
  23             'id': '194503',
  24             'ext': 'mp3',
  25             'title': 'gulls in the city.wav',
  26             'uploader': 'miklovan',
  27             'description': 'the sounds of seagulls in the city',
  28         }
  29     }
  30
  31     def _real_extract(self, url):
  32         mobj = re.match(self._VALID_URL, url)
  33         music_id = mobj.group('id')
  34         webpage = self._download_webpage(url, music_id)
  35
  36         audio_url = self._og_search_property('audio', webpage, 'song url')
  37         title = self._og_search_property('audio:title', webpage, 'song title')
  38         duration = float_or_none(get_element_by_class('duration', webpage), scale=1000)
  39         tags = get_element_by_class('tags', webpage)
  40         sound_info = get_element_by_id('sound_information_box', webpage)
  41         release_date = get_element_by_id('sound_date', webpage)
  42
  43         description = self._html_search_regex(
  44             r'<div id="sound_description">(.*?)</div>', webpage, 'description',
  45             fatal=False, flags=re.DOTALL)
  46
  47         download_count = int_or_none(self._html_search_regex(
  48             r'Downloaded.*>(\d+)<', webpage, 'downloaded', fatal=False))
  49
  50         filesize = float_or_none(parse_filesize(self._search_regex(
  51             r'Filesize</dt><dd>(.*)</dd>', sound_info, 'file size (approx)', fatal=False)))
  52
  53         if release_date:
  54             release_date = unified_strdate(release_date.replace('th', ''))
  55
  56         bitdepth = self._html_search_regex(
  57             r'Bitdepth</dt><dd>(.*)</dd>', sound_info, 'Bitdepth', fatal=False)
  58
  59         channels = self._html_search_regex(
  60             r'Channels</dt><dd>(.*)</dd>', sound_info, 'Channels info', fatal=False)
  61
  62         formats = [{
  63             'url': audio_url,
  64             'id': music_id,
  65             'format_id': self._og_search_property('audio:type', webpage, 'audio format', fatal=False),
  66             'format_note': '{0} {1} {2}'.format(determine_ext(audio_url), bitdepth, channels),
  67             'filesize_approx': filesize,
  68             'asr': int_or_none(self._html_search_regex(
  69                 r'Samplerate</dt><dd>(\d+).*</dd>',
  70                 sound_info, 'samplerate', fatal=False)),
  71         }]
  72
  73         return {
  74             'id': music_id,
  75             'title': title,
  76             'uploader': self._og_search_property('audio:artist', webpage, 'music uploader', fatal=False),
  77             'description': description,
  78             'duration': duration,
  79             'tags': [self._html_search_regex(r'>(.*)</a>', t, 'tag', fatal=False)
  80                      for t in tags.split('\n') if t.strip()],
  81             'formats': formats,
  82             'release_date': release_date,
  83             'likes_count': download_count,
  84         }