_ Git - youtube-dl/blob - youtube_dl/extractor/toypics.py

   1 from .common import InfoExtractor
   2 import re
   3
   4
   5 class ToypicsIE(InfoExtractor):
   6     IE_DESC = 'Toypics user profile'
   7     _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
   8     _TEST = {
   9         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
  10         'md5': '16e806ad6d6f58079d210fe30985e08b',
  11         'info_dict': {
  12             'id': '514',
  13             'ext': 'mp4',
  14             'title': 'Chance-Bulge\'d, 2',
  15             'age_limit': 18,
  16             'uploader': 'kidsune',
  17         }
  18     }
  19
  20     def _real_extract(self, url):
  21         mobj = re.match(self._VALID_URL, url)
  22         video_id = mobj.group('id')
  23         page = self._download_webpage(url, video_id)
  24         video_url = self._html_search_regex(
  25             r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
  26         title = self._html_search_regex(
  27             r'<title>Toypics - ([^<]+)</title>', page, 'title')
  28         username = self._html_search_regex(
  29             r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
  30         return {
  31             'id': video_id,
  32             'url': video_url,
  33             'title': title,
  34             'uploader': username,
  35             'age_limit': 18,
  36         }
  37
  38
  39 class ToypicsUserIE(InfoExtractor):
  40     IE_DESC = 'Toypics user profile'
  41     _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
  42
  43     def _real_extract(self, url):
  44         mobj = re.match(self._VALID_URL, url)
  45         username = mobj.group('username')
  46
  47         profile_page = self._download_webpage(
  48             url, username, note='Retrieving profile page')
  49
  50         video_count = int(self._search_regex(
  51             r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
  52             'video count'))
  53
  54         PAGE_SIZE = 8
  55         urls = []
  56         page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
  57         for n in range(1, page_count + 1):
  58             lpage_url = url + '/public/%d' % n
  59             lpage = self._download_webpage(
  60                 lpage_url, username,
  61                 note='Downloading page %d/%d' % (n, page_count))
  62             urls.extend(
  63                 re.findall(
  64                     r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
  65                     lpage))
  66
  67         return {
  68             '_type': 'playlist',
  69             'id': username,
  70             'entries': [{
  71                 '_type': 'url',
  72                 'url': eurl,
  73                 'ie_key': 'Toypics',
  74             } for eurl in urls]
  75         }