X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=e68657314ecde5406ec2d27fef005f899341daf1;hb=a816da0dc31914ae4de2f9bf80e84c035168e45d;hp=11b31db88422229b37c85a96ed1df3746867bf5a;hpb=feb72212091189353c0d6308fa20e4f33cc82da1;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 11b31db88..e68657314 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -92,8 +93,12 @@ class InfoExtractor(object): unique, but available before title. Typically, id is something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" - thumbnails: A list of dictionaries (with the entries "resolution" and - "url") for the varying thumbnails + thumbnails: A list of dictionaries, with the following entries: + * "url" + * "width" (optional, int) + * "height" (optional, int) + * "resolution" (optional, string "{width}x{height"}, + deprecated) thumbnail: Full URL to a video thumbnail image. description: One-line video description. uploader: Full name of the video uploader. @@ -113,6 +118,8 @@ class InfoExtractor(object): webpage_url: The url to the video webpage, if given to youtube-dl it should allow to get the same result again. (It will be set by YoutubeDL if it's missing) + categories: A list of categories that the video falls in, for example + ["Sports", "Berlin"] Unless mentioned otherwise, the fields should be Unicode strings. @@ -242,7 +249,7 @@ class InfoExtractor(object): url = url_or_request.get_full_url() except AttributeError: url = url_or_request - basen = video_id + '_' + url + basen = '%s_%s' % (video_id, url) if len(basen) > 240: h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h @@ -453,14 +460,17 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) - def _html_search_meta(self, name, html, display_name=None, fatal=False): + def _og_search_url(self, html, **kargs): + return self._og_search_property('url', html, **kargs) + + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=fatal) + html, display_name, fatal=fatal, **kwargs) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') @@ -566,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -609,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY -