import datetime
+import itertools
import json
import re
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
+ compat_urllib_parse,
+
ExtractorError,
)
class YahooIE(InfoExtractor):
- """Information extractor for screen.yahoo.com."""
+ IE_DESC = u'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
+ _TEST = {
+ u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ u'file': u'214727115.flv',
+ u'md5': u'2e717f169c1be93d84d3794a00d4a325',
+ u'info_dict': {
+ u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
+ },
+ u'skip': u'Requires rtmpdump'
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
'ext': 'flv',
}
return info_dict
+
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor that turns a Yahoo video search into a playlist.

    Each hit that links to a screen.yahoo.com video page is returned as a
    ``url_result`` entry delegated to the 'Yahoo' extractor.
    """
    IE_DESC = u'Yahoo screen search'
    # NOTE(review): presumably the upper bound enforced by
    # SearchInfoExtractor on the number of requested results -- confirm.
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'
    # Step used for the ``b`` (result offset) query parameter between
    # consecutive result pages.
    _PAGE_SIZE = 30

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Downloads result pages one after another until ``n`` entries have
        been collected or the service reports no further results.

        query -- the search terms (URL-quoted before being sent).
        n     -- the maximum number of entries to return.

        Returns a playlist dict ('_type', 'id', 'entries') whose entries
        are url_result dicts pointing at screen.yahoo.com pages.
        """
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': []
        }
        entries = res['entries']

        for pagenum in itertools.count(0):
            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * self._PAGE_SIZE)
            webpage = self._download_webpage(result_url, query,
                                             note='Downloading results page '+str(pagenum+1))
            info = json.loads(webpage)
            # NOTE(review): 'm' looks like paging metadata where 'last' is
            # the index of the last returned hit and 'total' the overall
            # hit count -- confirm against the service's response.
            m = info[u'm']
            results = info[u'results']

            for r in results:
                # Count collected entries rather than positions so that the
                # limit is honoured exactly and no loop variable has to
                # outlive the loop (it would be unbound on an empty page).
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                if mobj is None:
                    # Hit without a screen.yahoo.com link: nothing we can
                    # hand to the Yahoo extractor, so skip it.
                    continue
                entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Stop when enough entries were gathered, when the page came
            # back empty (no progress is possible), or when the service
            # says the last available result has been reached.
            if (len(entries) >= n or not results or
                    m[u'last'] >= (m[u'total'] - 1)):
                break

        return res