From: Philipp Hagemeister Date: Sun, 5 May 2013 18:51:27 +0000 (+0200) Subject: Merge remote-tracking branch 'origin/master' X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=02d5eb935f020241ff33b4b91ba88b92eee66b97;p=youtube-dl Merge remote-tracking branch 'origin/master' Conflicts: youtube_dl/InfoExtractors.py --- 02d5eb935f020241ff33b4b91ba88b92eee66b97 diff --cc youtube_dl/InfoExtractors.py index 54a37f266,0f1252113..f4e7065d1 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@@ -1520,27 -1497,36 +1480,26 @@@ class GoogleSearchIE(InfoExtractor) def _download_n_results(self, query, n): """Downloads a specified number of results for a query""" - video_ids = [] - pagenum = 0 + res = { + '_type': 'playlist', + 'id': query, + 'entries': [] + } - while True: - self.report_download_page(query, pagenum) - result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum*10) - request = compat_urllib_request.Request(result_url) - try: - page = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to download webpage: %s' % compat_str(err)) - - # Extract video identifiers - for mobj in re.finditer(self._VIDEO_INDICATOR, page): - video_id = mobj.group(1) - if video_id not in video_ids: - video_ids.append(video_id) - if len(video_ids) == n: - # Specified n videos reached - for id in video_ids: - self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id]) - return - - if re.search(self._MORE_PAGES_INDICATOR, page) is None: - for id in video_ids: - self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id]) - return + for pagenum in itertools.count(1): + result_url = u'http://video.google.com/videosearch?q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10) + webpage = self._download_webpage(result_url, u'gvsearch:' + query, + note='Downloading result page ' + str(pagenum)) - # Extract video identifiers - pagenum = pagenum + 1 + for mobj in re.finditer(r'

n) or not re.search(self._MORE_PAGES_INDICATOR, webpage): + return res class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! Video search queries."""