From: Filippo Valsorda Date: Fri, 29 Mar 2013 15:14:49 +0000 (+0100) Subject: Merge pull request #730 by @JohnyMoSwag X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=46b9d8295d43bb4a5954da95293ef2490eb28b5e;hp=-c Merge pull request #730 by @JohnyMoSwag Support for Worldstarhiphop.com --- 46b9d8295d43bb4a5954da95293ef2490eb28b5e diff --combined test/tests.json index 929d454ff,4190c5387..0c94c65bd --- a/test/tests.json +++ b/test/tests.json @@@ -294,30 -294,13 +294,39 @@@ "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" } }, + { + "name": "Generic", + "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html", + "file": "13601338388002.mp4", + "md5": "85b90ccc9d73b4acd9138d3af4c27f89" + }, + { + "name": "Spiegel", + "url": "http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html", + "file": "1259285.mp4", + "md5": "2c2754212136f35fb4b19767d242f66e", + "info_dict": { + "title": "Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" + } + }, + { + "name": "LiveLeak", + "md5": "0813c2430bea7a46bf13acf3406992f4", + "url": "http://www.liveleak.com/view?i=757_1364311680", + "file": "757_1364311680.mp4", + "info_dict": { + "title": "Most unlucky car accident", + "description": "extremely bad day for this guy..!", + "uploader": "ljfriel2" + } ++ }, + { + "name": "WorldStarHipHop", + "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO", + "file": "wshh6a7q1ny0G34ZwuIO.mp4", + "md5": "9d04de741161603bf7071bbf4e883186", + "info_dict": { + "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! " + } } ] diff --combined youtube_dl/InfoExtractors.py index b3c3dbb43,a31aa759e..b4c86cfa3 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@@ -48,7 -48,7 +48,7 @@@ class InfoExtractor(object) uploader_id: Nickname or id of the video uploader. location: Physical location of the video. player_url: SWF Player URL (used for rtmpdump). - subtitles: The .srt file contents. + subtitles: The subtitle file contents. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen @@@ -126,14 -126,8 +126,14 @@@ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the data of the page as a string """ urlh = self._request_webpage(url_or_request, video_id, note, errnote) + content_type = urlh.headers.get('Content-Type', '') + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + encoding = 'utf-8' webpage_bytes = urlh.read() - return webpage_bytes.decode('utf-8', 'replace') + return webpage_bytes.decode(encoding, 'replace') class YoutubeIE(InfoExtractor): @@@ -224,16 -218,7 +224,16 @@@ def report_video_subtitles_download(self, video_id): """Report attempt to download video info webpage.""" - self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id) + self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id) + + def report_video_subtitles_request(self, video_id, sub_lang, format): + """Report attempt to download video info webpage.""" + self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) + + def report_video_subtitles_available(self, video_id, sub_lang_list): + """Report available subtitles.""" + sub_lang = ",".join(list(sub_lang_list.keys())) + self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, sub_lang)) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" @@@ -247,63 -232,55 +247,63 @@@ """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - def _closed_captions_xml_to_srt(self, xml_string): - srt = '' - texts = re.findall(r'([^<]+)', xml_string, re.MULTILINE) - # TODO parse xml instead of regex - for n, (start, dur_tag, dur, caption) in enumerate(texts): - if not dur: dur = '4' - start = float(start) - end = start + float(dur) - start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000) - end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000) - caption = unescapeHTML(caption) - caption = unescapeHTML(caption) # double cycle, intentional - srt += str(n+1) + '\n' - srt += start + ' --> ' + end + '\n' - srt += caption + '\n\n' - return srt - - def _extract_subtitles(self, video_id): + def _get_available_subtitles(self, video_id): self.report_video_subtitles_download(video_id) request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: - srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) - srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) - srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) - if not srt_lang_list: - return (u'WARNING: video has no closed captions', None) - if self._downloader.params.get('subtitleslang', False): - srt_lang = self._downloader.params.get('subtitleslang') - elif 'en' in srt_lang_list: - srt_lang = 'en' - else: - srt_lang = list(srt_lang_list.keys())[0] - if not srt_lang in srt_lang_list: - return (u'WARNING: no closed captions found in the specified language', None) + sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) + sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) + if not sub_lang_list: + return (u'WARNING: video doesn\'t have subtitles', None) + return sub_lang_list + + def _list_available_subtitles(self, video_id): + sub_lang_list = self._get_available_subtitles(video_id) + self.report_video_subtitles_available(video_id, sub_lang_list) + + def _request_subtitle(self, sub_lang, sub_name, video_id, format): + self.report_video_subtitles_request(video_id, sub_lang, format) params = compat_urllib_parse.urlencode({ - 'lang': srt_lang, - 'name': srt_lang_list[srt_lang].encode('utf-8'), + 'lang': sub_lang, + 'name': sub_name, 'v': video_id, + 'fmt': format, }) url = 'http://www.youtube.com/api/timedtext?' + params try: - srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8') + sub = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) - if not srt_xml: + if not sub: return (u'WARNING: Did not fetch video subtitles', None) - return (None, self._closed_captions_xml_to_srt(srt_xml)) + return (None, sub_lang, sub) + + def _extract_subtitle(self, video_id): + sub_lang_list = self._get_available_subtitles(video_id) + sub_format = self._downloader.params.get('subtitlesformat') + if self._downloader.params.get('subtitleslang', False): + sub_lang = self._downloader.params.get('subtitleslang') + elif 'en' in sub_lang_list: + sub_lang = 'en' + else: + sub_lang = list(sub_lang_list.keys())[0] + if not sub_lang in sub_lang_list: + return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None) + + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) + return [subtitle] + + def _extract_all_subtitles(self, video_id): + sub_lang_list = self._get_available_subtitles(video_id) + sub_format = self._downloader.params.get('subtitlesformat') + subtitles = [] + for sub_lang in sub_lang_list: + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) + subtitles.append(subtitle) + return subtitles def _print_formats(self, formats): print('Available formats:') @@@ -411,13 -388,13 +411,13 @@@ self.report_age_confirmation() age_results = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) + self._downloader.report_error(u'unable to confirm age: %s' % compat_str(err)) return def _extract_id(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(2) return video_id @@@ -436,7 -413,7 +436,7 @@@ try: video_webpage_bytes = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err)) return video_webpage = video_webpage_bytes.decode('utf-8', 'ignore') @@@ -461,18 -438,18 +461,18 @@@ if 'token' in video_info: break except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: if 'reason' in video_info: - self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + self._downloader.report_error(u'YouTube said: %s' % video_info['reason'][0]) else: - self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + self._downloader.report_error(u'"token" parameter not in video info for unknown reason') return # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - self._downloader.trouble(u'ERROR: "rental" videos not supported') + self._downloader.report_error(u'"rental" videos not supported') return # Start extracting information @@@ -480,7 -457,7 +480,7 @@@ # uploader if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader name') + self._downloader.report_error(u'unable to extract uploader name') return video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) @@@ -490,17 -467,17 +490,17 @@@ if mobj is not None: video_uploader_id = mobj.group(1) else: - self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + self._downloader.report_warning(u'unable to extract uploader nickname') # title if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) # thumbnail image if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + self._downloader.report_warning(u'unable to extract video thumbnail') video_thumbnail = '' else: # don't panic if we can't find it video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) @@@ -524,29 -501,15 +524,29 @@@ else: video_description = '' - # closed captions + # subtitles video_subtitles = None + if self._downloader.params.get('writesubtitles', False): - (srt_error, video_subtitles) = self._extract_subtitles(video_id) - if srt_error: - self._downloader.trouble(srt_error) + video_subtitles = self._extract_subtitle(video_id) + if video_subtitles: + (sub_error, sub_lang, sub) = video_subtitles[0] + if sub_error: + self._downloader.trouble(sub_error) + + if self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_all_subtitles(video_id) + for video_subtitle in video_subtitles: + (sub_error, sub_lang, sub) = video_subtitle + if sub_error: + self._downloader.trouble(sub_error) + + if self._downloader.params.get('listsubtitles', False): + sub_lang_list = self._list_available_subtitles(video_id) + return if 'length_seconds' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video duration') + self._downloader.report_warning(u'unable to extract video duration') video_duration = '' else: video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) @@@ -574,7 -537,7 +574,7 @@@ format_list = available_formats existing_formats = [x for x in format_list if x in url_map] if len(existing_formats) == 0: - self._downloader.trouble(u'ERROR: no known formats available for video') + self._downloader.report_error(u'no known formats available for video') return if self._downloader.params.get('listformats', None): self._print_formats(existing_formats) @@@ -595,10 -558,10 +595,10 @@@ video_url_list = [(rf, url_map[rf])] break if video_url_list is None: - self._downloader.trouble(u'ERROR: requested format not available') + self._downloader.report_error(u'requested format not available') return else: - self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') + self._downloader.report_error(u'no conn or url_encoded_fmt_stream_map information found in video info') return results = [] @@@ -661,7 -624,7 +661,7 @@@ class MetacafeIE(InfoExtractor) self.report_disclaimer() disclaimer = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) + self._downloader.report_error(u'unable to retrieve disclaimer: %s' % compat_str(err)) return # Confirm age @@@ -674,14 -637,14 +674,14 @@@ self.report_age_confirmation() disclaimer = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) + self._downloader.report_error(u'unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(1) @@@ -698,7 -661,7 +698,7 @@@ self.report_download_webpage(video_id) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@@ -718,15 -681,15 +718,15 @@@ else: mobj = re.search(r' name="flashvars" value="(.*?)"', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.report_error(u'unable to extract media URL') return vardict = compat_parse_qs(mobj.group(1)) if 'mediaData' not in vardict: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.report_error(u'unable to extract media URL') return mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0]) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.report_error(u'unable to extract media URL') return mediaURL = mobj.group(1).replace('\\/', '/') video_extension = mediaURL[-3:] @@@ -734,13 -697,13 +734,13 @@@ mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_title = mobj.group(1).decode('utf-8') mobj = re.search(r'submitter=(.*?);', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + self._downloader.report_error(u'unable to extract uploader nickname') return video_uploader = mobj.group(1) @@@ -772,7 -735,7 +772,7 @@@ class DailymotionIE(InfoExtractor) # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(1).split('_')[0].split('?')[0] @@@ -788,7 -751,7 +788,7 @@@ self.report_extraction(video_id) mobj = re.search(r'\s*var flashvars = (.*)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.report_error(u'unable to extract media URL') return flashvars = compat_urllib_parse.unquote(mobj.group(1)) @@@ -798,12 -761,12 +798,12 @@@ self._downloader.to_screen(u'[dailymotion] Using %s' % key) break else: - self._downloader.trouble(u'ERROR: unable to extract video URL') + self._downloader.report_error(u'unable to extract video URL') return mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video URL') + self._downloader.report_error(u'unable to extract video URL') return video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/') @@@ -812,7 -775,7 +812,7 @@@ mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_title = unescapeHTML(mobj.group('title')) @@@ -822,7 -785,7 +822,7 @@@ # lookin for official user mobj_official = re.search(r'', webpage) if mobj_official is None: - self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + self._downloader.report_warning(u'unable to extract uploader nickname') else: video_uploader = mobj_official.group(1) else: @@@ -864,7 -827,7 +864,7 @@@ class PhotobucketIE(InfoExtractor) # Extract id from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + self._downloader.report_error(u'Invalid URL: %s' % url) return video_id = mobj.group(1) @@@ -877,14 -840,14 +877,14 @@@ self.report_download_webpage(video_id) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage self.report_extraction(video_id) mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.report_error(u'unable to extract media URL') return mediaURL = compat_urllib_parse.unquote(mobj.group(1)) @@@ -892,7 -855,7 +892,7 @@@ mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_title = mobj.group(1).decode('utf-8') @@@ -933,7 -896,7 +933,7 @@@ class YahooIE(InfoExtractor) # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + self._downloader.report_error(u'Invalid URL: %s' % url) return video_id = mobj.group(2) @@@ -946,18 -909,18 +946,18 @@@ try: webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: Unable to extract id field') + self._downloader.report_error(u'Unable to extract id field') return yahoo_id = mobj.group(1) mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: Unable to extract vid field') + self._downloader.report_error(u'Unable to extract vid field') return yahoo_vid = mobj.group(1) @@@ -970,34 -933,34 +970,34 @@@ self.report_download_webpage(video_id) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return video_title = mobj.group(1).decode('utf-8') mobj = re.search(r'

(.*)

', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video uploader') + self._downloader.report_error(u'unable to extract video uploader') return video_uploader = mobj.group(1).decode('utf-8') # Extract video thumbnail mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + self._downloader.report_error(u'unable to extract video thumbnail') return video_thumbnail = mobj.group(1).decode('utf-8') # Extract video description mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video description') + self._downloader.report_error(u'unable to extract video description') return video_description = mobj.group(1).decode('utf-8') if not video_description: @@@ -1006,13 -969,13 +1006,13 @@@ # Extract video height and width mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video height') + self._downloader.report_error(u'unable to extract video height') return yv_video_height = mobj.group(1) mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video width') + self._downloader.report_error(u'unable to extract video width') return yv_video_width = mobj.group(1) @@@ -1028,13 -991,13 +1028,13 @@@ self.report_download_webpage(video_id) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract media URL from playlist XML mobj = re.search(r'(.*)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_title = mobj.group(1) # video uploader is domain name mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_uploader = mobj.group(1) @@@ -1470,7 -1437,7 +1470,7 @@@ class YoutubeSearchIE(InfoExtractor) def _real_extract(self, query): mobj = re.match(self._VALID_URL, query) if mobj is None: - self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + self._downloader.report_error(u'invalid search query "%s"' % query) return prefix, query = query.split(':') @@@ -1486,7 -1453,7 +1486,7 @@@ try: n = int(prefix) if n <= 0: - self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_youtube_results: self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) @@@ -1511,7 -1478,7 +1511,7 @@@ try: data = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download API page: %s' % compat_str(err)) return api_response = json.loads(data)['data'] @@@ -1552,7 -1519,7 +1552,7 @@@ class GoogleSearchIE(InfoExtractor) def _real_extract(self, query): mobj = re.match(self._VALID_URL, query) if mobj is None: - self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + self._downloader.report_error(u'invalid search query "%s"' % query) return prefix, query = query.split(':') @@@ -1568,7 -1535,7 +1568,7 @@@ try: n = int(prefix) if n <= 0: - self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_google_results: self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) @@@ -1592,7 -1559,7 +1592,7 @@@ try: page = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@@ -1636,7 -1603,7 +1636,7 @@@ class YahooSearchIE(InfoExtractor) def _real_extract(self, query): mobj = re.match(self._VALID_URL, query) if mobj is None: - self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + self._downloader.report_error(u'invalid search query "%s"' % query) return prefix, query = query.split(':') @@@ -1652,7 -1619,7 +1652,7 @@@ try: n = int(prefix) if n <= 0: - self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_yahoo_results: self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) @@@ -1677,7 -1644,7 +1677,7 @@@ try: page = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@@ -1739,7 -1706,7 +1739,7 @@@ class YoutubePlaylistIE(InfoExtractor) # Extract playlist id mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: - self._downloader.trouble(u'ERROR: invalid url: %s' % url) + self._downloader.report_error(u'invalid url: %s' % url) return # Download playlist videos from API @@@ -1754,17 -1721,17 +1754,17 @@@ try: page = compat_urllib_request.urlopen(url).read().decode('utf8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return try: response = json.loads(page) except ValueError as err: - self._downloader.trouble(u'ERROR: Invalid JSON in API response: ' + compat_str(err)) + self._downloader.report_error(u'Invalid JSON in API response: ' + compat_str(err)) return if not 'feed' in response or not 'entry' in response['feed']: - self._downloader.trouble(u'ERROR: Got a malformed response from YouTube API') + self._downloader.report_error(u'Got a malformed response from YouTube API') return videos += [ (entry['yt$position']['$t'], entry['content']['src']) for entry in response['feed']['entry'] @@@ -1810,7 -1777,7 +1810,7 @@@ class YoutubeChannelIE(InfoExtractor) # Extract channel id mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid url: %s' % url) + self._downloader.report_error(u'invalid url: %s' % url) return # Download channel pages @@@ -1825,7 -1792,7 +1825,7 @@@ try: page = compat_urllib_request.urlopen(request).read().decode('utf8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@@ -1868,7 -1835,7 +1868,7 @@@ class YoutubeUserIE(InfoExtractor) # Extract username mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid url: %s' % url) + self._downloader.report_error(u'invalid url: %s' % url) return username = mobj.group(1) @@@ -1890,7 -1857,7 +1890,7 @@@ try: page = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@@ -1948,7 -1915,7 +1948,7 @@@ class BlipTVUserIE(InfoExtractor) # Extract username mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid url: %s' % url) + self._downloader.report_error(u'invalid url: %s' % url) return username = mobj.group(1) @@@ -1962,7 -1929,7 +1962,7 @@@ mobj = re.search(r'data-users-id="([^"]+)"', page) page_base = page_base % mobj.group(1) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return @@@ -1981,7 -1948,7 +1981,7 @@@ try: page = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % str(err)) return # Extract video identifiers @@@ -2045,7 -2012,7 +2045,7 @@@ class DepositFilesIE(InfoExtractor) self.report_download_webpage(file_id) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve file webpage: %s' % compat_str(err)) return # Search for the real file URL @@@ -2055,9 -2022,9 +2055,9 @@@ mobj = re.search(r'(Attention.*?)', webpage, re.DOTALL) if (mobj is not None) and (mobj.group(1) is not None): restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() - self._downloader.trouble(u'ERROR: %s' % restriction_message) + self._downloader.report_error(u'%s' % restriction_message) else: - self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) + self._downloader.report_error(u'unable to extract download URL from: %s' % url) return file_url = mobj.group(1) @@@ -2066,7 -2033,7 +2066,7 @@@ # Search for file title mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return file_title = mobj.group(1).decode('utf-8') @@@ -2139,7 -2106,7 +2139,7 @@@ class FacebookIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('ID') @@@ -2195,7 -2162,7 +2195,7 @@@ class BlipTVIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return urlp = compat_urllib_parse_urlparse(url) @@@ -2242,7 -2209,7 +2242,7 @@@ json_code_bytes = urlh.read() json_code = json_code_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to read video info webpage: %s' % compat_str(err)) return try: @@@ -2273,7 -2240,7 +2273,7 @@@ 'user_agent': 'iTunes/10.6.1', } except (ValueError,KeyError) as err: - self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) + self._downloader.report_error(u'unable to parse video information: %s' % repr(err)) return return [info] @@@ -2295,7 -2262,7 +2295,7 @@@ class MyVideoIE(InfoExtractor) def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._download.trouble(u'ERROR: invalid URL: %s' % url) + self._download.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(1) @@@ -2305,16 -2272,16 +2305,16 @@@ webpage = self._download_webpage(webpage_url, video_id) self.report_extraction(video_id) - mobj = re.search(r'', + mobj = re.search(r'([^<]+)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return video_title = mobj.group(1) @@@ -2387,7 -2354,7 +2387,7 @@@ class ComedyCentralIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return if mobj.group('shortname'): @@@ -2418,16 -2385,16 +2418,16 @@@ html = htmlHandle.read() webpage = html.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return if dlNewest: url = htmlHandle.geturl() mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: - self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) + self._downloader.report_error(u'Invalid redirected URL: ' + url) return if mobj.group('episode') == '': - self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) + self._downloader.report_error(u'Redirected URL is still not specific: ' + url) return epTitle = mobj.group('episode') @@@ -2440,7 -2407,7 +2440,7 @@@ altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) if len(altMovieParams) == 0: - self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + self._downloader.report_error(u'unable to find Flash URL in webpage ' + url) return else: mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] @@@ -2451,7 -2418,7 +2451,7 @@@ try: indexXml = compat_urllib_request.urlopen(indexUrl).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download episode index: ' + compat_str(err)) + self._downloader.report_error(u'unable to download episode index: ' + compat_str(err)) return results = [] @@@ -2472,7 -2439,7 +2472,7 @@@ try: configXml = compat_urllib_request.urlopen(configReq).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err)) return cdoc = xml.etree.ElementTree.fromstring(configXml) @@@ -2482,7 -2449,7 +2482,7 @@@ turls.append(finfo) if len(turls) == 0: - self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') + self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found') continue if self._downloader.params.get('listformats', None): @@@ -2539,7 -2506,7 +2539,7 @@@ class EscapistIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return showName = mobj.group('showname') videoId = mobj.group('episode') @@@ -2551,7 -2518,7 +2551,7 @@@ m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type']) webPage = webPageBytes.decode(m.group(1) if m else 'utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download webpage: ' + compat_str(err)) + self._downloader.report_error(u'unable to download webpage: ' + compat_str(err)) return descMatch = re.search('(.*?)\s+-\s+XVID', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return video_title = mobj.group(1) @@@ -2711,7 -2678,7 +2711,7 @@@ # Extract video thumbnail mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + self._downloader.report_error(u'unable to extract video thumbnail') return video_thumbnail = mobj.group(0) @@@ -2755,7 -2722,7 +2755,7 @@@ class SoundcloudIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return # extract uploader (which is in the url) @@@ -2773,7 -2740,7 +2773,7 @@@ info_json_bytes = compat_urllib_request.urlopen(request).read() info_json = info_json_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err)) return info = json.loads(info_json) @@@ -2786,7 -2753,7 +2786,7 @@@ stream_json_bytes = compat_urllib_request.urlopen(request).read() stream_json = stream_json_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download stream definitions: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download stream definitions: %s' % compat_str(err)) return streams = json.loads(stream_json) @@@ -2814,7 -2781,7 +2814,7 @@@ class InfoQIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return webpage = self._download_webpage(url, video_id=url) @@@ -2823,7 -2790,7 +2823,7 @@@ # Extract video URL mobj = re.search(r"jsclassref='([^']*)'", webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video url') + self._downloader.report_error(u'unable to extract video url') return real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8')) video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id @@@ -2831,7 -2798,7 +2831,7 @@@ # Extract title mobj = re.search(r'contentTitle = "(.*?)";', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return video_title = mobj.group(1) @@@ -2914,7 -2881,7 +2914,7 @@@ class MixcloudIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return # extract uploader & filename from url uploader = mobj.group(1).decode('utf-8') @@@ -2928,7 -2895,7 +2928,7 @@@ self.report_download_json(file_url) jsonData = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve file: %s' % compat_str(err)) return # parse JSON @@@ -2952,7 -2919,7 +2952,7 @@@ break # got it! else: if req_format not in formats: - self._downloader.trouble(u'ERROR: format is not available') + self._downloader.report_error(u'format is not available') return url_list = self.get_urls(formats, req_format) @@@ -3006,14 -2973,14 +3006,14 @@@ class StanfordOpenClassroomIE(InfoExtra try: metaXml = compat_urllib_request.urlopen(xmlUrl).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video info XML: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metaXml) try: info['title'] = mdoc.findall('./title')[0].text info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text except IndexError: - self._downloader.trouble(u'\nERROR: Invalid metadata XML file') + self._downloader.report_error(u'Invalid metadata XML file') return info['ext'] = info['url'].rpartition('.')[2] return [info] @@@ -3065,7 -3032,7 +3065,7 @@@ try: rootpage = compat_urllib_request.urlopen(rootURL).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err)) + self._downloader.report_error(u'unable to download course info page: ' + compat_str(err)) return info['title'] = info['id'] @@@ -3097,7 -3064,7 +3097,7 @@@ class MTVIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return if not mobj.group('proto'): url = 'http://' + url @@@ -3107,25 -3074,25 +3107,25 @@@ mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract song name') + self._downloader.report_error(u'unable to extract song name') return song_name = unescapeHTML(mobj.group(1).decode('iso-8859-1')) mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract performer') + self._downloader.report_error(u'unable to extract performer') return performer = unescapeHTML(mobj.group(1).decode('iso-8859-1')) video_title = performer + ' - ' + song_name mobj = re.search(r'', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to mtvn_uri') + self._downloader.report_error(u'unable to mtvn_uri') return mtvn_uri = mobj.group(1) mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract content id') + self._downloader.report_error(u'unable to extract content id') return content_id = mobj.group(1) @@@ -3135,7 -3102,7 +3135,7 @@@ try: metadataXml = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video metadata: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metadataXml) @@@ -3207,7 -3174,7 +3207,7 @@@ class YoukuIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('ID') @@@ -3218,7 -3185,7 +3218,7 @@@ self.report_download_webpage(video_id) jsondata = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@@ -3249,7 -3216,7 +3249,7 @@@ fileid = config['data'][0]['streamfileids'][format] keys = [s['k'] for s in config['data'][0]['segs'][format]] except (UnicodeDecodeError, ValueError, KeyError): - self._downloader.trouble(u'ERROR: unable to extract info section') + self._downloader.report_error(u'unable to extract info section') return files_info=[] @@@ -3296,7 -3263,7 +3296,7 @@@ class XNXXIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(1) @@@ -3307,24 -3274,24 +3307,24 @@@ webpage_bytes = compat_urllib_request.urlopen(url).read() webpage = webpage_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err) + self._downloader.report_error(u'unable to download video webpage: %s' % err) return result = re.search(self.VIDEO_URL_RE, webpage) if result is None: - self._downloader.trouble(u'ERROR: unable to extract video url') + self._downloader.report_error(u'unable to extract video url') return video_url = compat_urllib_parse.unquote(result.group(1)) result = re.search(self.VIDEO_TITLE_RE, webpage) if result is None: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return video_title = result.group(1) result = re.search(self.VIDEO_THUMB_RE, webpage) if result is None: - self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + self._downloader.report_error(u'unable to extract video thumbnail') return video_thumbnail = result.group(1) @@@ -3373,7 -3340,7 +3373,7 @@@ class GooglePlusIE(InfoExtractor) # Extract id from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + self._downloader.report_error(u'Invalid URL: %s' % url) return post_url = mobj.group(0) @@@ -3387,7 -3354,7 +3387,7 @@@ try: webpage = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve entry webpage: %s' % compat_str(err)) return # Extract update date @@@ -3422,14 -3389,14 +3422,14 @@@ pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]' mobj = re.search(pattern, webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video page URL') + self._downloader.report_error(u'unable to extract video page URL') video_page = mobj.group(1) request = compat_urllib_request.Request(video_page) try: webpage = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) + self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extract_vid_page(video_page) @@@ -3439,7 -3406,7 +3439,7 @@@ pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"' mobj = re.findall(pattern, webpage) if len(mobj) == 0: - self._downloader.trouble(u'ERROR: unable to extract video links') + self._downloader.report_error(u'unable to extract video links') # Sort in resolution links = sorted(mobj) @@@ -3471,7 -3438,7 +3471,7 @@@ class NBAIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group(1) @@@ -3527,13 -3494,13 +3527,13 @@@ class JustinTVIE(InfoExtractor) webpage_bytes = urlh.read() webpage = webpage_bytes.decode('utf-8', 'ignore') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video info JSON: %s' % compat_str(err)) + self._downloader.report_error(u'unable to download video info JSON: %s' % compat_str(err)) return response = json.loads(webpage) if type(response) != list: error_text = response.get('error', 'unknown error') - self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text) + self._downloader.report_error(u'Justin.tv API: %s' % error_text) return info = [] for clip in response: @@@ -3558,7 -3525,7 +3558,7 @@@ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return api = 'http://api.justin.tv' @@@ -3593,7 -3560,7 +3593,7 @@@ class FunnyOrDieIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('id') @@@ -3601,13 -3568,13 +3601,13 @@@ m = re.search(r']*>\s*]*>\s*\s+(?P.*?)</a>", webpage) + m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage, flags=re.DOTALL) if not m: self._downloader.trouble(u'Cannot find video title') - title = unescapeHTML(m.group('title')) + title = clean_html(m.group('title')) m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage) if m: @@@ -3654,7 -3621,7 +3654,7 @@@ class SteamIE(InfoExtractor) video_url = vid.group('videoURL') video_thumb = thumb.group('thumbnail') if not video_url: - self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id) + self._downloader.report_error(u'Cannot find video url for %s' % video_id) info = { 'id':video_id, 'url':video_url, @@@ -3687,6 -3654,62 +3687,62 @@@ class UstreamIE(InfoExtractor) } return [info] + class WorldStarHipHopIE(InfoExtractor): + _VALID_URL = r'http://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)' + IE_NAME = u'WorldStarHipHop' + + def _real_extract(self, url): + _src_url = r"""(http://hw-videos.*(?:mp4|flv))""" + + webpage_src = compat_urllib_request.urlopen(url).read() + webpage_src = webpage_src.decode('utf-8') + + mobj = re.search(_src_url, webpage_src) + + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + if mobj is not None: + video_url = mobj.group() + if 'mp4' in video_url: + ext = 'mp4' + else: + ext = 'flv' + else: + self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id) + return + + _title = r"""<title>(.*)""" + + mobj = re.search(_title, webpage_src) + + if mobj is not None: + title = mobj.group(1) + else: + title = 'World Start Hip Hop - %s' % time.ctime() + + _thumbnail = r"""rel="image_src" href="(.*)" />""" + mobj = re.search(_thumbnail, webpage_src) + + # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. + if mobj is not None: + thumbnail = mobj.group(1) + else: + _title = r"""candytitles.*>(.*)""" + mobj = re.search(_title, webpage_src) + if mobj is not None: + title = mobj.group(1) + thumbnail = None + + results = [{ + 'id': video_id, + 'url' : video_url, + 'title' : title, + 'thumbnail' : thumbnail, + 'ext' : ext, + }] + return results + class RBMARadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P[^/]+)$' @@@ -3744,7 -3767,7 +3800,7 @@@ class YouPornIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('videoid') @@@ -3836,7 -3859,7 +3892,7 @@@ else: format = self._specific( req_format, formats ) if result is None: - self._downloader.trouble(u'ERROR: requested format not available') + self._downloader.report_error(u'requested format not available') return return [format] @@@ -3849,7 -3872,7 +3905,7 @@@ class PornotubeIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('videoid') @@@ -3862,7 -3885,7 +3918,7 @@@ VIDEO_URL_RE = r'url: "(?Phttp://video[0-9].pornotube.com/.+\.flv)",' result = re.search(VIDEO_URL_RE, webpage) if result is None: - self._downloader.trouble(u'ERROR: unable to extract video url') + self._downloader.report_error(u'unable to extract video url') return video_url = compat_urllib_parse.unquote(result.group('url')) @@@ -3870,7 -3893,7 +3926,7 @@@ VIDEO_UPLOADED_RE = r'
Added (?P[0-9\/]+) by' result = re.search(VIDEO_UPLOADED_RE, webpage) if result is None: - self._downloader.trouble(u'ERROR: unable to extract video title') + self._downloader.report_error(u'unable to extract video title') return upload_date = result.group('date') @@@ -3891,7 -3914,7 +3947,7 @@@ class YouJizzIE(InfoExtractor) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + self._downloader.report_error(u'invalid URL: %s' % url) return video_id = mobj.group('videoid') @@@ -3986,11 -4009,11 +4042,11 @@@ class KeekIE(InfoExtractor) webpage = self._download_webpage(url, video_id) m = re.search(r'[\s\n]+

(?P\w+)

', webpage) - uploader = unescapeHTML(m.group('uploader')) + m = re.search(r'
[\S\s]+?

(?P.+?)

', webpage) + uploader = clean_html(m.group('uploader')) info = { - 'id':video_id, - 'url':video_url, + 'id': video_id, + 'url': video_url, 'ext': 'mp4', 'title': title, 'thumbnail': thumbnail, @@@ -4051,7 -4074,7 +4107,7 @@@ class TEDIE(InfoExtractor) videoName=m.group('name') webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName) # If the url includes the language we get the title translated - title_RE=r'

(?P.*)</span></h1>' + title_RE=r'<span id="altHeadline" >(?P<title>.*)</span>' title=re.search(title_RE, webpage).group('title') info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?) "id":(?P<videoID>[\d]+).*? @@@ -4092,13 -4115,13 +4148,13 @@@ class MySpassIE(InfoExtractor) # extract values from metadata url_flv_el = metadata.find('url_flv') if url_flv_el is None: - self._downloader.trouble(u'ERROR: unable to extract download url') + self._downloader.report_error(u'unable to extract download url') return video_url = url_flv_el.text extension = os.path.splitext(video_url)[1][1:] title_el = metadata.find('title') if title_el is None: - self._downloader.trouble(u'ERROR: unable to extract title') + self._downloader.report_error(u'unable to extract title') return title = title_el.text format_id_el = metadata.find('format_id') @@@ -4127,89 -4150,6 +4183,89 @@@ } return [info] +class SpiegelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('videoID') + + webpage = self._download_webpage(url, video_id) + m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage) + if not m: + raise ExtractorError(u'Cannot find title') + video_title = unescapeHTML(m.group(1)) + + xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' + xml_code = self._download_webpage(xml_url, video_id, + note=u'Downloading XML', errnote=u'Failed to download XML') + + idoc = xml.etree.ElementTree.fromstring(xml_code) + last_type = idoc[-1] + filename = last_type.findall('./filename')[0].text + duration = float(last_type.findall('./duration')[0].text) + + video_url = 'http://video2.spiegel.de/flash/' + filename + video_ext = filename.rpartition('.')[2] + info = { + 'id': video_id, + 'url': video_url, + 'ext': video_ext, + 'title': video_title, + 'duration': duration, + } + return [info] + +class LiveLeakIE(InfoExtractor): + + _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' + IE_NAME = u'liveleak' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + + m = re.search(r'file: "(.*?)",', webpage) + if not m: + self._downloader.report_error(u'unable to find video url') + return + video_url = m.group(1) + + m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage) + if not m: + self._downloader.trouble(u'Cannot find video title') + title = unescapeHTML(m.group('title')).replace('LiveLeak.com -', '').strip() + + m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage) + if m: + desc = unescapeHTML(m.group('desc')) + else: + desc = None + + m = re.search(r'By:.*?(\w+)</a>', webpage) + if m: + uploader = clean_html(m.group(1)) + else: + uploader = None + + info = { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': desc, + 'uploader': uploader + } + + return [info] + + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@@ -4249,6 -4189,7 +4305,7 @@@ GooglePlusIE(), ArteTvIE(), NBAIE(), + WorldStarHipHopIE(), JustinTVIE(), FunnyOrDieIE(), SteamIE(), @@@ -4258,7 -4199,7 +4315,7 @@@ KeekIE(), TEDIE(), MySpassIE(), + SpiegelIE(), + LiveLeakIE(), GenericIE() ] - -