id: Video identifier.
url: Final video URL.
- uploader: Nickname of the video uploader, unescaped.
+ uploader: Full name of the video uploader, unescaped.
upload_date: Video upload date (YYYYMMDD).
title: Video title, unescaped.
ext: Video filename extension.
format: The video format, defaults to ext (used for --get-format)
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
+ uploader_id: Nickname or id of the video uploader.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The .srt file contents.
urlhandle: [internal] The urlHandle to be used to download the file,
# uploader
if 'author' not in video_info:
- self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+ self._downloader.trouble(u'ERROR: unable to extract uploader name')
return
video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
+ # uploader_id
+ video_uploader_id = None
+ mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/user/([^"]+)">', video_webpage)
+ if mobj is not None:
+ video_uploader_id = mobj.group(1)
+ else:
+ self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
'id': video_id,
'url': video_real_url,
'uploader': video_uploader,
+ 'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'ext': video_extension,
# Extract title
video_title = config["video"]["title"]
- # Extract uploader
+ # Extract uploader and uploader_id
video_uploader = config["video"]["owner"]["name"]
+ video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
# Extract video thumbnail
video_thumbnail = config["video"]["thumbnail"]
# Extract upload date
video_upload_date = None
- mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
+ mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
if mobj is not None:
- video_upload_date = mobj.group(1)
+ video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
# Vimeo specific: extract request signature and timestamp
sig = config['request']['signature']
'id': video_id,
'url': video_url,
'uploader': video_uploader,
+ 'uploader_id': video_uploader_id,
'upload_date': video_upload_date,
'title': video_title,
'ext': video_extension,
break
offset += limit
return info
+
+class FunnyOrDieIE(InfoExtractor):
+ _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+ IE_NAME = u'FunnyOrDie'
+
+ def report_extraction(self, video_id):
+ self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+ return
+
+ video_id = mobj.group('id')
+ self.report_extraction(video_id)
+ try:
+ urlh = compat_urllib_request.urlopen(url)
+ webpage_bytes = urlh.read()
+ webpage = webpage_bytes.decode('utf-8', 'ignore')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+ return
+
+ m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
+ if not m:
+ self._downloader.trouble(u'ERROR: unable to find video information')
+ video_url = unescapeHTML(m.group('url'))
+ print(video_url)
+
+ m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
+ if not m:
+ self._downloader.trouble(u'Cannot find video title')
+ title = unescapeHTML(m.group('title'))
+
+ m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
+ if m:
+ desc = unescapeHTML(m.group('desc'))
+ else:
+ desc = None
+
+ info = {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': title,
+ 'description': desc,
+ }
+ return [info]