X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fflickr.py;h=9f166efd4851fe0833de8a85bac07a97ce9f1722;hb=HEAD;hp=791d5b61dd27efcaf0aa94d3bce2e1744aa72678;hpb=1ac4004f3ad0d4ea528c9883a16617abf5429448;p=youtube-dl diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 791d5b61d..9f166efd4 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -1,57 +1,116 @@ -import re +from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_urlencode, +) from ..utils import ( ExtractorError, - unescapeHTML, + int_or_none, + qualities, ) class FlickrIE(InfoExtractor): - """Information Extractor for Flickr videos""" - _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P[\w\-_@]+)/(?P\d+).*' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - video_uploader_id = mobj.group('uploader_id') - webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id - webpage = self._download_webpage(webpage_url, video_id) + _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P\d+)' + _TEST = { + 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + 'md5': '164fe3fa6c22e18d448d4d5af2330f31', + 'info_dict': { + 'id': '5645318632', + 'ext': 'mpg', + 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.', + 'title': 'Dark Hollow Waterfalls', + 'duration': 19, + 'timestamp': 1303528740, + 'upload_date': '20110423', + 'uploader_id': '10922353@N03', + 'uploader': 'Forest Wander', + 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/', + 'comment_count': int, + 'view_count': int, + 'tags': list, + 'license': 'Attribution-ShareAlike', + } + } + _API_BASE_URL = 'https://api.flickr.com/services/rest?' + # https://help.yahoo.com/kb/flickr/SLN25525.html + _LICENSES = { + '0': 'All Rights Reserved', + '1': 'Attribution-NonCommercial-ShareAlike', + '2': 'Attribution-NonCommercial', + '3': 'Attribution-NonCommercial-NoDerivs', + '4': 'Attribution', + '5': 'Attribution-ShareAlike', + '6': 'Attribution-NoDerivs', + '7': 'No known copyright restrictions', + '8': 'United States government work', + '9': 'Public Domain Dedication (CC0)', + '10': 'Public Domain Work', + } - secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret') + def _call_api(self, method, video_id, api_key, note, secret=None): + query = { + 'photo_id': video_id, + 'method': 'flickr.%s' % method, + 'api_key': api_key, + 'format': 'json', + 'nojsoncallback': 1, + } + if secret: + query['secret'] = secret + data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note) + if data['stat'] != 'ok': + raise ExtractorError(data['message']) + return data - first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' - first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') - - node_id = self._html_search_regex(r'(\d+-\d+)', - first_xml, u'node_id') - - second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' - second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') + def _real_extract(self, url): + video_id = self._match_id(url) - self.report_extraction(video_id) + api_key = self._download_json( + 'https://www.flickr.com/hermes_error_beacon.gne', video_id, + 'Downloading api key')['site_key'] - mobj = re.search(r'