projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge pull request #7320 from remitamine/adobetv
[youtube-dl]
/
youtube_dl
/
extractor
/
flickr.py
diff --git
a/youtube_dl/extractor/flickr.py
b/youtube_dl/extractor/flickr.py
index 0c858b6544b919b1b569b4c4102447631298046e..91cd46e76cbacaf2dac242d14987b2ae777d6995 100644
(file)
--- a/
youtube_dl/extractor/flickr.py
+++ b/
youtube_dl/extractor/flickr.py
@@
-5,7
+5,8
@@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- unescapeHTML,
+ find_xpath_attr,
+ sanitized_Request,
)
)
@@
-29,25
+30,31
@@
class FlickrIE(InfoExtractor):
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
+ req = sanitized_Request(webpage_url)
+ req.add_header(
+ 'User-Agent',
+ # it needs a more recent version
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
+ webpage = self._download_webpage(req, video_id)
- secret = self._search_regex(r
"photo_secret: '(\w+)'"
, webpage, 'secret')
+ secret = self._search_regex(r
'secret"\s*:\s*"(\w+)"'
, webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
- first_xml = self._download_
webpage
(first_url, video_id, 'Downloading first data webpage')
+ first_xml = self._download_
xml
(first_url, video_id, 'Downloading first data webpage')
- node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
- first_xml, 'node_id')
+ node_id = find_xpath_attr(
+ first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
+ 'id').text
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
- second_xml = self._download_
webpage
(second_url, video_id, 'Downloading second data webpage')
+ second_xml = self._download_
xml
(second_url, video_id, 'Downloading second data webpage')
self.report_extraction(video_id)
self.report_extraction(video_id)
-
mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml
)
- if
mobj
is None:
+
stream = second_xml.find('.//STREAM'
)
+ if
stream
is None:
raise ExtractorError('Unable to extract video url')
raise ExtractorError('Unable to extract video url')
- video_url =
mobj.group(1) + unescapeHTML(mobj.group(2))
+ video_url =
stream.attrib['APP'] + stream.attrib['FULLPATH']
return {
'id': video_id,
return {
'id': video_id,