projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[YoutubeDL] store the subtitles to download in the 'requested_subtitles' field
[youtube-dl]
/
youtube_dl
/
extractor
/
ted.py
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index f8a87afdaf4d27c59b4b29491569b243331b2322..1809eaae403a4a35dcf6e90a33633fc4529bbf94 100644
(file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,17 +3,17 @@
from __future__ import unicode_literals
import json
import re
import json
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_str,
)
compat_str,
)
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
_VALID_URL = r'''(?x)
(?P<proto>https?://)
- (?P<type>www|embed)(?P<urlmain>\.ted\.com/
+ (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
- if m.group('type') == 'embed':
+ if m.group('type').startswith('embed'):
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
return self.url_result(desktop_url, 'TED')
name = m.group('name')
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
return self.url_result(desktop_url, 'TED')
name = m.group('name')
@@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor):
video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, talk_info)
- return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
@@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor):
'duration': talk_info.get('duration'),
}
'duration': talk_info.get('duration'),
}
- def _get_available_subtitles(self, video_id, talk_info):
+ def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
- url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
- sub_lang_list[l] = url
+ sub_lang_list[l] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
return sub_lang_list
else:
self._downloader.report_warning('video doesn\'t have subtitles')
return sub_lang_list
else:
self._downloader.report_warning('video doesn\'t have subtitles')
@@ -199,8 +201,9 @@ class TEDIE(SubtitlesInfoExtractor):
webpage = self._download_webpage(url, name)
config_json = self._html_search_regex(
webpage = self._download_webpage(url, name)
config_json = self._html_search_regex(
- r"data-config='([^']+)", webpage, 'config')
- config = json.loads(config_json)
+ r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
+ webpage, 'config')
+ config = json.loads(config_json)['config']
video_url = config['video']['url']
thumbnail = config.get('image', {}).get('url')
video_url = config['video']['url']
thumbnail = config.get('image', {}).get('url')