projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
4383718
)
[mlb] Fall back to extracting video id from webpage for all URLs that do not contain...
author
Sergey M․
<dstftw@gmail.com>
Fri, 8 May 2015 14:07:53 +0000
(20:07 +0600)
committer
Sergey M․
<dstftw@gmail.com>
Fri, 8 May 2015 14:07:53 +0000
(20:07 +0600)
youtube_dl/extractor/mlb.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/mlb.py
b/youtube_dl/extractor/mlb.py
index ee9ff73bf22f3fbcf769b63fdcbdf96395f1db4a..109eecefd9afffb719486256354bfe8b2fc8c6e7 100644
(file)
--- a/
youtube_dl/extractor/mlb.py
+++ b/
youtube_dl/extractor/mlb.py
@@
-10,7
+10,21
@@
from ..utils import (
class MLBIE(InfoExtractor):
class MLBIE(InfoExtractor):
- _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/
+ (?:
+ (?:
+ (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+ (?:
+ shared/video/embed/(?:embed|m-internal-embed)\.html|
+ [^/]+/video/play\.jsp
+ )\?.*?\bcontent_id=
+ )
+ (?P<id>n?\d+)|
+ (?P<path>.+?)
+ )
+ '''
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@
-95,6
+109,12
@@
class MLBIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ if not video_id:
+ video_path = mobj.group('path')
+ webpage = self._download_webpage(url, video_path)
+ video_id = self._search_regex(
+ r'data-videoid="(\d+)"', webpage, 'video id')
+
detail = self._download_xml(
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
detail = self._download_xml(
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)