Merge remote-tracking branch 'sahutd/master'
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
12 files changed:
setup.py
youtube_dl/__init__.py
youtube_dl/downloader/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/huffpost.py [new file with mode: 0644]
youtube_dl/extractor/la7.py [new file with mode: 0644]
youtube_dl/extractor/malemotion.py [new file with mode: 0644]
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/websurg.py [deleted file]
youtube_dl/version.py

index 1f45159cd3e641f2257abf6d8781ce9f31bbda11..03e7b358e4ec1b4800e06f6796e386a808b67891 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,9 @@
 
 from __future__ import print_function
 
+import os.path
 import pkg_resources
+import warnings
 import sys
 
 try:
@@ -44,12 +46,24 @@ py2exe_params = {
 if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
     params = py2exe_params
 else:
+    files_spec = [
+        ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('share/doc/youtube_dl', ['README.txt']),
+        ('share/man/man1', ['youtube-dl.1'])
+    ]
+    root = os.path.dirname(os.path.abspath(__file__))
+    data_files = []
+    for dirname, files in files_spec:
+        resfiles = []
+        for fn in files:
+            if not os.path.exists(fn):
+                warnings.warn('Skipping file %s since it is not present. Type  make  to build all automatically generated files.' % fn)
+            else:
+                resfiles.append(fn)
+        data_files.append((dirname, resfiles))
+
     params = {
-        'data_files': [  # Installing system-wide would require sudo...
-            ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
-            ('share/doc/youtube_dl', ['README.txt']),
-            ('share/man/man1', ['youtube-dl.1'])
-        ]
+        'data_files': data_files,
     }
     if setuptools_available:
         params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
index 294fccb44ee356904e355d3520e9dc085c249eaa..08cf2f93407d8e22aef1294ad8226bcd366da147 100644 (file)
@@ -40,6 +40,7 @@ __authors__  = (
     'Michael Orlitzky',
     'Chris Gahan',
     'Saimadhav Heblikar',
+    'Mike Col',
 )
 
 __license__ = 'Public Domain'
index 0d9eb0001e774bdf75ccdf31c636947dfeecd330..aaa92bc758e59fce78265ebf01f87236c540e326 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from .common import FileDownloader
 from .hls import HlsFD
 from .http import HttpFD
@@ -12,10 +14,11 @@ from ..utils import (
 def get_suitable_downloader(info_dict):
     """Get the downloader class that can handle the info dict."""
     url = info_dict['url']
+    protocol = info_dict.get('protocol')
 
     if url.startswith('rtmp'):
         return RtmpFD
-    if determine_ext(url) == u'm3u8':
+    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
         return HlsFD
     if url.startswith('mms') or url.startswith('rtsp'):
         return MplayerFD
index ba3d4ac0eb9e75a35e35104c357564a50a9682a7..192baa9b898beef852cc06cde501d0c92f9f361f 100644 (file)
@@ -84,6 +84,7 @@ from .googlesearch import GoogleSearchIE
 from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
+from .huffpost import HuffPostIE
 from .hypem import HypemIE
 from .ign import IGNIE, OneUPIE
 from .imdb import (
@@ -107,6 +108,7 @@ from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
+from .la7 import LA7IE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE, LivestreamOriginalIE
 from .lynda import (
@@ -114,6 +116,7 @@ from .lynda import (
     LyndaCourseIE
 )
 from .macgamestore import MacGameStoreIE
+from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
@@ -220,7 +223,6 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .wat import WatIE
-from .websurg import WeBSurgIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
index 3cf742a3b6f49751ee4827a7d1f5cde3a59a884c..f7478d4598e8f15ea802ec9451aff53ce452fe7a 100644 (file)
@@ -71,7 +71,7 @@ class InfoExtractor(object):
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
-                                 "http", "https", "rtsp", "rtmp" or so.
+                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field.
@@ -466,6 +466,9 @@ class InfoExtractor(object):
         return RATING_TABLE.get(rating.lower(), None)
 
     def _sort_formats(self, formats):
+        if not formats:
+            raise ExtractorError(u'No video formats found')
+
         def _formats_key(f):
             # TODO remove the following workaround
             from ..utils import determine_ext
index e1933837d143ccec1550b2d6b9c7595c23096b67..829e5894fafc5f2f8a54c033d211808485cad4af 100644 (file)
@@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Facebook player
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Facebook')
 
+        # Look for embedded Huffington Post player (every literal dot in the host is escaped)
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'HuffPost')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None:
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py
new file mode 100644 (file)
index 0000000..0d1ea68
--- /dev/null
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class HuffPostIE(InfoExtractor):
+    """Extractor for live.huffingtonpost.com segment pages and embed players."""
+    IE_DESC = 'Huffington Post'
+    _VALID_URL = r'''(?x)
+        https?://(embed\.)?live\.huffingtonpost\.com/
+        (?:
+            r/segment/[^/]+/|
+            HPLEmbedPlayer/\?segmentId=
+        )
+        (?P<id>[0-9a-f]+)'''
+
+    _TEST = {
+        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
+        'file': '52dd3e4b02a7602131000677.mp4',
+        'md5': '55f5e8981c1c80a64706a44b74833de8',
+        'info_dict': {
+            'title': 'Legalese It! with @MikeSacksHP',
+            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
+            'duration': 1549,
+            'upload_date': '20140124',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the segment whose id is embedded in url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
+        data = self._download_json(api_url, video_id)['data']
+
+        video_title = data['title']
+        duration = parse_duration(data['running_time'])
+        upload_date = unified_strdate(data['schedule']['starts_at'])
+        description = data.get('description')
+
+        # Keep only images whose URL carries a WIDTHxHEIGHT token; that token
+        # is the only source used for the 'resolution' field.
+        thumbnails = []
+        for image_url in data['images'].values():
+            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
+            if not m:
+                continue
+            thumbnails.append({
+                'url': image_url,
+                'resolution': m.group(1),
+            })
+
+        formats = [{
+            'format': key,
+            'format_id': key.replace('/', '.'),
+            'ext': 'mp4',
+            'url': source_url,
+            'vcodec': 'none' if key.startswith('audio/') else None,
+        } for key, source_url in data['sources']['live'].items()]
+        if data.get('fivemin_id'):
+            # The avideos.5min.com path is derived from the id: bucket = id // 100 + 1,
+            # filed under a directory named after the bucket's last three digits.
+            fid = data['fivemin_id']
+            fcat = str(int(fid) // 100 + 1)
+            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
+            formats.append({
+                'format': 'fivemin',
+                'url': furl,
+                'preference': 1,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnails': thumbnails,
+        }
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
new file mode 100644 (file)
index 0000000..6d61f9a
--- /dev/null
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+)
+
+
+class LA7IE(InfoExtractor):
+    """Extractor for la7.tv richplayer and contentId URLs, read from an XML document."""
+    IE_NAME = 'la7.tv'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?la7\.tv/
+        (?:
+            richplayer/\?assetid=|
+            \?contentId=
+        )
+        (?P<id>[0-9]+)'''
+
+    _TEST = {
+        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
+        'file': '50355319.mp4',
+        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'info_dict': {
+            'title': 'IL DIVO',
+            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
+            'duration': 6254,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the content id embedded in url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = doc.find('title').text
+        duration = parse_duration(doc.find('duration').text)
+
+        # Optional metadata: findtext() yields None for a missing element
+        # instead of raising AttributeError, so a sparse document still extracts.
+        description = doc.findtext('description') or None
+        thumbnail = doc.findtext('img') or None
+        views = doc.findtext('views')
+        view_count = int(views) if views else None
+
+        # fqdn has 'auto:' rewritten to 'http:' and then replaces the 'mp4:' marker in each stream path.
+        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
+
+        formats = [{
+            'format': vnode.find('quality').text,
+            'tbr': int(vnode.find('quality').text),
+            'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
+        } for vnode in doc.findall('.//videos/video')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'view_count': view_count,
+        }
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
new file mode 100644 (file)
index 0000000..62e9909
--- /dev/null
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+
+class MalemotionIE(InfoExtractor):
+    """Extractor for malemotion.com video pages; results always carry age_limit 18."""
+    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _TEST = {
+        'url': 'http://malemotion.com/video/bien-dur.10ew',
+        'file': '10ew.mp4',
+        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'info_dict': {
+            "title": "Bien dur",
+            "age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the page's mp4 <source>, <title> and poster into an info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group("id")
+
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        # Extract video URL (the src attribute value is unquoted before use)
+        video_url = compat_urllib_parse.unquote(
+            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
+
+        # Extract title
+        video_title = self._html_search_regex(
+            r'<title>(.*?)</title', webpage, 'title')
+
+        # Extract video thumbnail (optional: fatal=False)
+        video_thumbnail = self._search_regex(
+            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'mp4',
+            'preference': 1,
+        }]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'uploader': None,
+            'upload_date': None,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': None,
+            'age_limit': 18,
+        }
index ad5840ca2f4b8f2fe82c46cca261af86716f25dc..f7bc77c48cbc70cda5191dcca6da099f423771fc 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -9,11 +11,11 @@ from ..utils import (
 class TumblrIE(InfoExtractor):
     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
     _TEST = {
-        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
-        u'file': u'54196191430.mp4',
-        u'md5': u'479bb068e5b16462f5176a6828829767',
-        u'info_dict': {
-            u"title": u"tatiana maslany news"
+        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+        'file': '54196191430.mp4',
+        'md5': '479bb068e5b16462f5176a6828829767',
+        'info_dict': {
+            "title": "tatiana maslany news"
         }
     }
 
@@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor):
         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
         video = re.search(re_video, webpage)
         if video is None:
-           raise ExtractorError(u'Unable to extract video')
+            raise ExtractorError('Unable to extract video')
         video_url = video.group('video_url')
         ext = video.group('ext')
 
-        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
-            webpage, u'thumbnail', fatal=False)  # We pick the first poster
-        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
+        video_thumbnail = self._search_regex(
+            r'posters.*?\[\\x22(.*?)\\x22',
+            webpage, 'thumbnail', fatal=False)  # We pick the first poster
+        if video_thumbnail:
+            video_thumbnail = video_thumbnail.replace('\\\\/', '/')
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
         video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
-            webpage, u'title', flags=re.DOTALL)
+            webpage, 'title', flags=re.DOTALL)
 
         return [{'id': video_id,
                  'url': video_url,
diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py
deleted file mode 100644 (file)
index 43953bf..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-# coding: utf-8
-
-import re
-
-from ..utils import (
-    compat_urllib_request,
-    compat_urllib_parse
-)
-
-from .common import InfoExtractor
-
-class WeBSurgIE(InfoExtractor):
-    IE_NAME = u'websurg.com'
-    _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'
-
-    _TEST = {
-        u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
-        u'file': u'vd01en4012.mp4',
-        u'params': {
-            u'skip_download': True,
-        },
-        u'skip': u'Requires login information',
-    }
-    
-    _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'
-
-    def _real_initialize(self):
-
-        login_form = {
-            'username': self._downloader.params['username'],
-            'password': self._downloader.params['password'],
-            'Submit': 1
-        }
-        
-        request = compat_urllib_request.Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        request.add_header(
-            'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
-        compat_urllib_request.urlopen(request).info()
-        webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in')
-        
-        if webpage != 'OK':
-            self._downloader.report_error(
-                u'Unable to log in: bad username/password')
-        
-    def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group(1)
-        
-        webpage = self._download_webpage(url, video_id)
-        
-        url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
-        
-        return {'id': video_id,
-                'title': self._og_search_title(webpage),
-                'description': self._og_search_description(webpage),
-                'ext' : 'mp4',
-                'url' : url_info.group(1) + '/' + url_info.group(2),
-                'thumbnail': self._og_search_thumbnail(webpage)
-                }
index 0701961a50a204626fdec7ec61c3753b0faa5c55..dd3c37007efb2407b366e29c32365c995115814c 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.01.23.4'
+__version__ = '2014.01.27.1'