Merge remote-tracking branch 'sahutd/master'

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
diff --git a/setup.py b/setup.py

index 1f45159cd3e641f2257abf6d8781ce9f31bbda11..03e7b358e4ec1b4800e06f6796e386a808b67891 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,9 @@
  
  from __future__ import print_function
  
+import os.path
  import pkg_resources
+import warnings
  import sys
  
  try:
@@ -44,12 +46,24 @@ py2exe_params = {
  if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
      params = py2exe_params
  else:
+    files_spec = [
+        ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('share/doc/youtube_dl', ['README.txt']),
+        ('share/man/man1', ['youtube-dl.1'])
+    ]
+    root = os.path.dirname(os.path.abspath(__file__))
+    data_files = []
+    for dirname, files in files_spec:
+        resfiles = []
+        for fn in files:
+            if not os.path.exists(fn):
+                warnings.warn('Skipping file %s since it is not present. Type  make  to build all automatically generated files.' % fn)
+            else:
+                resfiles.append(fn)
+        data_files.append((dirname, resfiles))
+
      params = {
-        'data_files': [  # Installing system-wide would require sudo...
-            ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
-            ('share/doc/youtube_dl', ['README.txt']),
-            ('share/man/man1', ['youtube-dl.1'])
-        ]
+        'data_files': data_files,
      }
      if setuptools_available:
          params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 294fccb44ee356904e355d3520e9dc085c249eaa..08cf2f93407d8e22aef1294ad8226bcd366da147 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -40,6 +40,7 @@ __authors__  = (
      'Michael Orlitzky',
      'Chris Gahan',
      'Saimadhav Heblikar',
+    'Mike Col',
  )
  
  __license__ = 'Public Domain'
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py

index 0d9eb0001e774bdf75ccdf31c636947dfeecd330..aaa92bc758e59fce78265ebf01f87236c540e326 100644 (file)
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  from .common import FileDownloader
  from .hls import HlsFD
  from .http import HttpFD
@@ -12,10 +14,11 @@ from ..utils import (
  def get_suitable_downloader(info_dict):
      """Get the downloader class that can handle the info dict."""
      url = info_dict['url']
+    protocol = info_dict.get('protocol')
  
      if url.startswith('rtmp'):
          return RtmpFD
-    if determine_ext(url) == u'm3u8':
+    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
          return HlsFD
      if url.startswith('mms') or url.startswith('rtsp'):
          return MplayerFD
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index ba3d4ac0eb9e75a35e35104c357564a50a9682a7..192baa9b898beef852cc06cde501d0c92f9f361f 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -84,6 +84,7 @@ from .googlesearch import GoogleSearchIE
  from .hark import HarkIE
  from .hotnewhiphop import HotNewHipHopIE
  from .howcast import HowcastIE
+from .huffpost import HuffPostIE
  from .hypem import HypemIE
  from .ign import IGNIE, OneUPIE
  from .imdb import (
@@ -107,6 +108,7 @@ from .keezmovies import KeezMoviesIE
  from .khanacademy import KhanAcademyIE
  from .kickstarter import KickStarterIE
  from .keek import KeekIE
+from .la7 import LA7IE
  from .liveleak import LiveLeakIE
  from .livestream import LivestreamIE, LivestreamOriginalIE
  from .lynda import (
@@ -114,6 +116,7 @@ from .lynda import (
      LyndaCourseIE
  )
  from .macgamestore import MacGameStoreIE
+from .malemotion import MalemotionIE
  from .mdr import MDRIE
  from .metacafe import MetacafeIE
  from .metacritic import MetacriticIE
@@ -220,7 +223,6 @@ from .vine import VineIE
  from .viki import VikiIE
  from .vk import VKIE
  from .wat import WatIE
-from .websurg import WeBSurgIE
  from .weibo import WeiboIE
  from .wimp import WimpIE
  from .wistia import WistiaIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 3cf742a3b6f49751ee4827a7d1f5cde3a59a884c..f7478d4598e8f15ea802ec9451aff53ce452fe7a 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,7 +71,7 @@ class InfoExtractor(object):
                      * player_url SWF Player URL (used for rtmpdump).
                      * protocol   The protocol that will be used for the actual
                                   download, lower-case.
-                                 "http", "https", "rtsp", "rtmp" or so.
+                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                      * preference Order number of this format. If this field is
                                   present and not None, the formats get sorted
                                   by this field.
@@ -466,6 +466,9 @@ class InfoExtractor(object):
          return RATING_TABLE.get(rating.lower(), None)
  
      def _sort_formats(self, formats):
+        if not formats:
+            raise ExtractorError(u'No video formats found')
+
          def _formats_key(f):
              # TODO remove the following workaround
              from ..utils import determine_ext
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index e1933837d143ccec1550b2d6b9c7595c23096b67..829e5894fafc5f2f8a54c033d211808485cad4af 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded Facebook player
          mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'Facebook')
  
+        # Look for embedded Huffington Post player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'HuffPost')
+
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None:
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py

new file mode 100644 (file)

index 0000000..0d1ea68
--- /dev/null
+++ b/youtube_dl/extractor/huffpost.py
@@ -0,0 +1,82 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class HuffPostIE(InfoExtractor):
+    IE_DESC = 'Huffington Post'
+    _VALID_URL = r'''(?x)
+        https?://(embed\.)?live\.huffingtonpost\.com/
+        (?:
+            r/segment/[^/]+/|
+            HPLEmbedPlayer/\?segmentId=
+        )
+        (?P<id>[0-9a-f]+)'''
+
+    _TEST = {
+        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
+        'file': '52dd3e4b02a7602131000677.mp4',
+        'md5': '55f5e8981c1c80a64706a44b74833de8',
+        'info_dict': {
+            'title': 'Legalese It! with @MikeSacksHP',
+            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
+            'duration': 1549,
+            'upload_date': '20140124',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
+        data = self._download_json(api_url, video_id)['data']
+
+        video_title = data['title']
+        duration = parse_duration(data['running_time'])
+        upload_date = unified_strdate(data['schedule']['starts_at'])
+        description = data.get('description')
+
+        thumbnails = []
+        for url in data['images'].values():
+            m = re.match('.*-([0-9]+x[0-9]+)\.', url)
+            if not m:
+                continue
+            thumbnails.append({
+                'url': url,
+                'resolution': m.group(1),
+            })
+
+        formats = [{
+            'format': key,
+            'format_id': key.replace('/', '.'),
+            'ext': 'mp4',
+            'url': url,
+            'vcodec': 'none' if key.startswith('audio/') else None,
+        } for key, url in data['sources']['live'].items()]
+        if data.get('fivemin_id'):
+            fid = data['fivemin_id']
+            fcat = str(int(fid) // 100 + 1)
+            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
+            formats.append({
+                'format': 'fivemin',
+                'url': furl,
+                'preference': 1,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnails': thumbnails,
+        }
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py

new file mode 100644 (file)

index 0000000..6d61f9a
--- /dev/null
+++ b/youtube_dl/extractor/la7.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+)
+
+
+class LA7IE(InfoExtractor):
+    IE_NAME = 'la7.tv'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?la7\.tv/
+        (?:
+            richplayer/\?assetid=|
+            \?contentId=
+        )
+        (?P<id>[0-9]+)'''
+
+    _TEST = {
+        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
+        'file': '50355319.mp4',
+        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'info_dict': {
+            'title': 'IL DIVO',
+            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
+            'duration': 6254,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = doc.find('title').text
+        description = doc.find('description').text
+        duration = parse_duration(doc.find('duration').text)
+        thumbnail = doc.find('img').text
+        view_count = int(doc.find('views').text)
+
+        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
+
+        formats = [{
+            'format': vnode.find('quality').text,
+            'tbr': int(vnode.find('quality').text),
+            'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
+        } for vnode in doc.findall('.//videos/video')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'view_count': view_count,
+        }
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py

new file mode 100644 (file)

index 0000000..62e9909
--- /dev/null
+++ b/youtube_dl/extractor/malemotion.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+class MalemotionIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _TEST = {
+        'url': 'http://malemotion.com/video/bien-dur.10ew',
+        'file': '10ew.mp4',
+        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'info_dict': {
+            "title": "Bien dur",
+            "age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group("id")
+
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        # Extract video URL
+        video_url = compat_urllib_parse.unquote(
+            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
+
+        # Extract title
+        video_title = self._html_search_regex(
+            r'<title>(.*?)</title', webpage, 'title')
+
+        # Extract video thumbnail
+        video_thumbnail = self._search_regex(
+            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'mp4',
+            'preference': 1,
+        }]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'uploader': None,
+            'upload_date': None,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': None,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py

index ad5840ca2f4b8f2fe82c46cca261af86716f25dc..f7bc77c48cbc70cda5191dcca6da099f423771fc 100644 (file)
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
@@ -9,11 +11,11 @@ from ..utils import (
  class TumblrIE(InfoExtractor):
      _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
      _TEST = {
-        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
-        u'file': u'54196191430.mp4',
-        u'md5': u'479bb068e5b16462f5176a6828829767',
-        u'info_dict': {
-            u"title": u"tatiana maslany news"
+        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+        'file': '54196191430.mp4',
+        'md5': '479bb068e5b16462f5176a6828829767',
+        'info_dict': {
+            "title": "tatiana maslany news"
          }
      }
  
@@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor):
          re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
          video = re.search(re_video, webpage)
          if video is None:
-           raise ExtractorError(u'Unable to extract video')
+            raise ExtractorError('Unable to extract video')
          video_url = video.group('video_url')
          ext = video.group('ext')
  
-        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
-            webpage, u'thumbnail', fatal=False)  # We pick the first poster
-        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
+        video_thumbnail = self._search_regex(
+            r'posters.*?\[\\x22(.*?)\\x22',
+            webpage, 'thumbnail', fatal=False)  # We pick the first poster
+        if video_thumbnail:
+            video_thumbnail = video_thumbnail.replace('\\\\/', '/')
  
          # The only place where you can get a title, it's not complete,
          # but searching in other places doesn't work for all videos
          video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
-            webpage, u'title', flags=re.DOTALL)
+            webpage, 'title', flags=re.DOTALL)
  
          return [{'id': video_id,
                   'url': video_url,
diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py

deleted file mode 100644 (file)

index 43953bf..0000000
--- a/youtube_dl/extractor/websurg.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# coding: utf-8
-
-import re
-
-from ..utils import (
-    compat_urllib_request,
-    compat_urllib_parse
-)
-
-from .common import InfoExtractor
-
-class WeBSurgIE(InfoExtractor):
-    IE_NAME = u'websurg.com'
-    _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'
-
-    _TEST = {
-        u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
-        u'file': u'vd01en4012.mp4',
-        u'params': {
-            u'skip_download': True,
-        },
-        u'skip': u'Requires login information',
-    }
-    
-    _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'
-
-    def _real_initialize(self):
-
-        login_form = {
-            'username': self._downloader.params['username'],
-            'password': self._downloader.params['password'],
-            'Submit': 1
-        }
-        
-        request = compat_urllib_request.Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        request.add_header(
-            'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
-        compat_urllib_request.urlopen(request).info()
-        webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in')
-        
-        if webpage != 'OK':
-            self._downloader.report_error(
-                u'Unable to log in: bad username/password')
-        
-    def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group(1)
-        
-        webpage = self._download_webpage(url, video_id)
-        
-        url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
-        
-        return {'id': video_id,
-                'title': self._og_search_title(webpage),
-                'description': self._og_search_description(webpage),
-                'ext' : 'mp4',
-                'url' : url_info.group(1) + '/' + url_info.group(2),
-                'thumbnail': self._og_search_thumbnail(webpage)
-                }
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 0701961a50a204626fdec7ec61c3753b0faa5c55..dd3c37007efb2407b366e29c32365c995115814c 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.01.23.4'
+__version__ = '2014.01.27.1'
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 27 Jan 2014 11:21:00 +0000 (12:21 +0100)
setup.py		patch | blob | history
youtube_dl/__init__.py		patch | blob | history
youtube_dl/downloader/__init__.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/huffpost.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/la7.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/malemotion.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/tumblr.py		patch | blob | history
youtube_dl/extractor/websurg.py	[deleted file]	patch | blob | history
youtube_dl/version.py		patch | blob | history