Merge branch 'master' of https://github.com/linhua55/youtube-dl into linhua55-master

[youtube-dl] / youtube_dl / extractor / snagfilms.py
diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py

index cb52eb72b98040951ce1127e4739e7ee9e116cb5..6977afb27850ff908f1c08fcc4ecc672b7b5cc9f 100644 (file)
--- a/youtube_dl/extractor/snagfilms.py
+++ b/youtube_dl/extractor/snagfilms.py
@@ -4,6 +4,7 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      clean_html,
      determine_ext,
      int_or_none,
@@ -22,6 +23,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
              'ext': 'mp4',
              'title': '#whilewewatch',
          }
+    }, {
+        # invalid labels, 360p is better than 480p
+        'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+        'md5': '882fca19b9eb27ef865efeeaed376a48',
+        'info_dict': {
+            'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+            'ext': 'mp4',
+            'title': 'Life in Limbo',
+        }
      }, {
          'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
          'only_matching': True,
@@ -30,7 +40,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
      @staticmethod
      def _extract_url(webpage):
          mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
+            webpage)
          if mobj:
              return mobj.group('url')
  
@@ -39,6 +50,10 @@ class SnagFilmsEmbedIE(InfoExtractor):
  
          webpage = self._download_webpage(url, video_id)
  
+        if '>This film is not playable in your area.<' in webpage:
+            raise ExtractorError(
+                'Film %s is not playable in your area.' % video_id, expected=True)
+
          formats = []
          for source in self._parse_json(js_to_json(self._search_regex(
                  r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
@@ -46,14 +61,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
              if not file_:
                  continue
              type_ = source.get('type')
-            format_id = source.get('label')
              ext = determine_ext(file_)
-            if any(_ == 'm3u8' for _ in (type_, ext)):
+            format_id = source.get('label') or ext
+            if all(v == 'm3u8' for v in (type_, ext)):
                  formats.extend(self._extract_m3u8_formats(
                      file_, video_id, 'mp4', m3u8_id='hls'))
              else:
                  bitrate = int_or_none(self._search_regex(
-                    r'(\d+)kbps', file_, 'bitrate', default=None))
+                    [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
+                    file_, 'bitrate', default=None))
                  height = int_or_none(self._search_regex(
                      r'^(\d+)[pP]$', format_id, 'height', default=None))
                  formats.append({
@@ -76,8 +92,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
  
  
  class SnagFilmsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/films/title/(?P<id>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
+    _TESTS = [{
          'url': 'http://www.snagfilms.com/films/title/lost_for_life',
          'md5': '19844f897b35af219773fd63bdec2942',
          'info_dict': {
@@ -90,13 +106,38 @@ class SnagFilmsIE(InfoExtractor):
              'duration': 4489,
              'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
          }
-    }
+    }, {
+        'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+        'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+        'info_dict': {
+            'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+            'display_id': 'the_world_cut_project/india',
+            'ext': 'mp4',
+            'title': 'India',
+            'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 979,
+            'categories': ['Documentary', 'Sports', 'Politics']
+        }
+    }, {
+        # Film is not playable in your area.
+        'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+        'only_matching': True,
+    }, {
+        # Film is not available.
+        'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
  
          webpage = self._download_webpage(url, display_id)
  
+        if ">Sorry, the Film you're looking for is not available.<" in webpage:
+            raise ExtractorError(
+                'Film %s is not available.' % display_id, expected=True)
+
          film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
  
          snag = self._parse_json(