[youtube] fix hd720 format position

[youtube-dl] / youtube_dl / extractor / tvplay.py
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py

index 3d4c576c8f146753767ce9a9d763c716bdf6b1cb..e09b5f804d897954f4488344d27beaa8a7a2eea6 100644 (file)
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -5,29 +5,43 @@ import re
  
  from .common import InfoExtractor
  from ..compat import (
+    compat_HTTPError,
      compat_str,
      compat_urlparse,
  )
  from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
      parse_iso8601,
      qualities,
-    determine_ext,
+    smuggle_url,
+    try_get,
+    unsmuggle_url,
      update_url_query,
-    int_or_none,
  )
  
  
  class TVPlayIE(InfoExtractor):
-    IE_DESC = 'TV3Play and related services'
-    _VALID_URL = r'''(?x)https?://(?:www\.)?
-        (?:tvplay(?:\.skaties)?\.lv/parraides|
-           (?:tv3play|play\.tv3)\.lt/programos|
-           tv3play(?:\.tv3)?\.ee/sisu|
-           tv(?:3|6|8|10)play\.se/program|
-           (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
-           play\.novatv\.bg/programi
-        )/[^/]+/(?P<id>\d+)
-        '''
+    IE_NAME = 'mtg'
+    IE_DESC = 'MTG services'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        mtg:|
+                        https?://
+                            (?:www\.)?
+                            (?:
+                                tvplay(?:\.skaties)?\.lv/parraides|
+                                (?:tv3play|play\.tv3)\.lt/programos|
+                                tv3play(?:\.tv3)?\.ee/sisu|
+                                (?:tv(?:3|6|8|10)play|viafree)\.se/program|
+                                (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
+                                play\.novatv\.bg/programi
+                            )
+                            /(?:[^/]+/)+
+                        )
+                        (?P<id>\d+)
+                    '''
      _TESTS = [
          {
              'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
@@ -192,26 +206,51 @@ class TVPlayIE(InfoExtractor):
              'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
              'only_matching': True,
          },
+        {
+            # views is null
+            'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
+            'only_matching': True,
+        },
          {
              'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
              'only_matching': True,
+        },
+        {
+            'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
+            'only_matching': True,
+        },
+        {
+            'url': 'mtg:418113',
+            'only_matching': True,
          }
      ]
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })
  
+        video_id = self._match_id(url)
+        geo_country = self._search_regex(
+            r'https?://[^/]+\.([a-z]{2})', url,
+            'geo country', default=None)
+        if geo_country:
+            self._initialize_geo_bypass({'countries': [geo_country.upper()]})
          video = self._download_json(
-            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
+            'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
  
          title = video['title']
  
-        if video.get('is_geo_blocked'):
-            self.report_warning(
-                'This content might not be available in your country due to copyright reasons')
-
-        streams = self._download_json(
-            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
+        try:
+            streams = self._download_json(
+                'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
+                video_id, 'Downloading streams JSON')
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
+                raise ExtractorError(msg['msg'], expected=True)
+            raise
  
          quality = qualities(['hls', 'medium', 'high'])
          formats = []
@@ -236,6 +275,8 @@ class TVPlayIE(InfoExtractor):
                      'ext': ext,
                  }
                  if video_url.startswith('rtmp'):
+                    if smuggled_data.get('skip_rtmp'):
+                        continue
                      m = re.search(
                          r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
                      if not m:
@@ -251,6 +292,11 @@ class TVPlayIE(InfoExtractor):
                          'url': video_url,
                      })
                  formats.append(fmt)
+
+        if not formats and video.get('is_geo_blocked'):
+            self.raise_geo_restricted(
+                'This content might not be available in your country due to copyright reasons')
+
          self._sort_formats(formats)
  
          # TODO: webvtt in m3u8
@@ -279,8 +325,123 @@ class TVPlayIE(InfoExtractor):
              'season_number': season_number,
              'duration': int_or_none(video.get('duration')),
              'timestamp': parse_iso8601(video.get('created_at')),
-            'view_count': int_or_none(video.get('views', {}).get('total')),
+            'view_count': try_get(video, lambda x: x['views']['total'], int),
              'age_limit': int_or_none(video.get('age_limit', 0)),
              'formats': formats,
              'subtitles': subtitles,
          }
+
+
+class ViafreeIE(InfoExtractor):
+    """Extractor for viafree.dk / viafree.no / viafree.se programme pages.
+
+    The web page is only used to discover the video id; the result is a
+    smuggled ``mtg:<id>`` URL that is handed over to TVPlayIE.
+    """
+    # The last path segment is captured as ``id``. URLs that TVPlayIE
+    # already accepts are filtered out in suitable() below.
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        viafree\.
+                        (?:
+                            (?:dk|no)/programmer|
+                            se/program
+                        )
+                        /(?:[^/]+/)+(?P<id>[^/?#&]+)
+                    '''
+    _TESTS = [{
+        'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
+        'info_dict': {
+            'id': '395375',
+            'ext': 'mp4',
+            'title': 'Husräddarna S02E02',
+            'description': 'md5:4db5c933e37db629b5a2f75dfb34829e',
+            'series': 'Husräddarna',
+            'season': 'Säsong 2',
+            'season_number': 2,
+            'duration': 2576,
+            'timestamp': 1400596321,
+            'upload_date': '20140520',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [TVPlayIE.ie_key()],
+    }, {
+        # with relatedClips
+        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
+        'info_dict': {
+            'id': '758770',
+            'ext': 'mp4',
+            'title': 'Sommaren med YouTube-stjärnorna S01E01',
+            'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f',
+            'series': 'Sommaren med YouTube-stjärnorna',
+            'season': 'Säsong 1',
+            'season_number': 1,
+            'duration': 1326,
+            'timestamp': 1470905572,
+            'upload_date': '20160811',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [TVPlayIE.ie_key()],
+    }, {
+        # Different og:image URL schema
+        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        """Return False for any URL TVPlayIE accepts, else defer to the base class.
+
+        TVPlayIE's pattern also covers viafree URLs that end in a numeric
+        id, so those are left to TVPlayIE instead of being claimed here.
+        """
+        return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Resolve a viafree page to its video id and delegate to TVPlayIE.
+
+        The id is looked up in three places, in order: the ``window.App``
+        data embedded in the page, the og:image thumbnail URL, and finally
+        a regex over the raw page source.
+
+        Returns the value of ``self.url_result(...)`` for a smuggled
+        ``mtg:<id>`` URL with TVPlayIE as the target extractor.
+        """
+        # At this point the "id" is just the last URL path segment.
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # Pull the object literal assigned to window.App out of the page.
+        # transform_source replaces inline ``function name(...) {...}``
+        # definitions with ``null`` before parsing (presumably because they
+        # are not valid JSON). A missing blob falls back to '{}'.
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script',
+                webpage, 'data', default='{}'),
+            video_id, transform_source=lambda x: re.sub(
+                r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
+                'null', x), fatal=False)
+
+        # Discard the URL segment; from here on video_id is the real id
+        # found in the page, or None until one of the lookups succeeds.
+        video_id = None
+
+        if data:
+            # Preferred source: the current video's id in the page data,
+            # requested as compat_str.
+            video_id = try_get(
+                data, lambda x: x['context']['dispatcher']['stores'][
+                    'ContentPageProgramStore']['currentVideo']['id'],
+                compat_str)
+
+        # Fallback #1 (extract from og:image URL schema)
+        if not video_id:
+            thumbnail = self._og_search_thumbnail(webpage, default=None)
+            if thumbnail:
+                video_id = self._search_regex(
+                    # Patterns seen:
+                    #  http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
+                    #  http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
+                    r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
+                    thumbnail, 'video id', default=None)
+
+        # Fallback #2. Extract from raw JSON string.
+        # May extract wrong video id if relatedClips is present.
+        # No default is passed here, unlike the lookups above.
+        # NOTE(review): presumably _search_regex raises when nothing matches - confirm.
+        if not video_id:
+            video_id = self._search_regex(
+                r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
+                webpage, 'video id')
+
+        # Hand over to TVPlayIE. The smuggled dict carries the text after
+        # the last '.' of the URL's netloc as the only geo country, plus a
+        # flag telling TVPlayIE to skip rtmp formats.
+        # NOTE(review): a netloc with an explicit port would end up in
+        # 'geo_countries' as e.g. 'se:8080' - confirm this cannot occur.
+        return self.url_result(
+            smuggle_url(
+                'mtg:%s' % video_id,
+                {
+                    'geo_countries': [
+                        compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
+                    # rtmp host mtgfs.fplive.net for viafree is unresolvable
+                    'skip_rtmp': True,
+                }),
+            ie=TVPlayIE.ie_key(), video_id=video_id)