[tvnet] Fix extraction.
[youtube-dl] / youtube_dl / extractor / tvnet.py
index 0ec2da4da9361d7f10873cec63c13c0549d1ddac..4222ff9ee239fd2b55cc7771892ace9f3c59d43f 100644 (file)
@@ -4,15 +4,15 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     unescapeHTML,
+    url_or_none,
 )
 
 
 class TVNetIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
     _TESTS = [{
         # video
         'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
@@ -36,6 +36,18 @@ class TVNetIE(InfoExtractor):
             'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
             'is_live': False,
         },
+    }, {
+        'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
+        'info_dict': {
+            'id': '129999',
+            'ext': 'mp4',
+            'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': False,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # live stream
         'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
@@ -62,6 +74,9 @@ class TVNetIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -91,9 +106,8 @@ class TVNetIE(InfoExtractor):
         for stream in self._download_json(data_file, video_id):
             if not isinstance(stream, dict):
                 continue
-            stream_url = stream.get('url')
-            if (stream_url in stream_urls or not stream_url or
-                    not isinstance(stream_url, compat_str)):
+            stream_url = url_or_none(stream.get('url'))
+            if stream_url in stream_urls or not stream_url:
                 continue
             stream_urls.add(stream_url)
             formats.extend(self._extract_m3u8_formats(