Merge branch 'polskie-radio-programme' of https://github.com/JakubAdamWieczorek/youtu...
[youtube-dl] / youtube_dl / extractor / globo.py
index 6c0fc54dee86418f9499c15c31cca2359fd9cbe6..5638be48fbc41f179f53be64f6698be8404baf47 100644 (file)
@@ -19,7 +19,7 @@ from ..utils import (
 
 
 class GloboIE(InfoExtractor):
-    _VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
+    _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
 
     _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
     _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
@@ -60,9 +60,12 @@ class GloboIE(InfoExtractor):
     }, {
         'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
         'only_matching': True,
+    }, {
+        'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
+        'only_matching': True,
     }]
 
-    class MD5:
+    class MD5(object):
         HEX_FORMAT_LOWERCASE = 0
         HEX_FORMAT_UPPERCASE = 1
         BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
@@ -366,11 +369,9 @@ class GloboIE(InfoExtractor):
             resource_url = resource['url']
             signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
             if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
-                m3u8_formats = self._extract_m3u8_formats(
+                formats.extend(self._extract_m3u8_formats(
                     signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False)
-                if m3u8_formats:
-                    formats.extend(m3u8_formats)
+                    m3u8_id='hls', fatal=False))
             else:
                 formats.append({
                     'url': signed_url,
@@ -395,17 +396,17 @@ class GloboIE(InfoExtractor):
 
 
 class GloboArticleIE(InfoExtractor):
-    _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
 
     _VIDEOID_REGEXES = [
         r'\bdata-video-id=["\'](\d{7,})',
         r'\bdata-player-videosids=["\'](\d{7,})',
-        r'\bvideosIDs\s*:\s*["\'](\d{7,})',
+        r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
         r'\bdata-id=["\'](\d{7,})',
         r'<div[^>]+\bid=["\'](\d{7,})',
     ]
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
         'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
         'info_dict': {
@@ -414,9 +415,18 @@ class GloboArticleIE(InfoExtractor):
             'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de aviĆ£o',
             'duration': 110.711,
             'uploader': 'Rede Globo',
-            'uploader_id': 196,
+            'uploader_id': '196',
         }
-    }
+    }, {
+        'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+        'only_matching': True,
+    }]
 
     @classmethod
     def suitable(cls, url):