[amcnetworks] Recognize more BBC America URLs

[youtube-dl] / youtube_dl / extractor / telebruxelles.py
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py

index a3d05f97d681b6cb4da6adf179a4f0a5744e5123..eefecc490c5d13476259497e79f7a3ebe68caee7 100644
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dl/extractor/telebruxelles.py
@@ -1,11 +1,13 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  
  
  class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
      _TESTS = [{
          'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
          'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
          webpage = self._download_webpage(url, display_id)
  
          article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
          title = self._html_search_regex(
              r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
  
          rtmp_url = self._html_search_regex(
-            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
              webpage, 'RTMP url')
-        rtmp_url = rtmp_url.replace("\" + \"", "")
+        rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
  
          return {
-            'id': article_id,
+            'id': article_id or display_id,
              'display_id': display_id,
              'title': title,
              'description': description,