Merge branch 'pr-twitter' of https://github.com/atomicdryad/youtube-dl into atomicdry...
[youtube-dl] / youtube_dl / extractor / mtv.py
index d160f4a220aa0e264b1d05a4154cd83d049b4416..302c9bf35bc6bb533c10f35ea11cd500012bba0a 100644 (file)
@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return [{'url': url, 'ext': 'mp4'}]
 
     def _extract_video_formats(self, mdoc, mtvn_id):
-        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
+        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
                 self.to_screen('The normal version is not available from your '
                                'country, trying with the mobile version')
@@ -142,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if title_el is None:
             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
         if title_el is None:
-            title_el = itemdoc.find('.//title')
+            title_el = itemdoc.find('.//title') or itemdoc.find('./title')
             if title_el.text is None:
                 title_el = None
 
@@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
-                webpage, 'mgid')
+                webpage, 'mgid', default=None)
+
+        if not mgid:
+            sm4_embed = self._html_search_meta(
+                'sm4:video:embed', webpage, 'sm4 embed', default='')
+            mgid = self._search_regex(
+                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
 
         videos_info = self._get_videos_info(mgid)
         return videos_info
@@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
         },
     }
 
+    @staticmethod
+    def _extract_url(webpage):  # Return the src URL of a media.mtvnservices.com/embed/ iframe found in webpage, or None.
+        mobj = re.search(  # re.search yields only the first matching iframe
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)  # scheme is optional (protocol-relative src allowed); \1 requires the same closing quote. NOTE(review): dots in the host are unescaped and match any character - confirm intended.
+        if mobj:
+            return mobj.group('url')  # when nothing matched, the function falls through and returns None implicitly
+
     def _get_feed_url(self, uri):
         video_id = self._id_from_uri(uri)
         site_id = uri.replace(video_id, '')