[generic] Detect bandcamp pages that use custom domains (closes #1662)
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 27 Oct 2013 13:40:25 +0000 (14:40 +0100)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 27 Oct 2013 13:40:25 +0000 (14:40 +0100)
They embed the original url in the 'og:url' property.

youtube_dl/extractor/generic.py

index ab4a5b7de5c50b7ffa5f0b30df415c61261ff606..2c8fcf5ae5df24a1dedc5e461feb5ac2300688a6 100644 (file)
@@ -41,7 +41,17 @@ class GenericIE(InfoExtractor):
                 u"uploader_id": u"skillsmatter",
                 u"uploader": u"Skills Matter",
             }
-        }
+        },
+        # bandcamp page with custom domain
+        {
+            u'url': u'http://bronyrock.com/track/the-pony-mash',
+            u'file': u'3235767654.mp3',
+            u'info_dict': {
+                u'title': u'The Pony Mash',
+                u'uploader': u'M_Pallante',
+            },
+            u'skip': u'There is a limit of 200 free downloads / month for the test song',
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -155,6 +165,12 @@ class GenericIE(InfoExtractor):
             surl = unescapeHTML(mobj.group(1))
             return self.url_result(surl, 'Youtube')
 
+        # Look for Bandcamp pages with custom domain
+        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+        if mobj is not None:
+            burl = unescapeHTML(mobj.group(1))
+            return self.url_result(burl, 'Bandcamp')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None: