Merge pull request #9595 from Kagami/vlive-site-update
author Yen Chi Hsuan <yan12125@gmail.com>
Wed, 25 May 2016 11:24:15 +0000 (19:24 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Wed, 25 May 2016 11:24:15 +0000 (19:24 +0800)
[vlive] Address site update

16 files changed:
AUTHORS
youtube_dl/extractor/byutv.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/espn.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/eyedotv.py [new file with mode: 0644]
youtube_dl/extractor/formula1.py
youtube_dl/extractor/groupon.py
youtube_dl/extractor/howcast.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/radiocanada.py [new file with mode: 0644]
youtube_dl/extractor/teachingchannel.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/voxmedia.py
youtube_dl/extractor/xhamster.py

diff --git a/AUTHORS b/AUTHORS
index 5ca71ace713555e978b7cc5615a667ce1d57da16..3272fc6ea77178598579364f1070857f8d5bf6fd 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -172,3 +172,4 @@ blahgeek
 Kevin Deldycke
 inondle
 Tomáš Čech
+Déstin Reed
index dda98059e9041c651de5a211fccb2c106b11bb75..3aec601f8e7179570088e1ea5ad1f7b6d30f219d 100644 (file)
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
     _TEST = {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
+        'md5': '05850eb8c749e2ee05ad5a1c34668493',
         'info_dict': {
             'id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
index e006921ec3f8d2a0aff0e6bb0595148469b1c256..581276694dcb996162f7ea13e010330a1b8888e0 100644 (file)
@@ -11,8 +11,8 @@ from ..utils import (
 
 
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
+    _TESTS = [{
         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
         'md5': '39d486f046212d8e1b911c52ab4691f8',
         'info_dict': {
@@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor):
             'duration': 1838,
             'view_count': int,
             'age_limit': 18,
-        }
-    }
+        },
+    },
+    # New (May 2016) URL layout
+    {
+        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
+        'md5': '3469eeaa93b6967a34cdbdbb9d064b33',
+        'info_dict': {
+            'id': '3YRUtzMcWn0',
+            'display_id': 'Star-Wars-XXX-Parody',
+            'ext': 'mp4',
+            'title': 'Star Wars XXX Parody',
+            'duration': 361.0,
+            'view_count': int,
+            'age_limit': 18,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index db4b263bcbf40a9cb133d2a9729e4fe07292bae3..66c08bec47d8aa639cf758bb3e083b9772230c76 100644 (file)
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
     _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://espn.go.com/video/clip?id=10365079',
+        'md5': '60e5d097a523e767d06479335d1bdc58',
         'info_dict': {
             'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
             'ext': 'mp4',
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
             'description': None,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['OoyalaExternal'],
     }, {
         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
         'url': 'http://espn.go.com/video/clip?id=2743663',
+        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
         'info_dict': {
             'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
             'ext': 'mp4',
             'title': 'Must-See Moments: Best of the MLS season',
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['OoyalaExternal'],
     }, {
         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
         'only_matching': True,
index d8b3170ba8720671c1664730c02a9ea229e887df..05561149a72463a5cf7b8d717b0b39b16b0ec288 100644 (file)
@@ -231,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
 from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
@@ -617,6 +618,10 @@ from .qqmusic import (
     QQMusicPlaylistIE,
 )
 from .r7 import R7IE
+from .radiocanada import (
+    RadioCanadaIE,
+    RadioCanadaAudioVideoIE,
+)
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py
new file mode 100644 (file)
index 0000000..2f30351
--- /dev/null
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    parse_duration,
+    ExtractorError,
+)
+
+
+class EyedoTVIE(InfoExtractor):
+    """Extractor for eyedo.tv "Live/Detail" pages (live streams and replays)."""
+
+    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
+        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
+        'info_dict': {
+            'id': '16301',
+            'ext': 'mp4',
+            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
+            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
+            'uploader': 'Afnic Live',
+            'uploader_id': '8023',
+        }
+    }
+    # Prefix shared by the non-CDN live playlist and the replay playlist URLs.
+    _ROOT_URL = 'http://live.eyedo.net:1935/'
+
+    def _real_extract(self, url):
+        """Build the info dict for the video whose numeric id is embedded in ``url``.
+
+        The video description is fetched as XML from the GetLive API, and the
+        HLS playlist URL is chosen from the reported live state.
+
+        Raises ExtractorError (expected) when the state code is 'avenir',
+        i.e. the video is not yet available.
+        """
+        video_id = self._match_id(url)
+        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
+
+        def _add_ns(path):
+            # Qualify an element path with the namespace used by the API response.
+            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
+
+        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
+        # Label this lookup by what it really is, so that a missing element is
+        # not reported as a missing "title".
+        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
+        if state_live_code == 'avenir':
+            raise ExtractorError(
+                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
+                expected=True)
+
+        is_live = state_live_code == 'live'
+        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
+        if is_live:
+            # NOTE(review): unlike every other lookup in this method, 'Cdn' is
+            # not namespace-qualified through _add_ns - confirm that this
+            # matches the actual API response.
+            if xpath_text(video_data, 'Cdn') == 'true':
+                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
+            else:
+                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
+        else:
+            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            # Live streams use the 'm3u8' entry protocol, replays 'm3u8_native'.
+            'formats': self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
+            'description': xpath_text(video_data, _add_ns('Description')),
+            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
+            'uploader': xpath_text(video_data, _add_ns('Createur')),
+            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
+            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
+            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
+        }
index 726393fccf7dbbde385887d3aba96a58d175a3a6..322c41e5afcb20935f4f7841ccb03303d5476f65 100644 (file)
@@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
             'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
             'ext': 'flv',
             'title': 'Race highlights - Spain 2016',
-        }
+        },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
index 1dd0a81cca13a8a44e5e25e2245cce9563399b70..a6da909310a5591fe39a68244142a46fb24ce65d 100644 (file)
@@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
             'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
         },
         'playlist': [{
+            'md5': '42428ce8a00585f9bc36e49226eae7a1',
             'info_dict': {
                 'id': 'fk6OhWpXgIQ',
                 'ext': 'mp4',
@@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
                 'uploader_id': 'groupon',
                 'uploader': 'Groupon',
             },
+            'add_ie': ['Youtube'],
         }],
         'params': {
             'skip_download': True,
-        }
+        },
     }
 
     _PROVIDERS = {
index e8f51e545bfd2b89a251e1a4fbbeefe80aa371f9..7e36b85ad586984dfb761e4518b23d2b4a074bf7 100644 (file)
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
-        'md5': '8b743df908c42f60cf6496586c7f12c3',
+        'md5': '7d45932269a288149483144f01b99789',
         'info_dict': {
             'id': '390161',
             'ext': 'mp4',
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
             'duration': 56.823,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
index 09bc291f028964a2ac36818e5ef25725ddf6fad9..2038a6ba5001283e786905a23c429d2418762515 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
     float_or_none,
     ExtractorError,
     unsmuggle_url,
+    determine_ext,
 )
 from ..compat import compat_urllib_parse_urlencode
 
@@ -37,26 +38,27 @@ class OoyalaBaseIE(InfoExtractor):
         formats = []
         if cur_auth_data['authorized']:
             for stream in cur_auth_data['streams']:
-                url = base64.b64decode(
+                s_url = base64.b64decode(
                     stream['url']['data'].encode('ascii')).decode('utf-8')
-                if url in urls:
+                if s_url in urls:
                     continue
-                urls.append(url)
+                urls.append(s_url)
+                ext = determine_ext(s_url, None)
                 delivery_type = stream['delivery_type']
-                if delivery_type == 'hls' or '.m3u8' in url:
+                if delivery_type == 'hls' or ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
-                        url, embed_code, 'mp4', 'm3u8_native',
+                        s_url, embed_code, 'mp4', 'm3u8_native',
                         m3u8_id='hls', fatal=False))
-                elif delivery_type == 'hds' or '.f4m' in url:
+                elif delivery_type == 'hds' or ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
-                        url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
-                elif '.smil' in url:
+                        s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+                elif ext == 'smil':
                     formats.extend(self._extract_smil_formats(
-                        url, embed_code, fatal=False))
+                        s_url, embed_code, fatal=False))
                 else:
                     formats.append({
-                        'url': url,
-                        'ext': stream.get('delivery_type'),
+                        'url': s_url,
+                        'ext': ext or stream.get('delivery_type'),
                         'vcodec': stream.get('video_codec'),
                         'format_id': delivery_type,
                         'width': int_or_none(stream.get('width')),
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
new file mode 100644 (file)
index 0000000..4f05bbd
--- /dev/null
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    find_xpath_attr,
+    determine_ext,
+    int_or_none,
+    unified_strdate,
+    xpath_element,
+    ExtractorError,
+)
+
+
+class RadioCanadaIE(InfoExtractor):
+    """Extractor for Radio-Canada media console URLs and ``radiocanada:`` pseudo-URLs."""
+
+    IE_NAME = 'radiocanada'
+    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+        'info_dict': {
+            'id': '7184272',
+            'ext': 'flv',
+            'title': 'Le parcours du tireur capté sur vidéo',
+            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+            'upload_date': '20141023',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the (app_code, id) pair encoded in ``url``.
+
+        Formats come from the validation API and the descriptive fields from
+        the metadata API.
+
+        Raises ExtractorError (expected) when the validation API returns the
+        literal URL 'null'; the API's own message is included in the error.
+        """
+        app_code, video_id = re.match(self._VALID_URL, url).groups()
+
+        formats = []
+        # TODO: extract m3u8 and f4m formats
+        # m3u8 formats can be extracted using the ipad device_type, but ffmpeg gets a 403 error when it tries to download the segments
+        # f4m formats can be extracted using the flashhd device_type, but they produce an unplayable file
+        for device_type in ('flash',):
+            v_data = self._download_xml(
+                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
+                video_id, note='Downloading %s XML' % device_type, query={
+                    'appCode': app_code,
+                    'idMedia': video_id,
+                    'connectionType': 'broadband',
+                    'multibitrate': 'true',
+                    'deviceType': device_type,
+                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
+                    'paysJ391wsHjbOJwvCs26toz': 'CA',
+                    'bypasslock': 'NZt5K62gRqfc',
+                })
+            v_url = xpath_text(v_data, 'url')
+            if not v_url:
+                continue
+            if v_url == 'null':
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+            ext = determine_ext(v_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
+            else:
+                # Neither HLS nor HDS: emit one RTMP format per listed bitrate,
+                # substituting that bitrate into the "<digits>.<ext>" part of
+                # the URL.
+                bitrates = xpath_element(v_data, 'bitrates')
+                for url_e in bitrates.findall('url'):
+                    tbr = int_or_none(url_e.get('bitrate'))
+                    if not tbr:
+                        continue
+                    formats.append({
+                        'format_id': 'rtmp-%d' % tbr,
+                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
+                        'ext': 'flv',
+                        'protocol': 'rtmp',
+                        'width': int_or_none(url_e.get('width')),
+                        'height': int_or_none(url_e.get('height')),
+                        'tbr': tbr,
+                    })
+        self._sort_formats(formats)
+
+        metadata = self._download_xml(
+            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
+            video_id, note='Downloading metadata XML', query={
+                'appCode': app_code,
+                'idMedia': video_id,
+            })
+
+        def get_meta(name):
+            # Text of the <Meta name="..."> element, or None when it is absent.
+            el = find_xpath_attr(metadata, './/Meta', 'name', name)
+            return el.text if el is not None else None
+
+        return {
+            'id': video_id,
+            'title': get_meta('Title'),
+            'description': get_meta('Description') or get_meta('ShortDescription'),
+            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+            'duration': int_or_none(get_meta('length')),
+            'series': get_meta('Emission'),
+            # Convert the metadata values, not the literal key names.
+            'season_number': int_or_none(get_meta('SrcSaison')),
+            'episode_number': int_or_none(get_meta('SrcEpisode')),
+            'upload_date': unified_strdate(get_meta('Date')),
+            'formats': formats,
+        }
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+    """Extractor for ici.radio-canada.ca "audio-video" pages.
+
+    It does no extraction itself: the media id taken from the page URL is
+    handed over as a ``radiocanada:medianet:<id>`` URL.
+    """
+
+    # Previously a bare string literal, which only acted as the class
+    # docstring and never set the extractor name.
+    IE_NAME = 'radiocanada:audiovideo'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+        'info_dict': {
+            'id': '7527184',
+            'ext': 'flv',
+            'title': 'Barack Obama au Vietnam',
+            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+            'upload_date': '20160523',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url_result that points at the ``radiocanada:medianet:<id>`` URL."""
+        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
index e0477382ceabea0769bd0575ceb1f350ce8c0911..d14d93e3ab1ae87902dc275e1208964a86b6b840 100644 (file)
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
 
     _TEST = {
         'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+        'md5': '3d6361864d7cac20b57c8784da17166f',
         'info_dict': {
             'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
             'ext': 'mp4',
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
             'duration': 422.255,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
index 23ce0a0d1929febac87f789374d8411d7b7ddd00..0f5d6873808ed2dce5cde2e6239b6973cf809367 100644 (file)
@@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
                 'uploader': 'afp-news',
                 'duration': 123,
             },
+            'skip': 'This video has been deleted.',
         },
         {
             'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
index 95daf4dfdf2155dbbab26f2896cf3c42e0f33e2f..e2b2ce0981cc8767ade2f5ef4c8bc52759b86af3 100644 (file)
@@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'md5': 'e9d77741f9e42ba583e683cd170660f7',
         'info_dict': {
             'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
             'ext': 'flv',
             'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
             'duration': 725.983,
         },
+        'add_ie': ['Ooyala'],
     }, {
         'url': 'http://www.vice.com/video/how-to-hack-a-car',
         'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
@@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
             'uploader': 'Motherboard',
             'upload_date': '20140529',
         },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
         'only_matching': True,
index 9d73600aa8b752985f3b3b2c4375af343e1c3d6b..b1b32ad44ecfd796e46219a87ea71caa3587face 100644 (file)
@@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Google\'s new material design direction',
             'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
-        }
+        },
+        'add_ie': ['Ooyala'],
     }, {
         # data-ooyala-id
         'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
@@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'The Nexus 6: hands-on with Google\'s phablet',
             'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
-        }
+        },
+        'add_ie': ['Ooyala'],
     }, {
         # volume embed
         'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'The new frontier of LGBTQ civil rights, explained',
             'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
-        }
+        },
+        'add_ie': ['Ooyala'],
     }, {
         # youtube embed
         'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
             'upload_date': '20160324',
             'uploader_id': 'voxdotcom',
             'uploader': 'Vox',
-        }
+        },
+        'add_ie': ['Youtube'],
     }, {
         # SBN.VideoLinkset.entryGroup multiple ooyala embeds
         'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
index b3547174dd92beffafaf8f220b50b94a25f2fa2b..bd8e1af2e0f6c25fc44aea36c23b813b092b4438 100644 (file)
@@ -12,37 +12,52 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
-    _TESTS = [
-        {
-            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-            'info_dict': {
-                'id': '1509445',
-                'ext': 'mp4',
-                'title': 'FemaleAgent Shy beauty takes the bait',
-                'upload_date': '20121014',
-                'uploader': 'Ruseful2011',
-                'duration': 893.52,
-                'age_limit': 18,
-            }
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
+    _TESTS = [{
+        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+        'md5': '8281348b8d3c53d39fffb377d24eac4e',
+        'info_dict': {
+            'id': '1509445',
+            'ext': 'mp4',
+            'title': 'FemaleAgent Shy beauty takes the bait',
+            'upload_date': '20121014',
+            'uploader': 'Ruseful2011',
+            'duration': 893.52,
+            'age_limit': 18,
         },
-        {
-            'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-            'info_dict': {
-                'id': '2221348',
-                'ext': 'mp4',
-                'title': 'Britney Spears  Sexy Booty',
-                'upload_date': '20130914',
-                'uploader': 'jojo747400',
-                'duration': 200.48,
-                'age_limit': 18,
-            }
+    }, {
+        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        'info_dict': {
+            'id': '2221348',
+            'ext': 'mp4',
+            'title': 'Britney Spears  Sexy Booty',
+            'upload_date': '20130914',
+            'uploader': 'jojo747400',
+            'duration': 200.48,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # empty seo
+        'url': 'http://xhamster.com/movies/5667973/.html',
+        'info_dict': {
+            'id': '5667973',
+            'ext': 'mp4',
+            'title': '....',
+            'upload_date': '20160208',
+            'uploader': 'parejafree',
+            'duration': 72.0,
+            'age_limit': 18,
         },
-        {
-            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
         },
-    ]
+    }, {
+        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         def extract_video_url(webpage, name):
@@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(
-            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
+            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
             webpage, 'xhamster url', default=None)
 
         if not video_url: