# encoding: utf-8
from __future__ import unicode_literals
+import collections
import re
import json
import sys
get_element_by_class,
int_or_none,
orderedSet,
- parse_duration,
remove_start,
str_to_int,
unescapeHTML,
unified_strdate,
urlencode_postdata,
)
-from .vimeo import VimeoIE
+from .dailymotion import DailymotionIE
from .pladform import PladformIE
+from .vimeo import VimeoIE
class VKBaseIE(InfoExtractor):
# what actually happens.
# We will work around this VK issue by resetting the remixlhk cookie to
# the first one manually.
- cookies = url_handle.headers.get('Set-Cookie')
- if sys.version_info[0] >= 3:
- cookies = cookies.encode('iso-8859-1')
- cookies = cookies.decode('utf-8')
- remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
- if remixlhk:
- value, domain = remixlhk.groups()
- self._set_cookie(domain, 'remixlhk', value)
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ if sys.version_info[0] >= 3:
+ cookies = cookies.encode('iso-8859-1')
+ cookies = cookies.decode('utf-8')
+ remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
+ if remixlhk:
+ value, domain = remixlhk.groups()
+ self._set_cookie(domain, 'remixlhk', value)
+ break
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
'view_count': int,
},
},
+ {
+ # dailymotion embed
+ 'url': 'https://vk.com/video-37468416_456239855',
+ 'info_dict': {
+ 'id': 'k3lz2cmXyRuJQSjGHUv',
+ 'ext': 'mp4',
+ 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
+ 'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
+ 'uploader': 'AniLibria.Tv',
+ 'upload_date': '20160914',
+ 'uploader_id': 'x1p5vl5',
+ 'timestamp': 1473877246,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
{
# video key is extra_data not url\d+
'url': 'http://vk.com/video-110305615_171782105',
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
+ dailymotion_urls = DailymotionIE._extract_urls(info_page)
+ if dailymotion_urls:
+ return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
'skip_download': True,
},
}],
+ 'params': {
+ 'usenetrc': True,
+ },
'skip': 'Requires vk account credentials',
}, {
# single YouTube embed, no leading -
'title': 'Sergey Gorbunov - Wall post 85155021_6319',
},
'playlist_count': 1,
+ 'params': {
+ 'usenetrc': True,
+ },
'skip': 'Requires vk account credentials',
}, {
# wall page URL
raise ExtractorError('VK said: %s' % error, expected=True)
description = clean_html(get_element_by_class('wall_post_text', webpage))
- uploader = clean_html(get_element_by_class(
- 'fw_post_author', webpage)) or self._og_search_description(webpage)
+ uploader = clean_html(get_element_by_class('author', webpage))
thumbnail = self._og_search_thumbnail(webpage)
entries = []
- for audio in re.finditer(r'''(?sx)
- <input[^>]+
- id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+
- value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2)
- .+?
- </table>''', webpage):
- audio_html = audio.group(0)
- audio_id = audio.group('id')
- duration = parse_duration(get_element_by_class('duration', audio_html))
- track = self._html_search_regex(
- r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id,
- audio_html, 'title', default=None)
- artist = self._html_search_regex(
- r'>([^<]+)</a></b>\s*&ndash', audio_html,
- 'artist', default=None)
- entries.append({
- 'id': audio_id,
- 'url': audio.group('url'),
- 'title': '%s - %s' % (artist, track) if artist and track else audio_id,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': uploader,
- 'artist': artist,
- 'track': track,
- })
+ audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
+ if audio_ids:
+ al_audio = self._download_webpage(
+ 'https://vk.com/al_audio.php', post_id,
+ note='Downloading audio info', fatal=False,
+ data=urlencode_postdata({
+ 'act': 'reload_audio',
+ 'al': '1',
+ 'ids': ','.join(audio_ids)
+ }))
+ if al_audio:
+ Audio = collections.namedtuple(
+ 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
+ audios = self._parse_json(
+ self._search_regex(
+ r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
+ post_id, fatal=False, transform_source=unescapeHTML)
+ if isinstance(audios, list):
+ for audio in audios:
+ a = Audio._make(audio[:6])
+ entries.append({
+ 'id': '%s_%s' % (a.user_id, a.id),
+ 'url': a.url,
+ 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
+ 'thumbnail': thumbnail,
+ 'duration': a.duration,
+ 'uploader': uploader,
+ 'artist': a.artist,
+ 'track': a.track,
+ })
for video in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):