[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / anvato.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import base64
5 import hashlib
6 import json
7 import random
8 import re
9 import time
10
11 from .common import InfoExtractor
12 from ..aes import aes_encrypt
13 from ..compat import compat_str
14 from ..utils import (
15     bytes_to_intlist,
16     determine_ext,
17     intlist_to_bytes,
18     int_or_none,
19     strip_jsonp,
20     unescapeHTML,
21     unsmuggle_url,
22 )
23
24
25 def md5_text(s):
26     if not isinstance(s, compat_str):
27         s = compat_str(s)
28     return hashlib.md5(s.encode('utf-8')).hexdigest()
29
30
31 class AnvatoIE(InfoExtractor):
32     _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
33
34     # Copied from anvplayer.min.js
35     _ANVACK_TABLE = {
36         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
37         'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
38         'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
39         'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
40         'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
41         'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
42         'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
43         'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
44         'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
45         'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
46         'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
47         'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
48         'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
49         'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
50         'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
51         'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
52         'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
53         'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
54         'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
55         'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
56         'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
57         'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
58         'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
59         'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
60         'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
61         'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
62         'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
63         'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
64         'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
65         'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
66         'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
67         'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
68         'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
69         'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
70         'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
71         'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
72         'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
73         'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
74         'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
75         'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
76         'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
77         'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
78         'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
79         'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
80         'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
81         'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
82         'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
83         'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
84         'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
85         'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
86         'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
87         'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
88         'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
89         'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
90         'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
91         'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
92         'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
93         'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
94         'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
95         'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
96         'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
97         'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
98         'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
99         'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
100         'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
101         'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
102         'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
103         'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
104         'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
105         'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
106         'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
107         'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
108         'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
109         'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
110         'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
111         'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
112         'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
113         'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
114         'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
115         'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
116         'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
117         'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
118         'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
119         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
120     }
121
122     _MCP_TO_ACCESS_KEY_TABLE = {
123         'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
124         'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
125         'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
126         'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
127         'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
128         'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
129         'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
130         'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
131         'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
132         'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
133         'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
134         'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
135     }
136
137     _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
138
139     _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
140     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
141
142     _TESTS = [{
143         # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
144         'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
145         'info_dict': {
146             'id': '4465496',
147             'ext': 'mp4',
148             'title': 'VIDEO: Humpback whale breaches right next to NH boat',
149             'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
150             'duration': 22,
151             'timestamp': 1534855680,
152             'upload_date': '20180821',
153             'uploader': 'ANV',
154         },
155         'params': {
156             'skip_download': True,
157         },
158     }, {
159         # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
160         'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
161         'only_matching': True,
162     }]
163
164     def __init__(self, *args, **kwargs):
165         super(AnvatoIE, self).__init__(*args, **kwargs)
166         self.__server_time = None
167
168     def _server_time(self, access_key, video_id):
169         if self.__server_time is not None:
170             return self.__server_time
171
172         self.__server_time = int(self._download_json(
173             self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
174             note='Fetching server time')['server_time'])
175
176         return self.__server_time
177
178     def _api_prefix(self, access_key):
179         return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
180
181     def _get_video_json(self, access_key, video_id):
182         # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
183         video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
184         server_time = self._server_time(access_key, video_id)
185         input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
186
187         auth_secret = intlist_to_bytes(aes_encrypt(
188             bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
189
190         video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
191         anvrid = md5_text(time.time() * 1000 * random.random())[:30]
192         payload = {
193             'api': {
194                 'anvrid': anvrid,
195                 'anvstk': md5_text('%s|%s|%d|%s' % (
196                     access_key, anvrid, server_time,
197                     self._ANVACK_TABLE.get(access_key, self._API_KEY))),
198                 'anvts': server_time,
199             },
200         }
201
202         return self._download_json(
203             video_data_url, video_id, transform_source=strip_jsonp,
204             data=json.dumps(payload).encode('utf-8'))
205
206     def _get_anvato_videos(self, access_key, video_id):
207         video_data = self._get_video_json(access_key, video_id)
208
209         formats = []
210         for published_url in video_data['published_urls']:
211             video_url = published_url['embed_url']
212             media_format = published_url.get('format')
213             ext = determine_ext(video_url)
214
215             if ext == 'smil' or media_format == 'smil':
216                 formats.extend(self._extract_smil_formats(video_url, video_id))
217                 continue
218
219             tbr = int_or_none(published_url.get('kbps'))
220             a_format = {
221                 'url': video_url,
222                 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
223                 'tbr': tbr if tbr != 0 else None,
224             }
225
226             if media_format == 'm3u8' and tbr is not None:
227                 a_format.update({
228                     'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
229                     'ext': 'mp4',
230                 })
231             elif media_format == 'm3u8-variant' or ext == 'm3u8':
232                 formats.extend(self._extract_m3u8_formats(
233                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
234                     m3u8_id='hls', fatal=False))
235                 continue
236             elif ext == 'mp3' or media_format == 'mp3':
237                 a_format['vcodec'] = 'none'
238             else:
239                 a_format.update({
240                     'width': int_or_none(published_url.get('width')),
241                     'height': int_or_none(published_url.get('height')),
242                 })
243             formats.append(a_format)
244
245         self._sort_formats(formats)
246
247         subtitles = {}
248         for caption in video_data.get('captions', []):
249             a_caption = {
250                 'url': caption['url'],
251                 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
252             }
253             subtitles.setdefault(caption['language'], []).append(a_caption)
254
255         return {
256             'id': video_id,
257             'formats': formats,
258             'title': video_data.get('def_title'),
259             'description': video_data.get('def_description'),
260             'tags': video_data.get('def_tags', '').split(','),
261             'categories': video_data.get('categories'),
262             'thumbnail': video_data.get('thumbnail'),
263             'timestamp': int_or_none(video_data.get(
264                 'ts_published') or video_data.get('ts_added')),
265             'uploader': video_data.get('mcp_id'),
266             'duration': int_or_none(video_data.get('duration')),
267             'subtitles': subtitles,
268         }
269
270     @staticmethod
271     def _extract_urls(ie, webpage, video_id):
272         entries = []
273         for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
274             anvplayer_data = ie._parse_json(
275                 mobj.group('anvp'), video_id, transform_source=unescapeHTML,
276                 fatal=False)
277             if not anvplayer_data:
278                 continue
279             video = anvplayer_data.get('video')
280             if not isinstance(video, compat_str) or not video.isdigit():
281                 continue
282             access_key = anvplayer_data.get('accessKey')
283             if not access_key:
284                 mcp = anvplayer_data.get('mcp')
285                 if mcp:
286                     access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
287                         mcp.lower())
288             if not access_key:
289                 continue
290             entries.append(ie.url_result(
291                 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
292                 video_id=video))
293         return entries
294
295     def _extract_anvato_videos(self, webpage, video_id):
296         anvplayer_data = self._parse_json(
297             self._html_search_regex(
298                 self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
299             video_id)
300         return self._get_anvato_videos(
301             anvplayer_data['accessKey'], anvplayer_data['video'])
302
303     def _real_extract(self, url):
304         url, smuggled_data = unsmuggle_url(url, {})
305         self._initialize_geo_bypass({
306             'countries': smuggled_data.get('geo_countries'),
307         })
308
309         mobj = re.match(self._VALID_URL, url)
310         access_key, video_id = mobj.group('access_key_or_mcp', 'id')
311         if access_key not in self._ANVACK_TABLE:
312             access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
313                 access_key) or access_key
314         return self._get_anvato_videos(access_key, video_id)