2 from __future__ import unicode_literals
22 import xml.etree.ElementTree
26 import urllib.request as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2 as compat_urllib_request
31 import urllib.error as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2 as compat_urllib_error
36 import urllib.parse as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib as compat_urllib_parse
41 from urllib.parse import urlparse as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse import urlparse as compat_urllib_parse_urlparse
46 import urllib.parse as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse as compat_urlparse
51 import urllib.response as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib as compat_urllib_response
56 import http.cookiejar as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib as compat_cookiejar
if sys.version_info[0] == 2:
    class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
        """``Cookie`` variant for Python 2 that tolerates text arguments.

        ``name`` and ``value`` are encoded with ``.encode()`` when they are
        ``compat_str`` instances; every other argument is forwarded untouched
        to ``compat_cookiejar.Cookie.__init__``.
        """

        def __init__(self, version, name, value, *args, **kwargs):
            # NOTE(review): presumably needed because this module enables
            # unicode_literals while Python 2's cookielib works with native
            # byte strings -- confirm against callers.
            if isinstance(name, compat_str):
                name = name.encode()
            if isinstance(value, compat_str):
                value = value.encode()
            compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
else:
    # No conversion is applied on other versions: plain alias.
    compat_cookiejar_Cookie = compat_cookiejar.Cookie
72 import http.cookies as compat_cookies
73 except ImportError: # Python 2
74 import Cookie as compat_cookies
77 import html.entities as compat_html_entities
78 except ImportError: # Python 2
79 import htmlentitydefs as compat_html_entities
82 compat_html_entities_html5 = compat_html_entities.html5
83 except AttributeError:
84 # Copied from CPython 3.5.1 html/entities.py
85 compat_html_entities_html5 = {
94 'acE;': '\u223e\u0333',
108 'Afr;': '\U0001d504',
109 'afr;': '\U0001d51e',
114 'alefsym;': '\u2135',
129 'andslope;': '\u2a58',
135 'angmsdaa;': '\u29a8',
136 'angmsdab;': '\u29a9',
137 'angmsdac;': '\u29aa',
138 'angmsdad;': '\u29ab',
139 'angmsdae;': '\u29ac',
140 'angmsdaf;': '\u29ad',
141 'angmsdag;': '\u29ae',
142 'angmsdah;': '\u29af',
144 'angrtvb;': '\u22be',
145 'angrtvbd;': '\u299d',
148 'angzarr;': '\u237c',
151 'Aopf;': '\U0001d538',
152 'aopf;': '\U0001d552',
159 'ApplyFunction;': '\u2061',
161 'approxeq;': '\u224a',
166 'Ascr;': '\U0001d49c',
167 'ascr;': '\U0001d4b6',
171 'asympeq;': '\u224d',
180 'awconint;': '\u2233',
182 'backcong;': '\u224c',
183 'backepsilon;': '\u03f6',
184 'backprime;': '\u2035',
185 'backsim;': '\u223d',
186 'backsimeq;': '\u22cd',
187 'Backslash;': '\u2216',
192 'barwedge;': '\u2305',
194 'bbrktbrk;': '\u23b6',
200 'Because;': '\u2235',
201 'because;': '\u2235',
202 'bemptyv;': '\u29b0',
205 'Bernoullis;': '\u212c',
209 'between;': '\u226c',
210 'Bfr;': '\U0001d505',
211 'bfr;': '\U0001d51f',
213 'bigcirc;': '\u25ef',
215 'bigodot;': '\u2a00',
216 'bigoplus;': '\u2a01',
217 'bigotimes;': '\u2a02',
218 'bigsqcup;': '\u2a06',
219 'bigstar;': '\u2605',
220 'bigtriangledown;': '\u25bd',
221 'bigtriangleup;': '\u25b3',
222 'biguplus;': '\u2a04',
224 'bigwedge;': '\u22c0',
226 'blacklozenge;': '\u29eb',
227 'blacksquare;': '\u25aa',
228 'blacktriangle;': '\u25b4',
229 'blacktriangledown;': '\u25be',
230 'blacktriangleleft;': '\u25c2',
231 'blacktriangleright;': '\u25b8',
238 'bnequiv;': '\u2261\u20e5',
241 'Bopf;': '\U0001d539',
242 'bopf;': '\U0001d553',
265 'boxminus;': '\u229f',
266 'boxplus;': '\u229e',
267 'boxtimes;': '\u22a0',
296 'bscr;': '\U0001d4b7',
302 'bsolhsub;': '\u27c8',
315 'capbrcup;': '\u2a49',
319 'CapitalDifferentialD;': '\u2145',
320 'caps;': '\u2229\ufe00',
323 'Cayleys;': '\u212d',
333 'Cconint;': '\u2230',
335 'ccupssm;': '\u2a50',
341 'cemptyv;': '\u29b2',
344 'CenterDot;': '\xb7',
345 'centerdot;': '\xb7',
347 'cfr;': '\U0001d520',
351 'checkmark;': '\u2713',
357 'circlearrowleft;': '\u21ba',
358 'circlearrowright;': '\u21bb',
359 'circledast;': '\u229b',
360 'circledcirc;': '\u229a',
361 'circleddash;': '\u229d',
362 'CircleDot;': '\u2299',
364 'circledS;': '\u24c8',
365 'CircleMinus;': '\u2296',
366 'CirclePlus;': '\u2295',
367 'CircleTimes;': '\u2297',
370 'cirfnint;': '\u2a10',
372 'cirscir;': '\u29c2',
373 'ClockwiseContourIntegral;': '\u2232',
374 'CloseCurlyDoubleQuote;': '\u201d',
375 'CloseCurlyQuote;': '\u2019',
377 'clubsuit;': '\u2663',
382 'coloneq;': '\u2254',
387 'complement;': '\u2201',
388 'complexes;': '\u2102',
390 'congdot;': '\u2a6d',
391 'Congruent;': '\u2261',
394 'ContourIntegral;': '\u222e',
396 'copf;': '\U0001d554',
398 'Coproduct;': '\u2210',
404 'CounterClockwiseContourIntegral;': '\u2233',
408 'Cscr;': '\U0001d49e',
409 'cscr;': '\U0001d4b8',
415 'cudarrl;': '\u2938',
416 'cudarrr;': '\u2935',
420 'cularrp;': '\u293d',
423 'cupbrcap;': '\u2a48',
429 'cups;': '\u222a\ufe00',
431 'curarrm;': '\u293c',
432 'curlyeqprec;': '\u22de',
433 'curlyeqsucc;': '\u22df',
434 'curlyvee;': '\u22ce',
435 'curlywedge;': '\u22cf',
438 'curvearrowleft;': '\u21b6',
439 'curvearrowright;': '\u21b7',
442 'cwconint;': '\u2232',
454 'dbkarow;': '\u290f',
462 'ddagger;': '\u2021',
464 'DDotrahd;': '\u2911',
465 'ddotseq;': '\u2a77',
471 'demptyv;': '\u29b1',
473 'Dfr;': '\U0001d507',
474 'dfr;': '\U0001d521',
478 'DiacriticalAcute;': '\xb4',
479 'DiacriticalDot;': '\u02d9',
480 'DiacriticalDoubleAcute;': '\u02dd',
481 'DiacriticalGrave;': '`',
482 'DiacriticalTilde;': '\u02dc',
484 'Diamond;': '\u22c4',
485 'diamond;': '\u22c4',
486 'diamondsuit;': '\u2666',
489 'DifferentialD;': '\u2146',
490 'digamma;': '\u03dd',
495 'divideontimes;': '\u22c7',
502 'Dopf;': '\U0001d53b',
503 'dopf;': '\U0001d555',
508 'doteqdot;': '\u2251',
509 'DotEqual;': '\u2250',
510 'dotminus;': '\u2238',
511 'dotplus;': '\u2214',
512 'dotsquare;': '\u22a1',
513 'doublebarwedge;': '\u2306',
514 'DoubleContourIntegral;': '\u222f',
515 'DoubleDot;': '\xa8',
516 'DoubleDownArrow;': '\u21d3',
517 'DoubleLeftArrow;': '\u21d0',
518 'DoubleLeftRightArrow;': '\u21d4',
519 'DoubleLeftTee;': '\u2ae4',
520 'DoubleLongLeftArrow;': '\u27f8',
521 'DoubleLongLeftRightArrow;': '\u27fa',
522 'DoubleLongRightArrow;': '\u27f9',
523 'DoubleRightArrow;': '\u21d2',
524 'DoubleRightTee;': '\u22a8',
525 'DoubleUpArrow;': '\u21d1',
526 'DoubleUpDownArrow;': '\u21d5',
527 'DoubleVerticalBar;': '\u2225',
528 'DownArrow;': '\u2193',
529 'Downarrow;': '\u21d3',
530 'downarrow;': '\u2193',
531 'DownArrowBar;': '\u2913',
532 'DownArrowUpArrow;': '\u21f5',
533 'DownBreve;': '\u0311',
534 'downdownarrows;': '\u21ca',
535 'downharpoonleft;': '\u21c3',
536 'downharpoonright;': '\u21c2',
537 'DownLeftRightVector;': '\u2950',
538 'DownLeftTeeVector;': '\u295e',
539 'DownLeftVector;': '\u21bd',
540 'DownLeftVectorBar;': '\u2956',
541 'DownRightTeeVector;': '\u295f',
542 'DownRightVector;': '\u21c1',
543 'DownRightVectorBar;': '\u2957',
544 'DownTee;': '\u22a4',
545 'DownTeeArrow;': '\u21a7',
546 'drbkarow;': '\u2910',
549 'Dscr;': '\U0001d49f',
550 'dscr;': '\U0001d4b9',
561 'dwangle;': '\u29a6',
564 'dzigrarr;': '\u27ff',
586 'Efr;': '\U0001d508',
587 'efr;': '\U0001d522',
596 'Element;': '\u2208',
597 'elinters;': '\u23e7',
604 'emptyset;': '\u2205',
605 'EmptySmallSquare;': '\u25fb',
607 'EmptyVerySmallSquare;': '\u25ab',
616 'Eopf;': '\U0001d53c',
617 'eopf;': '\U0001d556',
622 'Epsilon;': '\u0395',
623 'epsilon;': '\u03b5',
626 'eqcolon;': '\u2255',
628 'eqslantgtr;': '\u2a96',
629 'eqslantless;': '\u2a95',
632 'EqualTilde;': '\u2242',
634 'Equilibrium;': '\u21cc',
636 'equivDD;': '\u2a78',
637 'eqvparsl;': '\u29e5',
659 'expectation;': '\u2130',
660 'ExponentialE;': '\u2147',
661 'exponentiale;': '\u2147',
662 'fallingdotseq;': '\u2252',
669 'Ffr;': '\U0001d509',
670 'ffr;': '\U0001d523',
672 'FilledSmallSquare;': '\u25fc',
673 'FilledVerySmallSquare;': '\u25aa',
679 'Fopf;': '\U0001d53d',
680 'fopf;': '\U0001d557',
685 'Fouriertrf;': '\u2131',
686 'fpartint;': '\u2a0d',
708 'fscr;': '\U0001d4bb',
730 'geqslant;': '\u2a7e',
734 'gesdoto;': '\u2a82',
735 'gesdotol;': '\u2a84',
736 'gesl;': '\u22db\ufe00',
738 'Gfr;': '\U0001d50a',
739 'gfr;': '\U0001d524',
751 'gnapprox;': '\u2a8a',
757 'Gopf;': '\U0001d53e',
758 'gopf;': '\U0001d558',
760 'GreaterEqual;': '\u2265',
761 'GreaterEqualLess;': '\u22db',
762 'GreaterFullEqual;': '\u2267',
763 'GreaterGreater;': '\u2aa2',
764 'GreaterLess;': '\u2277',
765 'GreaterSlantEqual;': '\u2a7e',
766 'GreaterTilde;': '\u2273',
767 'Gscr;': '\U0001d4a2',
781 'gtquest;': '\u2a7c',
782 'gtrapprox;': '\u2a86',
785 'gtreqless;': '\u22db',
786 'gtreqqless;': '\u2a8c',
787 'gtrless;': '\u2277',
789 'gvertneqq;': '\u2269\ufe00',
790 'gvnE;': '\u2269\ufe00',
799 'harrcir;': '\u2948',
806 'heartsuit;': '\u2665',
810 'hfr;': '\U0001d525',
811 'HilbertSpace;': '\u210b',
812 'hksearow;': '\u2925',
813 'hkswarow;': '\u2926',
816 'hookleftarrow;': '\u21a9',
817 'hookrightarrow;': '\u21aa',
819 'hopf;': '\U0001d559',
821 'HorizontalLine;': '\u2500',
823 'hscr;': '\U0001d4bd',
827 'HumpDownHump;': '\u224e',
828 'HumpEqual;': '\u224f',
849 'ifr;': '\U0001d526',
865 'ImaginaryI;': '\u2148',
866 'imagline;': '\u2110',
867 'imagpart;': '\u2111',
871 'Implies;': '\u21d2',
875 'infintie;': '\u29dd',
880 'integers;': '\u2124',
881 'Integral;': '\u222b',
882 'intercal;': '\u22ba',
883 'Intersection;': '\u22c2',
884 'intlarhk;': '\u2a17',
885 'intprod;': '\u2a3c',
886 'InvisibleComma;': '\u2063',
887 'InvisibleTimes;': '\u2062',
892 'Iopf;': '\U0001d540',
893 'iopf;': '\U0001d55a',
900 'iscr;': '\U0001d4be',
902 'isindot;': '\u22f5',
920 'Jfr;': '\U0001d50d',
921 'jfr;': '\U0001d527',
923 'Jopf;': '\U0001d541',
924 'jopf;': '\U0001d55b',
925 'Jscr;': '\U0001d4a5',
926 'jscr;': '\U0001d4bf',
938 'Kfr;': '\U0001d50e',
939 'kfr;': '\U0001d528',
945 'Kopf;': '\U0001d542',
946 'kopf;': '\U0001d55c',
947 'Kscr;': '\U0001d4a6',
948 'kscr;': '\U0001d4c0',
952 'laemptyv;': '\u29b4',
961 'Laplacetrf;': '\u2112',
968 'larrbfs;': '\u291f',
973 'larrsim;': '\u2973',
979 'lates;': '\u2aad\ufe00',
986 'lbrksld;': '\u298f',
987 'lbrkslu;': '\u298d',
999 'ldrdhar;': '\u2967',
1000 'ldrushar;': '\u294b',
1004 'LeftAngleBracket;': '\u27e8',
1005 'LeftArrow;': '\u2190',
1006 'Leftarrow;': '\u21d0',
1007 'leftarrow;': '\u2190',
1008 'LeftArrowBar;': '\u21e4',
1009 'LeftArrowRightArrow;': '\u21c6',
1010 'leftarrowtail;': '\u21a2',
1011 'LeftCeiling;': '\u2308',
1012 'LeftDoubleBracket;': '\u27e6',
1013 'LeftDownTeeVector;': '\u2961',
1014 'LeftDownVector;': '\u21c3',
1015 'LeftDownVectorBar;': '\u2959',
1016 'LeftFloor;': '\u230a',
1017 'leftharpoondown;': '\u21bd',
1018 'leftharpoonup;': '\u21bc',
1019 'leftleftarrows;': '\u21c7',
1020 'LeftRightArrow;': '\u2194',
1021 'Leftrightarrow;': '\u21d4',
1022 'leftrightarrow;': '\u2194',
1023 'leftrightarrows;': '\u21c6',
1024 'leftrightharpoons;': '\u21cb',
1025 'leftrightsquigarrow;': '\u21ad',
1026 'LeftRightVector;': '\u294e',
1027 'LeftTee;': '\u22a3',
1028 'LeftTeeArrow;': '\u21a4',
1029 'LeftTeeVector;': '\u295a',
1030 'leftthreetimes;': '\u22cb',
1031 'LeftTriangle;': '\u22b2',
1032 'LeftTriangleBar;': '\u29cf',
1033 'LeftTriangleEqual;': '\u22b4',
1034 'LeftUpDownVector;': '\u2951',
1035 'LeftUpTeeVector;': '\u2960',
1036 'LeftUpVector;': '\u21bf',
1037 'LeftUpVectorBar;': '\u2958',
1038 'LeftVector;': '\u21bc',
1039 'LeftVectorBar;': '\u2952',
1044 'leqslant;': '\u2a7d',
1047 'lesdot;': '\u2a7f',
1048 'lesdoto;': '\u2a81',
1049 'lesdotor;': '\u2a83',
1050 'lesg;': '\u22da\ufe00',
1051 'lesges;': '\u2a93',
1052 'lessapprox;': '\u2a85',
1053 'lessdot;': '\u22d6',
1054 'lesseqgtr;': '\u22da',
1055 'lesseqqgtr;': '\u2a8b',
1056 'LessEqualGreater;': '\u22da',
1057 'LessFullEqual;': '\u2266',
1058 'LessGreater;': '\u2276',
1059 'lessgtr;': '\u2276',
1060 'LessLess;': '\u2aa1',
1061 'lesssim;': '\u2272',
1062 'LessSlantEqual;': '\u2a7d',
1063 'LessTilde;': '\u2272',
1064 'lfisht;': '\u297c',
1065 'lfloor;': '\u230a',
1066 'Lfr;': '\U0001d50f',
1067 'lfr;': '\U0001d529',
1073 'lharul;': '\u296a',
1080 'llcorner;': '\u231e',
1081 'Lleftarrow;': '\u21da',
1082 'llhard;': '\u296b',
1084 'Lmidot;': '\u013f',
1085 'lmidot;': '\u0140',
1086 'lmoust;': '\u23b0',
1087 'lmoustache;': '\u23b0',
1089 'lnapprox;': '\u2a89',
1098 'LongLeftArrow;': '\u27f5',
1099 'Longleftarrow;': '\u27f8',
1100 'longleftarrow;': '\u27f5',
1101 'LongLeftRightArrow;': '\u27f7',
1102 'Longleftrightarrow;': '\u27fa',
1103 'longleftrightarrow;': '\u27f7',
1104 'longmapsto;': '\u27fc',
1105 'LongRightArrow;': '\u27f6',
1106 'Longrightarrow;': '\u27f9',
1107 'longrightarrow;': '\u27f6',
1108 'looparrowleft;': '\u21ab',
1109 'looparrowright;': '\u21ac',
1111 'Lopf;': '\U0001d543',
1112 'lopf;': '\U0001d55d',
1113 'loplus;': '\u2a2d',
1114 'lotimes;': '\u2a34',
1115 'lowast;': '\u2217',
1117 'LowerLeftArrow;': '\u2199',
1118 'LowerRightArrow;': '\u2198',
1120 'lozenge;': '\u25ca',
1123 'lparlt;': '\u2993',
1125 'lrcorner;': '\u231f',
1127 'lrhard;': '\u296d',
1130 'lsaquo;': '\u2039',
1132 'lscr;': '\U0001d4c1',
1140 'lsquor;': '\u201a',
1141 'Lstrok;': '\u0141',
1142 'lstrok;': '\u0142',
1151 'lthree;': '\u22cb',
1152 'ltimes;': '\u22c9',
1153 'ltlarr;': '\u2976',
1154 'ltquest;': '\u2a7b',
1158 'ltrPar;': '\u2996',
1159 'lurdshar;': '\u294a',
1160 'luruhar;': '\u2966',
1161 'lvertneqq;': '\u2268\ufe00',
1162 'lvnE;': '\u2268\ufe00',
1167 'maltese;': '\u2720',
1170 'mapsto;': '\u21a6',
1171 'mapstodown;': '\u21a7',
1172 'mapstoleft;': '\u21a4',
1173 'mapstoup;': '\u21a5',
1174 'marker;': '\u25ae',
1175 'mcomma;': '\u2a29',
1180 'measuredangle;': '\u2221',
1181 'MediumSpace;': '\u205f',
1182 'Mellintrf;': '\u2133',
1183 'Mfr;': '\U0001d510',
1184 'mfr;': '\U0001d52a',
1190 'midcir;': '\u2af0',
1194 'minusb;': '\u229f',
1195 'minusd;': '\u2238',
1196 'minusdu;': '\u2a2a',
1197 'MinusPlus;': '\u2213',
1200 'mnplus;': '\u2213',
1201 'models;': '\u22a7',
1202 'Mopf;': '\U0001d544',
1203 'mopf;': '\U0001d55e',
1206 'mscr;': '\U0001d4c2',
1207 'mstpos;': '\u223e',
1210 'multimap;': '\u22b8',
1213 'Nacute;': '\u0143',
1214 'nacute;': '\u0144',
1215 'nang;': '\u2220\u20d2',
1217 'napE;': '\u2a70\u0338',
1218 'napid;': '\u224b\u0338',
1220 'napprox;': '\u2249',
1222 'natural;': '\u266e',
1223 'naturals;': '\u2115',
1226 'nbump;': '\u224e\u0338',
1227 'nbumpe;': '\u224f\u0338',
1229 'Ncaron;': '\u0147',
1230 'ncaron;': '\u0148',
1231 'Ncedil;': '\u0145',
1232 'ncedil;': '\u0146',
1234 'ncongdot;': '\u2a6d\u0338',
1240 'nearhk;': '\u2924',
1243 'nearrow;': '\u2197',
1244 'nedot;': '\u2250\u0338',
1245 'NegativeMediumSpace;': '\u200b',
1246 'NegativeThickSpace;': '\u200b',
1247 'NegativeThinSpace;': '\u200b',
1248 'NegativeVeryThinSpace;': '\u200b',
1249 'nequiv;': '\u2262',
1250 'nesear;': '\u2928',
1251 'nesim;': '\u2242\u0338',
1252 'NestedGreaterGreater;': '\u226b',
1253 'NestedLessLess;': '\u226a',
1255 'nexist;': '\u2204',
1256 'nexists;': '\u2204',
1257 'Nfr;': '\U0001d511',
1258 'nfr;': '\U0001d52b',
1259 'ngE;': '\u2267\u0338',
1262 'ngeqq;': '\u2267\u0338',
1263 'ngeqslant;': '\u2a7e\u0338',
1264 'nges;': '\u2a7e\u0338',
1265 'nGg;': '\u22d9\u0338',
1267 'nGt;': '\u226b\u20d2',
1270 'nGtv;': '\u226b\u0338',
1283 'nlE;': '\u2266\u0338',
1285 'nLeftarrow;': '\u21cd',
1286 'nleftarrow;': '\u219a',
1287 'nLeftrightarrow;': '\u21ce',
1288 'nleftrightarrow;': '\u21ae',
1290 'nleqq;': '\u2266\u0338',
1291 'nleqslant;': '\u2a7d\u0338',
1292 'nles;': '\u2a7d\u0338',
1294 'nLl;': '\u22d8\u0338',
1296 'nLt;': '\u226a\u20d2',
1299 'nltrie;': '\u22ec',
1300 'nLtv;': '\u226a\u0338',
1302 'NoBreak;': '\u2060',
1303 'NonBreakingSpace;': '\xa0',
1305 'nopf;': '\U0001d55f',
1309 'NotCongruent;': '\u2262',
1310 'NotCupCap;': '\u226d',
1311 'NotDoubleVerticalBar;': '\u2226',
1312 'NotElement;': '\u2209',
1313 'NotEqual;': '\u2260',
1314 'NotEqualTilde;': '\u2242\u0338',
1315 'NotExists;': '\u2204',
1316 'NotGreater;': '\u226f',
1317 'NotGreaterEqual;': '\u2271',
1318 'NotGreaterFullEqual;': '\u2267\u0338',
1319 'NotGreaterGreater;': '\u226b\u0338',
1320 'NotGreaterLess;': '\u2279',
1321 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1322 'NotGreaterTilde;': '\u2275',
1323 'NotHumpDownHump;': '\u224e\u0338',
1324 'NotHumpEqual;': '\u224f\u0338',
1326 'notindot;': '\u22f5\u0338',
1327 'notinE;': '\u22f9\u0338',
1328 'notinva;': '\u2209',
1329 'notinvb;': '\u22f7',
1330 'notinvc;': '\u22f6',
1331 'NotLeftTriangle;': '\u22ea',
1332 'NotLeftTriangleBar;': '\u29cf\u0338',
1333 'NotLeftTriangleEqual;': '\u22ec',
1334 'NotLess;': '\u226e',
1335 'NotLessEqual;': '\u2270',
1336 'NotLessGreater;': '\u2278',
1337 'NotLessLess;': '\u226a\u0338',
1338 'NotLessSlantEqual;': '\u2a7d\u0338',
1339 'NotLessTilde;': '\u2274',
1340 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1341 'NotNestedLessLess;': '\u2aa1\u0338',
1343 'notniva;': '\u220c',
1344 'notnivb;': '\u22fe',
1345 'notnivc;': '\u22fd',
1346 'NotPrecedes;': '\u2280',
1347 'NotPrecedesEqual;': '\u2aaf\u0338',
1348 'NotPrecedesSlantEqual;': '\u22e0',
1349 'NotReverseElement;': '\u220c',
1350 'NotRightTriangle;': '\u22eb',
1351 'NotRightTriangleBar;': '\u29d0\u0338',
1352 'NotRightTriangleEqual;': '\u22ed',
1353 'NotSquareSubset;': '\u228f\u0338',
1354 'NotSquareSubsetEqual;': '\u22e2',
1355 'NotSquareSuperset;': '\u2290\u0338',
1356 'NotSquareSupersetEqual;': '\u22e3',
1357 'NotSubset;': '\u2282\u20d2',
1358 'NotSubsetEqual;': '\u2288',
1359 'NotSucceeds;': '\u2281',
1360 'NotSucceedsEqual;': '\u2ab0\u0338',
1361 'NotSucceedsSlantEqual;': '\u22e1',
1362 'NotSucceedsTilde;': '\u227f\u0338',
1363 'NotSuperset;': '\u2283\u20d2',
1364 'NotSupersetEqual;': '\u2289',
1365 'NotTilde;': '\u2241',
1366 'NotTildeEqual;': '\u2244',
1367 'NotTildeFullEqual;': '\u2247',
1368 'NotTildeTilde;': '\u2249',
1369 'NotVerticalBar;': '\u2224',
1371 'nparallel;': '\u2226',
1372 'nparsl;': '\u2afd\u20e5',
1373 'npart;': '\u2202\u0338',
1374 'npolint;': '\u2a14',
1376 'nprcue;': '\u22e0',
1377 'npre;': '\u2aaf\u0338',
1379 'npreceq;': '\u2aaf\u0338',
1382 'nrarrc;': '\u2933\u0338',
1383 'nrarrw;': '\u219d\u0338',
1384 'nRightarrow;': '\u21cf',
1385 'nrightarrow;': '\u219b',
1387 'nrtrie;': '\u22ed',
1389 'nsccue;': '\u22e1',
1390 'nsce;': '\u2ab0\u0338',
1391 'Nscr;': '\U0001d4a9',
1392 'nscr;': '\U0001d4c3',
1393 'nshortmid;': '\u2224',
1394 'nshortparallel;': '\u2226',
1397 'nsimeq;': '\u2244',
1400 'nsqsube;': '\u22e2',
1401 'nsqsupe;': '\u22e3',
1403 'nsubE;': '\u2ac5\u0338',
1405 'nsubset;': '\u2282\u20d2',
1406 'nsubseteq;': '\u2288',
1407 'nsubseteqq;': '\u2ac5\u0338',
1409 'nsucceq;': '\u2ab0\u0338',
1411 'nsupE;': '\u2ac6\u0338',
1413 'nsupset;': '\u2283\u20d2',
1414 'nsupseteq;': '\u2289',
1415 'nsupseteqq;': '\u2ac6\u0338',
1422 'ntriangleleft;': '\u22ea',
1423 'ntrianglelefteq;': '\u22ec',
1424 'ntriangleright;': '\u22eb',
1425 'ntrianglerighteq;': '\u22ed',
1429 'numero;': '\u2116',
1431 'nvap;': '\u224d\u20d2',
1432 'nVDash;': '\u22af',
1433 'nVdash;': '\u22ae',
1434 'nvDash;': '\u22ad',
1435 'nvdash;': '\u22ac',
1436 'nvge;': '\u2265\u20d2',
1438 'nvHarr;': '\u2904',
1439 'nvinfin;': '\u29de',
1440 'nvlArr;': '\u2902',
1441 'nvle;': '\u2264\u20d2',
1443 'nvltrie;': '\u22b4\u20d2',
1444 'nvrArr;': '\u2903',
1445 'nvrtrie;': '\u22b5\u20d2',
1446 'nvsim;': '\u223c\u20d2',
1447 'nwarhk;': '\u2923',
1450 'nwarrow;': '\u2196',
1451 'nwnear;': '\u2927',
1465 'Odblac;': '\u0150',
1466 'odblac;': '\u0151',
1469 'odsold;': '\u29bc',
1473 'Ofr;': '\U0001d512',
1474 'ofr;': '\U0001d52c',
1486 'olcross;': '\u29bb',
1493 'Omicron;': '\u039f',
1494 'omicron;': '\u03bf',
1496 'ominus;': '\u2296',
1497 'Oopf;': '\U0001d546',
1498 'oopf;': '\U0001d560',
1500 'OpenCurlyDoubleQuote;': '\u201c',
1501 'OpenCurlyQuote;': '\u2018',
1509 'orderof;': '\u2134',
1514 'origof;': '\u22b6',
1516 'orslope;': '\u2a57',
1519 'Oscr;': '\U0001d4aa',
1530 'Otimes;': '\u2a37',
1531 'otimes;': '\u2297',
1532 'otimesas;': '\u2a36',
1538 'OverBar;': '\u203e',
1539 'OverBrace;': '\u23de',
1540 'OverBracket;': '\u23b4',
1541 'OverParenthesis;': '\u23dc',
1545 'parallel;': '\u2225',
1546 'parsim;': '\u2af3',
1549 'PartialD;': '\u2202',
1554 'permil;': '\u2030',
1556 'pertenk;': '\u2031',
1557 'Pfr;': '\U0001d513',
1558 'pfr;': '\U0001d52d',
1562 'phmmat;': '\u2133',
1566 'pitchfork;': '\u22d4',
1568 'planck;': '\u210f',
1569 'planckh;': '\u210e',
1570 'plankv;': '\u210f',
1572 'plusacir;': '\u2a23',
1574 'pluscir;': '\u2a22',
1575 'plusdo;': '\u2214',
1576 'plusdu;': '\u2a25',
1578 'PlusMinus;': '\xb1',
1581 'plussim;': '\u2a26',
1582 'plustwo;': '\u2a27',
1584 'Poincareplane;': '\u210c',
1585 'pointint;': '\u2a15',
1587 'popf;': '\U0001d561',
1597 'precapprox;': '\u2ab7',
1598 'preccurlyeq;': '\u227c',
1599 'Precedes;': '\u227a',
1600 'PrecedesEqual;': '\u2aaf',
1601 'PrecedesSlantEqual;': '\u227c',
1602 'PrecedesTilde;': '\u227e',
1603 'preceq;': '\u2aaf',
1604 'precnapprox;': '\u2ab9',
1605 'precneqq;': '\u2ab5',
1606 'precnsim;': '\u22e8',
1607 'precsim;': '\u227e',
1610 'primes;': '\u2119',
1613 'prnsim;': '\u22e8',
1615 'Product;': '\u220f',
1616 'profalar;': '\u232e',
1617 'profline;': '\u2312',
1618 'profsurf;': '\u2313',
1620 'Proportion;': '\u2237',
1621 'Proportional;': '\u221d',
1622 'propto;': '\u221d',
1624 'prurel;': '\u22b0',
1625 'Pscr;': '\U0001d4ab',
1626 'pscr;': '\U0001d4c5',
1629 'puncsp;': '\u2008',
1630 'Qfr;': '\U0001d514',
1631 'qfr;': '\U0001d52e',
1634 'qopf;': '\U0001d562',
1635 'qprime;': '\u2057',
1636 'Qscr;': '\U0001d4ac',
1637 'qscr;': '\U0001d4c6',
1638 'quaternions;': '\u210d',
1639 'quatint;': '\u2a16',
1641 'questeq;': '\u225f',
1647 'race;': '\u223d\u0331',
1648 'Racute;': '\u0154',
1649 'racute;': '\u0155',
1651 'raemptyv;': '\u29b3',
1656 'rangle;': '\u27e9',
1662 'rarrap;': '\u2975',
1664 'rarrbfs;': '\u2920',
1666 'rarrfs;': '\u291e',
1667 'rarrhk;': '\u21aa',
1668 'rarrlp;': '\u21ac',
1669 'rarrpl;': '\u2945',
1670 'rarrsim;': '\u2974',
1671 'Rarrtl;': '\u2916',
1672 'rarrtl;': '\u21a3',
1674 'rAtail;': '\u291c',
1675 'ratail;': '\u291a',
1677 'rationals;': '\u211a',
1685 'rbrksld;': '\u298e',
1686 'rbrkslu;': '\u2990',
1687 'Rcaron;': '\u0158',
1688 'rcaron;': '\u0159',
1689 'Rcedil;': '\u0156',
1690 'rcedil;': '\u0157',
1696 'rdldhar;': '\u2969',
1698 'rdquor;': '\u201d',
1702 'realine;': '\u211b',
1703 'realpart;': '\u211c',
1710 'ReverseElement;': '\u220b',
1711 'ReverseEquilibrium;': '\u21cb',
1712 'ReverseUpEquilibrium;': '\u296f',
1713 'rfisht;': '\u297d',
1714 'rfloor;': '\u230b',
1716 'rfr;': '\U0001d52f',
1720 'rharul;': '\u296c',
1724 'RightAngleBracket;': '\u27e9',
1725 'RightArrow;': '\u2192',
1726 'Rightarrow;': '\u21d2',
1727 'rightarrow;': '\u2192',
1728 'RightArrowBar;': '\u21e5',
1729 'RightArrowLeftArrow;': '\u21c4',
1730 'rightarrowtail;': '\u21a3',
1731 'RightCeiling;': '\u2309',
1732 'RightDoubleBracket;': '\u27e7',
1733 'RightDownTeeVector;': '\u295d',
1734 'RightDownVector;': '\u21c2',
1735 'RightDownVectorBar;': '\u2955',
1736 'RightFloor;': '\u230b',
1737 'rightharpoondown;': '\u21c1',
1738 'rightharpoonup;': '\u21c0',
1739 'rightleftarrows;': '\u21c4',
1740 'rightleftharpoons;': '\u21cc',
1741 'rightrightarrows;': '\u21c9',
1742 'rightsquigarrow;': '\u219d',
1743 'RightTee;': '\u22a2',
1744 'RightTeeArrow;': '\u21a6',
1745 'RightTeeVector;': '\u295b',
1746 'rightthreetimes;': '\u22cc',
1747 'RightTriangle;': '\u22b3',
1748 'RightTriangleBar;': '\u29d0',
1749 'RightTriangleEqual;': '\u22b5',
1750 'RightUpDownVector;': '\u294f',
1751 'RightUpTeeVector;': '\u295c',
1752 'RightUpVector;': '\u21be',
1753 'RightUpVectorBar;': '\u2954',
1754 'RightVector;': '\u21c0',
1755 'RightVectorBar;': '\u2953',
1757 'risingdotseq;': '\u2253',
1761 'rmoust;': '\u23b1',
1762 'rmoustache;': '\u23b1',
1769 'ropf;': '\U0001d563',
1770 'roplus;': '\u2a2e',
1771 'rotimes;': '\u2a35',
1772 'RoundImplies;': '\u2970',
1774 'rpargt;': '\u2994',
1775 'rppolint;': '\u2a12',
1777 'Rrightarrow;': '\u21db',
1778 'rsaquo;': '\u203a',
1780 'rscr;': '\U0001d4c7',
1785 'rsquor;': '\u2019',
1786 'rthree;': '\u22cc',
1787 'rtimes;': '\u22ca',
1791 'rtriltri;': '\u29ce',
1792 'RuleDelayed;': '\u29f4',
1793 'ruluhar;': '\u2968',
1795 'Sacute;': '\u015a',
1796 'sacute;': '\u015b',
1801 'Scaron;': '\u0160',
1802 'scaron;': '\u0161',
1806 'Scedil;': '\u015e',
1807 'scedil;': '\u015f',
1812 'scnsim;': '\u22e9',
1813 'scpolint;': '\u2a13',
1820 'searhk;': '\u2925',
1823 'searrow;': '\u2198',
1827 'seswar;': '\u2929',
1828 'setminus;': '\u2216',
1831 'Sfr;': '\U0001d516',
1832 'sfr;': '\U0001d530',
1833 'sfrown;': '\u2322',
1835 'SHCHcy;': '\u0429',
1836 'shchcy;': '\u0449',
1839 'ShortDownArrow;': '\u2193',
1840 'ShortLeftArrow;': '\u2190',
1841 'shortmid;': '\u2223',
1842 'shortparallel;': '\u2225',
1843 'ShortRightArrow;': '\u2192',
1844 'ShortUpArrow;': '\u2191',
1849 'sigmaf;': '\u03c2',
1850 'sigmav;': '\u03c2',
1852 'simdot;': '\u2a6a',
1860 'simplus;': '\u2a24',
1861 'simrarr;': '\u2972',
1863 'SmallCircle;': '\u2218',
1864 'smallsetminus;': '\u2216',
1865 'smashp;': '\u2a33',
1866 'smeparsl;': '\u29e4',
1871 'smtes;': '\u2aac\ufe00',
1872 'SOFTcy;': '\u042c',
1873 'softcy;': '\u044c',
1876 'solbar;': '\u233f',
1877 'Sopf;': '\U0001d54a',
1878 'sopf;': '\U0001d564',
1879 'spades;': '\u2660',
1880 'spadesuit;': '\u2660',
1883 'sqcaps;': '\u2293\ufe00',
1885 'sqcups;': '\u2294\ufe00',
1888 'sqsube;': '\u2291',
1889 'sqsubset;': '\u228f',
1890 'sqsubseteq;': '\u2291',
1892 'sqsupe;': '\u2292',
1893 'sqsupset;': '\u2290',
1894 'sqsupseteq;': '\u2292',
1896 'Square;': '\u25a1',
1897 'square;': '\u25a1',
1898 'SquareIntersection;': '\u2293',
1899 'SquareSubset;': '\u228f',
1900 'SquareSubsetEqual;': '\u2291',
1901 'SquareSuperset;': '\u2290',
1902 'SquareSupersetEqual;': '\u2292',
1903 'SquareUnion;': '\u2294',
1904 'squarf;': '\u25aa',
1907 'Sscr;': '\U0001d4ae',
1908 'sscr;': '\U0001d4c8',
1909 'ssetmn;': '\u2216',
1910 'ssmile;': '\u2323',
1911 'sstarf;': '\u22c6',
1915 'straightepsilon;': '\u03f5',
1916 'straightphi;': '\u03d5',
1920 'subdot;': '\u2abd',
1923 'subedot;': '\u2ac3',
1924 'submult;': '\u2ac1',
1927 'subplus;': '\u2abf',
1928 'subrarr;': '\u2979',
1929 'Subset;': '\u22d0',
1930 'subset;': '\u2282',
1931 'subseteq;': '\u2286',
1932 'subseteqq;': '\u2ac5',
1933 'SubsetEqual;': '\u2286',
1934 'subsetneq;': '\u228a',
1935 'subsetneqq;': '\u2acb',
1936 'subsim;': '\u2ac7',
1937 'subsub;': '\u2ad5',
1938 'subsup;': '\u2ad3',
1940 'succapprox;': '\u2ab8',
1941 'succcurlyeq;': '\u227d',
1942 'Succeeds;': '\u227b',
1943 'SucceedsEqual;': '\u2ab0',
1944 'SucceedsSlantEqual;': '\u227d',
1945 'SucceedsTilde;': '\u227f',
1946 'succeq;': '\u2ab0',
1947 'succnapprox;': '\u2aba',
1948 'succneqq;': '\u2ab6',
1949 'succnsim;': '\u22e9',
1950 'succsim;': '\u227f',
1951 'SuchThat;': '\u220b',
1963 'supdot;': '\u2abe',
1964 'supdsub;': '\u2ad8',
1967 'supedot;': '\u2ac4',
1968 'Superset;': '\u2283',
1969 'SupersetEqual;': '\u2287',
1970 'suphsol;': '\u27c9',
1971 'suphsub;': '\u2ad7',
1972 'suplarr;': '\u297b',
1973 'supmult;': '\u2ac2',
1976 'supplus;': '\u2ac0',
1977 'Supset;': '\u22d1',
1978 'supset;': '\u2283',
1979 'supseteq;': '\u2287',
1980 'supseteqq;': '\u2ac6',
1981 'supsetneq;': '\u228b',
1982 'supsetneqq;': '\u2acc',
1983 'supsim;': '\u2ac8',
1984 'supsub;': '\u2ad4',
1985 'supsup;': '\u2ad6',
1986 'swarhk;': '\u2926',
1989 'swarrow;': '\u2199',
1990 'swnwar;': '\u292a',
1994 'target;': '\u2316',
1998 'Tcaron;': '\u0164',
1999 'tcaron;': '\u0165',
2000 'Tcedil;': '\u0162',
2001 'tcedil;': '\u0163',
2005 'telrec;': '\u2315',
2006 'Tfr;': '\U0001d517',
2007 'tfr;': '\U0001d531',
2008 'there4;': '\u2234',
2009 'Therefore;': '\u2234',
2010 'therefore;': '\u2234',
2013 'thetasym;': '\u03d1',
2014 'thetav;': '\u03d1',
2015 'thickapprox;': '\u2248',
2016 'thicksim;': '\u223c',
2017 'ThickSpace;': '\u205f\u200a',
2018 'thinsp;': '\u2009',
2019 'ThinSpace;': '\u2009',
2021 'thksim;': '\u223c',
2028 'TildeEqual;': '\u2243',
2029 'TildeFullEqual;': '\u2245',
2030 'TildeTilde;': '\u2248',
2033 'timesb;': '\u22a0',
2034 'timesbar;': '\u2a31',
2035 'timesd;': '\u2a30',
2039 'topbot;': '\u2336',
2040 'topcir;': '\u2af1',
2041 'Topf;': '\U0001d54b',
2042 'topf;': '\U0001d565',
2043 'topfork;': '\u2ada',
2045 'tprime;': '\u2034',
2048 'triangle;': '\u25b5',
2049 'triangledown;': '\u25bf',
2050 'triangleleft;': '\u25c3',
2051 'trianglelefteq;': '\u22b4',
2052 'triangleq;': '\u225c',
2053 'triangleright;': '\u25b9',
2054 'trianglerighteq;': '\u22b5',
2055 'tridot;': '\u25ec',
2057 'triminus;': '\u2a3a',
2058 'TripleDot;': '\u20db',
2059 'triplus;': '\u2a39',
2061 'tritime;': '\u2a3b',
2062 'trpezium;': '\u23e2',
2063 'Tscr;': '\U0001d4af',
2064 'tscr;': '\U0001d4c9',
2069 'Tstrok;': '\u0166',
2070 'tstrok;': '\u0167',
2072 'twoheadleftarrow;': '\u219e',
2073 'twoheadrightarrow;': '\u21a0',
2081 'Uarrocir;': '\u2949',
2084 'Ubreve;': '\u016c',
2085 'ubreve;': '\u016d',
2093 'Udblac;': '\u0170',
2094 'udblac;': '\u0171',
2096 'ufisht;': '\u297e',
2097 'Ufr;': '\U0001d518',
2098 'ufr;': '\U0001d532',
2107 'ulcorn;': '\u231c',
2108 'ulcorner;': '\u231c',
2109 'ulcrop;': '\u230f',
2116 'UnderBrace;': '\u23df',
2117 'UnderBracket;': '\u23b5',
2118 'UnderParenthesis;': '\u23dd',
2120 'UnionPlus;': '\u228e',
2123 'Uopf;': '\U0001d54c',
2124 'uopf;': '\U0001d566',
2125 'UpArrow;': '\u2191',
2126 'Uparrow;': '\u21d1',
2127 'uparrow;': '\u2191',
2128 'UpArrowBar;': '\u2912',
2129 'UpArrowDownArrow;': '\u21c5',
2130 'UpDownArrow;': '\u2195',
2131 'Updownarrow;': '\u21d5',
2132 'updownarrow;': '\u2195',
2133 'UpEquilibrium;': '\u296e',
2134 'upharpoonleft;': '\u21bf',
2135 'upharpoonright;': '\u21be',
2137 'UpperLeftArrow;': '\u2196',
2138 'UpperRightArrow;': '\u2197',
2142 'Upsilon;': '\u03a5',
2143 'upsilon;': '\u03c5',
2145 'UpTeeArrow;': '\u21a5',
2146 'upuparrows;': '\u21c8',
2147 'urcorn;': '\u231d',
2148 'urcorner;': '\u231d',
2149 'urcrop;': '\u230e',
2153 'Uscr;': '\U0001d4b0',
2154 'uscr;': '\U0001d4ca',
2156 'Utilde;': '\u0168',
2157 'utilde;': '\u0169',
2165 'uwangle;': '\u29a7',
2166 'vangrt;': '\u299c',
2167 'varepsilon;': '\u03f5',
2168 'varkappa;': '\u03f0',
2169 'varnothing;': '\u2205',
2170 'varphi;': '\u03d5',
2172 'varpropto;': '\u221d',
2175 'varrho;': '\u03f1',
2176 'varsigma;': '\u03c2',
2177 'varsubsetneq;': '\u228a\ufe00',
2178 'varsubsetneqq;': '\u2acb\ufe00',
2179 'varsupsetneq;': '\u228b\ufe00',
2180 'varsupsetneqq;': '\u2acc\ufe00',
2181 'vartheta;': '\u03d1',
2182 'vartriangleleft;': '\u22b2',
2183 'vartriangleright;': '\u22b3',
2193 'Vdashl;': '\u2ae6',
2196 'veebar;': '\u22bb',
2198 'vellip;': '\u22ee',
2199 'Verbar;': '\u2016',
2203 'VerticalBar;': '\u2223',
2204 'VerticalLine;': '|',
2205 'VerticalSeparator;': '\u2758',
2206 'VerticalTilde;': '\u2240',
2207 'VeryThinSpace;': '\u200a',
2208 'Vfr;': '\U0001d519',
2209 'vfr;': '\U0001d533',
2211 'vnsub;': '\u2282\u20d2',
2212 'vnsup;': '\u2283\u20d2',
2213 'Vopf;': '\U0001d54d',
2214 'vopf;': '\U0001d567',
2217 'Vscr;': '\U0001d4b1',
2218 'vscr;': '\U0001d4cb',
2219 'vsubnE;': '\u2acb\ufe00',
2220 'vsubne;': '\u228a\ufe00',
2221 'vsupnE;': '\u2acc\ufe00',
2222 'vsupne;': '\u228b\ufe00',
2223 'Vvdash;': '\u22aa',
2224 'vzigzag;': '\u299a',
2227 'wedbar;': '\u2a5f',
2230 'wedgeq;': '\u2259',
2231 'weierp;': '\u2118',
2232 'Wfr;': '\U0001d51a',
2233 'wfr;': '\U0001d534',
2234 'Wopf;': '\U0001d54e',
2235 'wopf;': '\U0001d568',
2238 'wreath;': '\u2240',
2239 'Wscr;': '\U0001d4b2',
2240 'wscr;': '\U0001d4cc',
2245 'Xfr;': '\U0001d51b',
2246 'xfr;': '\U0001d535',
2256 'Xopf;': '\U0001d54f',
2257 'xopf;': '\U0001d569',
2258 'xoplus;': '\u2a01',
2259 'xotime;': '\u2a02',
2262 'Xscr;': '\U0001d4b3',
2263 'xscr;': '\U0001d4cd',
2264 'xsqcup;': '\u2a06',
2265 'xuplus;': '\u2a04',
2268 'xwedge;': '\u22c0',
2281 'Yfr;': '\U0001d51c',
2282 'yfr;': '\U0001d536',
2285 'Yopf;': '\U0001d550',
2286 'yopf;': '\U0001d56a',
2287 'Yscr;': '\U0001d4b4',
2288 'yscr;': '\U0001d4ce',
2294 'Zacute;': '\u0179',
2295 'zacute;': '\u017a',
2296 'Zcaron;': '\u017d',
2297 'zcaron;': '\u017e',
2302 'zeetrf;': '\u2128',
2303 'ZeroWidthSpace;': '\u200b',
2307 'zfr;': '\U0001d537',
2310 'zigrarr;': '\u21dd',
2312 'zopf;': '\U0001d56b',
2313 'Zscr;': '\U0001d4b5',
2314 'zscr;': '\U0001d4cf',
2320 import http.client as compat_http_client
2321 except ImportError: # Python 2
2322 import httplib as compat_http_client
2325 from urllib.error import HTTPError as compat_HTTPError
2326 except ImportError: # Python 2
2327 from urllib2 import HTTPError as compat_HTTPError
2330 from urllib.request import urlretrieve as compat_urlretrieve
2331 except ImportError: # Python 2
2332 from urllib import urlretrieve as compat_urlretrieve
2335 from html.parser import HTMLParser as compat_HTMLParser
2336 except ImportError: # Python 2
2337 from HTMLParser import HTMLParser as compat_HTMLParser
2340 from HTMLParser import HTMLParseError as compat_HTMLParseError
2341 except ImportError: # Python <3.4
2343 from html.parser import HTMLParseError as compat_HTMLParseError
2344 except ImportError: # Python >3.4
2346 # HTMLParseError has been deprecated in Python 3.3 and removed in
2347 # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
2348 # and uniform cross-version exception handling
2349 class compat_HTMLParseError(Exception):
2353 from subprocess import DEVNULL
2354 compat_subprocess_get_DEVNULL = lambda: DEVNULL
2356 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
2359 import http.server as compat_http_server
2361 import BaseHTTPServer as compat_http_server
2364 compat_str = unicode # Python 2
2369 from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
2370 from urllib.parse import unquote as compat_urllib_parse_unquote
2371 from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
2372 except ImportError: # Python 2
2373 _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
2374 else re.compile(r'([\x00-\x7f]+)'))
2376 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2377 # implementations from cpython 3.4.3's stdlib. Python 2's version
2378 # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
2380 def compat_urllib_parse_unquote_to_bytes(string):
2381 """unquote_to_bytes('abc%20def') -> b'abc def'."""
2382 # Note: strings are encoded as UTF-8. This is only an issue if it contains
2383 # unescaped non-ASCII characters, which URIs should not.
2385 # Is it a string-like object?
2388 if isinstance(string, compat_str):
2389 string = string.encode('utf-8')
2390 bits = string.split(b'%')
2395 for item in bits[1:]:
2397 append(compat_urllib_parse._hextochr[item[:2]])
2402 return b''.join(res)
2404 def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
2405 """Replace %xx escapes by their single-character equivalent. The optional
2406 encoding and errors parameters specify how to decode percent-encoded
2407 sequences into Unicode characters, as accepted by the bytes.decode()
2409 By default, percent-encoded sequences are decoded with UTF-8, and invalid
2410 sequences are replaced by a placeholder character.
2412 unquote('abc%20def') -> 'abc def'.
2414 if '%' not in string:
2417 if encoding is None:
2421 bits = _asciire.split(string)
2424 for i in range(1, len(bits), 2):
2425 append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote an HTML form value: plus signs become spaces, then %xx
    escapes are decoded by compat_urllib_parse_unquote().

    The plus signs are replaced before unquoting, so an escaped plus
    (%2B) comes out as a literal '+'.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    spaced = string.replace('+', ' ')
    return compat_urllib_parse_unquote(spaced, encoding, errors)
2439 from urllib.parse import urlencode as compat_urllib_parse_urlencode
2440 except ImportError: # Python 2
2441 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2442 # Possible solutions are to either port it from python 3 with all
2443 # the friends or manually ensure input query contains only byte strings.
2444 # We will stick with the latter, thus recursively encoding the whole query.
2445 def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
2447 if isinstance(e, dict):
2449 elif isinstance(e, (list, tuple,)):
2450 list_e = encode_list(e)
2451 e = tuple(list_e) if isinstance(e, tuple) else list_e
2452 elif isinstance(e, compat_str):
2453 e = e.encode(encoding)
2457 return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
2460 return [encode_elem(e) for e in l]
2462 return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
2465 from urllib.request import DataHandler as compat_urllib_request_DataHandler
2466 except ImportError: # Python < 3.4
2467 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
2468 class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
2469 def data_open(self, req):
2470 # data URLs as specified in RFC 2397.
2472 # ignores POSTed data
2475 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2476 # mediatype := [ type "/" subtype ] *( ";" parameter )
2478 # parameter := attribute "=" value
2479 url = req.get_full_url()
2481 scheme, data = url.split(':', 1)
2482 mediatype, data = data.split(',', 1)
2484 # even base64 encoded data URLs might be quoted so unquote in any case:
2485 data = compat_urllib_parse_unquote_to_bytes(data)
2486 if mediatype.endswith(';base64'):
2487 data = binascii.a2b_base64(data)
2488 mediatype = mediatype[:-7]
2491 mediatype = 'text/plain;charset=US-ASCII'
2493 headers = email.message_from_string(
2494 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
2496 return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
2499 compat_basestring = basestring # Python 2
2501 compat_basestring = str
2504 compat_chr = unichr # Python 2
2509 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
2510 except ImportError: # Python 2.6
2511 from xml.parsers.expat import ExpatError as compat_xml_parse_error
2514 etree = xml.etree.ElementTree
2517 class _TreeBuilder(etree.TreeBuilder):
2518 def doctype(self, name, pubid, system):
2523 # xml.etree.ElementTree.Element is a method in Python <=2.6 and
2524 # the following will crash with:
2525 # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
2526 isinstance(None, xml.etree.ElementTree.Element)
2527 from xml.etree.ElementTree import Element as compat_etree_Element
2528 except TypeError: # Python <=2.6
2529 from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
2531 if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
    """Parse the XML document *text* and return its root element.

    The parser is built on a fresh _TreeBuilder instance.
    """
    builder = _TreeBuilder()
    parser = etree.XMLParser(target=builder)
    return etree.XML(text, parser=parser)
2535 # python 2.x tries to encode unicode strings with ascii (see the
2536 # XMLParser._fixtext method)
2538 _etree_iter = etree.Element.iter
2539 except AttributeError: # Python <=2.6
2540 def _etree_iter(root):
2541 for el in root.findall('*'):
2543 for sub in _etree_iter(el):
2546 # on 2.6 XML doesn't have a parser argument, function copied from CPython
2548 def _XML(text, parser=None):
2550 parser = etree.XMLParser(target=_TreeBuilder())
2552 return parser.close()
2554 def _element_factory(*args, **kwargs):
2555 el = etree.Element(*args, **kwargs)
2556 for k, v in el.items():
2557 if isinstance(v, bytes):
2558 el.set(k, v.decode('utf-8'))
2561 def compat_etree_fromstring(text):
2562 doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
2563 for el in _etree_iter(doc):
2564 if el.text is not None and isinstance(el.text, bytes):
2565 el.text = el.text.decode('utf-8')
2568 if hasattr(etree, 'register_namespace'):
2569 compat_etree_register_namespace = etree.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Register *prefix* as the serialization prefix for namespace *uri*.

    The mapping lives in the global etree._namespace_map. Any entry that
    already uses either the given URI or the given prefix is dropped
    before the new entry is stored.

    Raises ValueError if *prefix* has the reserved "ns<digits>" form.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    registry = etree._namespace_map
    # Collect the conflicting keys first so the dict is not mutated mid-iteration.
    stale = [known_uri for known_uri, known_prefix in registry.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del registry[known_uri]
    registry[uri] = prefix
2586 if sys.version_info < (2, 7):
2587 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2588 # .//node does not match if a node is a direct child of . !
def compat_xpath(xpath):
    """Return *xpath* as an ASCII byte string when it is a compat_str.

    Any other value is handed back unchanged.
    """
    if not isinstance(xpath, compat_str):
        return xpath
    return xpath.encode('ascii')
2594 compat_xpath = lambda xpath: xpath
2597 from urllib.parse import parse_qs as compat_parse_qs
2598 except ImportError: # Python 2
2599 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2600 # Python 2's version is apparently totally broken
2602 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
2603 encoding='utf-8', errors='replace'):
2604 qs, _coerce_result = qs, compat_str
2605 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
2607 for name_value in pairs:
2608 if not name_value and not strict_parsing:
2610 nv = name_value.split('=', 1)
2613 raise ValueError('bad query field: %r' % (name_value,))
2614 # Handle case of a control-name with no equal sign
2615 if keep_blank_values:
2619 if len(nv[1]) or keep_blank_values:
2620 name = nv[0].replace('+', ' ')
2621 name = compat_urllib_parse_unquote(
2622 name, encoding=encoding, errors=errors)
2623 name = _coerce_result(name)
2624 value = nv[1].replace('+', ' ')
2625 value = compat_urllib_parse_unquote(
2626 value, encoding=encoding, errors=errors)
2627 value = _coerce_result(value)
2628 r.append((name, value))
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse the query string *qs* into a dict mapping each field name to
    the list of its values, in order of appearance.

    All arguments are forwarded to _parse_qsl(), which produces the
    (name, value) pairs grouped here.
    """
    grouped = {}
    for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                  encoding=encoding, errors=errors):
        grouped.setdefault(name, []).append(value)
    return grouped
2644 compat_os_name = os._name if os.name == 'java' else os.name
2647 if compat_os_name == 'nt':
def compat_shlex_quote(s):
    """Quote *s* for a command line.

    A string made only of word characters, '-', '_', '.' and '/' is
    returned as is. Anything else is wrapped in double quotes with
    embedded double quotes backslash-escaped.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    escaped = s.replace('"', '\\"')
    return '"%s"' % escaped
2652 from shlex import quote as compat_shlex_quote
2653 except ImportError: # Python < 3.3
def compat_shlex_quote(s):
    """Quote *s* for a POSIX shell.

    A string made only of word characters, '-', '_', '.' and '/' is
    returned as is. Anything else is wrapped in single quotes, with each
    embedded single quote spliced in as '"'"'.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    escaped = s.replace("'", "'\"'\"'")
    return "'" + escaped + "'"
2662 args = shlex.split('ä¸æ–‡')
2663 assert (isinstance(args, list)
2664 and isinstance(args[0], compat_str)
2665 and args[0] == 'ä¸æ–‡')
2666 compat_shlex_split = shlex.split
2667 except (AssertionError, UnicodeEncodeError):
2668 # Working around shlex issue with unicode strings on some python 2
2669 # versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
    """Split *s* with shlex.split() and return the tokens decoded from UTF-8.

    A compat_str input is encoded to UTF-8 before being handed to shlex;
    every resulting token is decoded back from UTF-8.
    """
    if isinstance(s, compat_str):
        s = s.encode('utf-8')
    return [token.decode('utf-8') for token in shlex.split(s, comments, posix)]
2683 if sys.version_info >= (3, 0):
2684 compat_getenv = os.getenv
2685 compat_expanduser = os.path.expanduser
2687 def compat_setenv(key, value, env=os.environ):
2690 # Environment variables should be decoded with filesystem encoding.
2691 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
2693 def compat_getenv(key, default=None):
2694 from .utils import get_filesystem_encoding
2695 env = os.getenv(key, default)
2697 env = env.decode(get_filesystem_encoding())
2700 def compat_setenv(key, value, env=os.environ):
2702 from .utils import get_filesystem_encoding
2703 return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
2704 env[encode(key)] = encode(value)
2706 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2707 # environment variables with filesystem encoding. We will work around this by
2708 # providing adjusted implementations.
2709 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2710 # for different platforms with correct environment variables decoding.
2712 if compat_os_name == 'posix':
2713 def compat_expanduser(path):
2714 """Expand ~ and ~user constructions. If user or $HOME is unknown,
2716 if not path.startswith('~'):
2718 i = path.find('/', 1)
2722 if 'HOME' not in os.environ:
2724 userhome = pwd.getpwuid(os.getuid()).pw_dir
2726 userhome = compat_getenv('HOME')
2730 pwent = pwd.getpwnam(path[1:i])
2733 userhome = pwent.pw_dir
2734 userhome = userhome.rstrip('/')
2735 return (userhome + path[i:]) or '/'
2736 elif compat_os_name in ('nt', 'ce'):
2737 def compat_expanduser(path):
2738 """Expand ~ and ~user constructs.
2740 If user or $HOME is unknown, do nothing."""
2744 while i < n and path[i] not in '/\\':
2747 if 'HOME' in os.environ:
2748 userhome = compat_getenv('HOME')
2749 elif 'USERPROFILE' in os.environ:
2750 userhome = compat_getenv('USERPROFILE')
2751 elif 'HOMEPATH' not in os.environ:
2755 drive = compat_getenv('HOMEDRIVE')
2758 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
2761 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
2763 return userhome + path[i:]
2765 compat_expanduser = os.path.expanduser
2768 if compat_os_name == 'nt' and sys.version_info < (3, 8):
2769 # os.path.realpath on Windows does not follow symbolic links
2770 # prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
    """Follow *path* through any chain of symbolic links and return the
    absolute path of the final target.

    A path that is not a symbolic link is returned unchanged. If the
    links form a cycle, following stops at the first path seen twice and
    that path is returned.
    """
    visited = set()
    while os.path.islink(path) and path not in visited:
        visited.add(path)
        target = os.readlink(path)
        # A relative link target is relative to the directory containing the
        # link, not to the current working directory. os.path.join() keeps an
        # absolute target as is.
        path = os.path.abspath(os.path.join(os.path.dirname(path), target))
    return path
2776 compat_realpath = os.path.realpath
2779 if sys.version_info < (3, 0):
def compat_print(s):
    """Print *s* encoded with the preferred encoding.

    Characters the encoding cannot represent are emitted as XML character
    references ('xmlcharrefreplace').
    """
    from .utils import preferredencoding
    encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
    print(encoded)
2784 def compat_print(s):
2785 assert isinstance(s, compat_str)
2789 if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass(), encoding a compat_str *prompt* first.

    A compat_str prompt is encoded with the preferred encoding; any other
    prompt is passed through untouched. Extra arguments are forwarded.
    """
    if not isinstance(prompt, compat_str):
        return getpass.getpass(prompt, *args, **kwargs)
    from .utils import preferredencoding
    encoded_prompt = prompt.encode(preferredencoding())
    return getpass.getpass(encoded_prompt, *args, **kwargs)
2796 compat_getpass = getpass.getpass
2799 compat_input = raw_input
2800 except NameError: # Python 3
2801 compat_input = input
2803 # Python < 2.6.5 require kwargs to be bytes
2807 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a copy of *kwargs* with every key passed through bytes()."""
    converted = {}
    for key, value in kwargs.items():
        converted[bytes(key)] = value
    return converted
2812 compat_kwargs = lambda kwargs: kwargs
2816 compat_numeric_types = (int, float, long, complex)
2817 except NameError: # Python 3
2818 compat_numeric_types = (int, float, complex)
2822 compat_integer_types = (int, long)
2823 except NameError: # Python 3
2824 compat_integer_types = (int, )
2827 if sys.version_info < (2, 7):
2828 def compat_socket_create_connection(address, timeout, source_address=None):
2829 host, port = address
2831 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
2832 af, socktype, proto, canonname, sa = res
2835 sock = socket.socket(af, socktype, proto)
2836 sock.settimeout(timeout)
2838 sock.bind(source_address)
2841 except socket.error as _:
2843 if sock is not None:
2848 raise socket.error('getaddrinfo returns an empty list')
2850 compat_socket_create_connection = socket.create_connection
2853 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
2854 # See http://bugs.python.org/issue9161 for what is broken
2855 def workaround_optparse_bug9161():
2856 op = optparse.OptionParser()
2857 og = optparse.OptionGroup(op, 'foo')
2861 real_add_option = optparse.OptionGroup.add_option
2863 def _compat_add_option(self, *args, **kwargs):
2865 v.encode('ascii', 'replace') if isinstance(v, compat_str)
2867 bargs = [enc(a) for a in args]
2869 (k, enc(v)) for k, v in kwargs.items())
2870 return real_add_option(self, *bargs, **bkwargs)
2871 optparse.OptionGroup.add_option = _compat_add_option
2874 if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
2875 compat_get_terminal_size = shutil.get_terminal_size
2877 _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
2879 def compat_get_terminal_size(fallback=(80, 24)):
2880 columns = compat_getenv('COLUMNS')
2882 columns = int(columns)
2885 lines = compat_getenv('LINES')
2891 if columns is None or lines is None or columns <= 0 or lines <= 0:
2893 sp = subprocess.Popen(
2895 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2896 out, err = sp.communicate()
2897 _lines, _columns = map(int, out.split())
2899 _columns, _lines = _terminal_size(*fallback)
2901 if columns is None or columns <= 0:
2903 if lines is None or lines <= 0:
2905 return _terminal_size(columns, lines)
2908 itertools.count(start=0, step=1)
2909 compat_itertools_count = itertools.count
2910 except TypeError: # Python 2.6
2911 def compat_itertools_count(start=0, step=1):
2917 if sys.version_info >= (3, 0):
2918 from tokenize import tokenize as compat_tokenize_tokenize
2920 from tokenize import generate_tokens as compat_tokenize_tokenize
2924 struct.pack('!I', 0)
2926 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2927 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """struct.pack() wrapper that accepts the format *spec* as compat_str.

    A compat_str spec is encoded to ASCII bytes before the call; any
    other spec is used as given.
    """
    fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
    return struct.pack(fmt, *args)
def compat_struct_unpack(spec, *args):
    """struct.unpack() wrapper that accepts the format *spec* as compat_str.

    A compat_str spec is encoded to ASCII bytes before the call; any
    other spec is used as given.
    """
    if not isinstance(spec, compat_str):
        return struct.unpack(spec, *args)
    return struct.unpack(spec.encode('ascii'), *args)
class compat_Struct(struct.Struct):
    """struct.Struct variant whose format may be given as compat_str."""

    def __init__(self, fmt):
        # A compat_str format is converted to ASCII bytes for the base class.
        encoded_fmt = fmt.encode('ascii') if isinstance(fmt, compat_str) else fmt
        super(compat_Struct, self).__init__(encoded_fmt)
2944 compat_struct_pack = struct.pack
2945 compat_struct_unpack = struct.unpack
2946 if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
class compat_Struct(struct.Struct):
    """struct.Struct variant whose unpack() always receives a buffer."""

    def unpack(self, string):
        # Wrap anything that is not already a buffer before delegating.
        data = string if isinstance(string, buffer) else buffer(string)  # noqa: F821
        return super(compat_Struct, self).unpack(data)
2953 compat_Struct = struct.Struct
2957 from future_builtins import zip as compat_zip
2958 except ImportError: # not 2.6+ or is 3.x
2960 from itertools import izip as compat_zip # < 2.5 or 3.x
2965 if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
    """base64.b64decode() wrapper that accepts *s* as compat_str.

    A compat_str input is encoded to ASCII bytes first; extra arguments
    are forwarded unchanged.
    """
    payload = s.encode('ascii') if isinstance(s, compat_str) else s
    return base64.b64decode(payload, *args, **kwargs)
2971 compat_b64decode = base64.b64decode
2974 if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
2975 # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
2976 # names, see the original PyPy issue [1] and the youtube-dl one [2].
2977 # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
2978 # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
2979 def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
2980 real = ctypes.WINFUNCTYPE(*args, **kwargs)
2982 def resf(tpl, *args, **kwargs):
2984 return real((str(funcname), dll), *args, **kwargs)
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Forward all arguments to ctypes.WINFUNCTYPE and return its result."""
    make_prototype = ctypes.WINFUNCTYPE
    return make_prototype(*args, **kwargs)
2993 'compat_HTMLParseError',
2994 'compat_HTMLParser',
2998 'compat_basestring',
3001 'compat_cookiejar_Cookie',
3003 'compat_ctypes_WINFUNCTYPE',
3004 'compat_etree_Element',
3005 'compat_etree_fromstring',
3006 'compat_etree_register_namespace',
3007 'compat_expanduser',
3008 'compat_get_terminal_size',
3011 'compat_html_entities',
3012 'compat_html_entities_html5',
3013 'compat_http_client',
3014 'compat_http_server',
3016 'compat_integer_types',
3017 'compat_itertools_count',
3019 'compat_numeric_types',
3026 'compat_shlex_quote',
3027 'compat_shlex_split',
3028 'compat_socket_create_connection',
3030 'compat_struct_pack',
3031 'compat_struct_unpack',
3032 'compat_subprocess_get_DEVNULL',
3033 'compat_tokenize_tokenize',
3034 'compat_urllib_error',
3035 'compat_urllib_parse',
3036 'compat_urllib_parse_unquote',
3037 'compat_urllib_parse_unquote_plus',
3038 'compat_urllib_parse_unquote_to_bytes',
3039 'compat_urllib_parse_urlencode',
3040 'compat_urllib_parse_urlparse',
3041 'compat_urllib_request',
3042 'compat_urllib_request_DataHandler',
3043 'compat_urllib_response',
3045 'compat_urlretrieve',
3046 'compat_xml_parse_error',
3049 'workaround_optparse_bug9161',