2 from __future__ import unicode_literals
22 import xml.etree.ElementTree
26 import urllib.request as compat_urllib_request
27 except ImportError: # Python 2
28 import urllib2 as compat_urllib_request
31 import urllib.error as compat_urllib_error
32 except ImportError: # Python 2
33 import urllib2 as compat_urllib_error
36 import urllib.parse as compat_urllib_parse
37 except ImportError: # Python 2
38 import urllib as compat_urllib_parse
41 from urllib.parse import urlparse as compat_urllib_parse_urlparse
42 except ImportError: # Python 2
43 from urlparse import urlparse as compat_urllib_parse_urlparse
46 import urllib.parse as compat_urlparse
47 except ImportError: # Python 2
48 import urlparse as compat_urlparse
51 import urllib.response as compat_urllib_response
52 except ImportError: # Python 2
53 import urllib as compat_urllib_response
56 import http.cookiejar as compat_cookiejar
57 except ImportError: # Python 2
58 import cookielib as compat_cookiejar
61 import http.cookies as compat_cookies
62 except ImportError: # Python 2
63 import Cookie as compat_cookies
66 import html.entities as compat_html_entities
67 except ImportError: # Python 2
68 import htmlentitydefs as compat_html_entities
71 compat_html_entities_html5 = compat_html_entities.html5
72 except AttributeError:
73 # Copied from CPython 3.5.1 html/entities.py
74 compat_html_entities_html5 = {
83 'acE;': '\u223e\u0333',
103 'alefsym;': '\u2135',
118 'andslope;': '\u2a58',
124 'angmsdaa;': '\u29a8',
125 'angmsdab;': '\u29a9',
126 'angmsdac;': '\u29aa',
127 'angmsdad;': '\u29ab',
128 'angmsdae;': '\u29ac',
129 'angmsdaf;': '\u29ad',
130 'angmsdag;': '\u29ae',
131 'angmsdah;': '\u29af',
133 'angrtvb;': '\u22be',
134 'angrtvbd;': '\u299d',
137 'angzarr;': '\u237c',
140 'Aopf;': '\U0001d538',
141 'aopf;': '\U0001d552',
148 'ApplyFunction;': '\u2061',
150 'approxeq;': '\u224a',
155 'Ascr;': '\U0001d49c',
156 'ascr;': '\U0001d4b6',
160 'asympeq;': '\u224d',
169 'awconint;': '\u2233',
171 'backcong;': '\u224c',
172 'backepsilon;': '\u03f6',
173 'backprime;': '\u2035',
174 'backsim;': '\u223d',
175 'backsimeq;': '\u22cd',
176 'Backslash;': '\u2216',
181 'barwedge;': '\u2305',
183 'bbrktbrk;': '\u23b6',
189 'Because;': '\u2235',
190 'because;': '\u2235',
191 'bemptyv;': '\u29b0',
194 'Bernoullis;': '\u212c',
198 'between;': '\u226c',
199 'Bfr;': '\U0001d505',
200 'bfr;': '\U0001d51f',
202 'bigcirc;': '\u25ef',
204 'bigodot;': '\u2a00',
205 'bigoplus;': '\u2a01',
206 'bigotimes;': '\u2a02',
207 'bigsqcup;': '\u2a06',
208 'bigstar;': '\u2605',
209 'bigtriangledown;': '\u25bd',
210 'bigtriangleup;': '\u25b3',
211 'biguplus;': '\u2a04',
213 'bigwedge;': '\u22c0',
215 'blacklozenge;': '\u29eb',
216 'blacksquare;': '\u25aa',
217 'blacktriangle;': '\u25b4',
218 'blacktriangledown;': '\u25be',
219 'blacktriangleleft;': '\u25c2',
220 'blacktriangleright;': '\u25b8',
227 'bnequiv;': '\u2261\u20e5',
230 'Bopf;': '\U0001d539',
231 'bopf;': '\U0001d553',
254 'boxminus;': '\u229f',
255 'boxplus;': '\u229e',
256 'boxtimes;': '\u22a0',
285 'bscr;': '\U0001d4b7',
291 'bsolhsub;': '\u27c8',
304 'capbrcup;': '\u2a49',
308 'CapitalDifferentialD;': '\u2145',
309 'caps;': '\u2229\ufe00',
312 'Cayleys;': '\u212d',
322 'Cconint;': '\u2230',
324 'ccupssm;': '\u2a50',
330 'cemptyv;': '\u29b2',
333 'CenterDot;': '\xb7',
334 'centerdot;': '\xb7',
336 'cfr;': '\U0001d520',
340 'checkmark;': '\u2713',
346 'circlearrowleft;': '\u21ba',
347 'circlearrowright;': '\u21bb',
348 'circledast;': '\u229b',
349 'circledcirc;': '\u229a',
350 'circleddash;': '\u229d',
351 'CircleDot;': '\u2299',
353 'circledS;': '\u24c8',
354 'CircleMinus;': '\u2296',
355 'CirclePlus;': '\u2295',
356 'CircleTimes;': '\u2297',
359 'cirfnint;': '\u2a10',
361 'cirscir;': '\u29c2',
362 'ClockwiseContourIntegral;': '\u2232',
363 'CloseCurlyDoubleQuote;': '\u201d',
364 'CloseCurlyQuote;': '\u2019',
366 'clubsuit;': '\u2663',
371 'coloneq;': '\u2254',
376 'complement;': '\u2201',
377 'complexes;': '\u2102',
379 'congdot;': '\u2a6d',
380 'Congruent;': '\u2261',
383 'ContourIntegral;': '\u222e',
385 'copf;': '\U0001d554',
387 'Coproduct;': '\u2210',
393 'CounterClockwiseContourIntegral;': '\u2233',
397 'Cscr;': '\U0001d49e',
398 'cscr;': '\U0001d4b8',
404 'cudarrl;': '\u2938',
405 'cudarrr;': '\u2935',
409 'cularrp;': '\u293d',
412 'cupbrcap;': '\u2a48',
418 'cups;': '\u222a\ufe00',
420 'curarrm;': '\u293c',
421 'curlyeqprec;': '\u22de',
422 'curlyeqsucc;': '\u22df',
423 'curlyvee;': '\u22ce',
424 'curlywedge;': '\u22cf',
427 'curvearrowleft;': '\u21b6',
428 'curvearrowright;': '\u21b7',
431 'cwconint;': '\u2232',
443 'dbkarow;': '\u290f',
451 'ddagger;': '\u2021',
453 'DDotrahd;': '\u2911',
454 'ddotseq;': '\u2a77',
460 'demptyv;': '\u29b1',
462 'Dfr;': '\U0001d507',
463 'dfr;': '\U0001d521',
467 'DiacriticalAcute;': '\xb4',
468 'DiacriticalDot;': '\u02d9',
469 'DiacriticalDoubleAcute;': '\u02dd',
470 'DiacriticalGrave;': '`',
471 'DiacriticalTilde;': '\u02dc',
473 'Diamond;': '\u22c4',
474 'diamond;': '\u22c4',
475 'diamondsuit;': '\u2666',
478 'DifferentialD;': '\u2146',
479 'digamma;': '\u03dd',
484 'divideontimes;': '\u22c7',
491 'Dopf;': '\U0001d53b',
492 'dopf;': '\U0001d555',
497 'doteqdot;': '\u2251',
498 'DotEqual;': '\u2250',
499 'dotminus;': '\u2238',
500 'dotplus;': '\u2214',
501 'dotsquare;': '\u22a1',
502 'doublebarwedge;': '\u2306',
503 'DoubleContourIntegral;': '\u222f',
504 'DoubleDot;': '\xa8',
505 'DoubleDownArrow;': '\u21d3',
506 'DoubleLeftArrow;': '\u21d0',
507 'DoubleLeftRightArrow;': '\u21d4',
508 'DoubleLeftTee;': '\u2ae4',
509 'DoubleLongLeftArrow;': '\u27f8',
510 'DoubleLongLeftRightArrow;': '\u27fa',
511 'DoubleLongRightArrow;': '\u27f9',
512 'DoubleRightArrow;': '\u21d2',
513 'DoubleRightTee;': '\u22a8',
514 'DoubleUpArrow;': '\u21d1',
515 'DoubleUpDownArrow;': '\u21d5',
516 'DoubleVerticalBar;': '\u2225',
517 'DownArrow;': '\u2193',
518 'Downarrow;': '\u21d3',
519 'downarrow;': '\u2193',
520 'DownArrowBar;': '\u2913',
521 'DownArrowUpArrow;': '\u21f5',
522 'DownBreve;': '\u0311',
523 'downdownarrows;': '\u21ca',
524 'downharpoonleft;': '\u21c3',
525 'downharpoonright;': '\u21c2',
526 'DownLeftRightVector;': '\u2950',
527 'DownLeftTeeVector;': '\u295e',
528 'DownLeftVector;': '\u21bd',
529 'DownLeftVectorBar;': '\u2956',
530 'DownRightTeeVector;': '\u295f',
531 'DownRightVector;': '\u21c1',
532 'DownRightVectorBar;': '\u2957',
533 'DownTee;': '\u22a4',
534 'DownTeeArrow;': '\u21a7',
535 'drbkarow;': '\u2910',
538 'Dscr;': '\U0001d49f',
539 'dscr;': '\U0001d4b9',
550 'dwangle;': '\u29a6',
553 'dzigrarr;': '\u27ff',
575 'Efr;': '\U0001d508',
576 'efr;': '\U0001d522',
585 'Element;': '\u2208',
586 'elinters;': '\u23e7',
593 'emptyset;': '\u2205',
594 'EmptySmallSquare;': '\u25fb',
596 'EmptyVerySmallSquare;': '\u25ab',
605 'Eopf;': '\U0001d53c',
606 'eopf;': '\U0001d556',
611 'Epsilon;': '\u0395',
612 'epsilon;': '\u03b5',
615 'eqcolon;': '\u2255',
617 'eqslantgtr;': '\u2a96',
618 'eqslantless;': '\u2a95',
621 'EqualTilde;': '\u2242',
623 'Equilibrium;': '\u21cc',
625 'equivDD;': '\u2a78',
626 'eqvparsl;': '\u29e5',
648 'expectation;': '\u2130',
649 'ExponentialE;': '\u2147',
650 'exponentiale;': '\u2147',
651 'fallingdotseq;': '\u2252',
658 'Ffr;': '\U0001d509',
659 'ffr;': '\U0001d523',
661 'FilledSmallSquare;': '\u25fc',
662 'FilledVerySmallSquare;': '\u25aa',
668 'Fopf;': '\U0001d53d',
669 'fopf;': '\U0001d557',
674 'Fouriertrf;': '\u2131',
675 'fpartint;': '\u2a0d',
697 'fscr;': '\U0001d4bb',
719 'geqslant;': '\u2a7e',
723 'gesdoto;': '\u2a82',
724 'gesdotol;': '\u2a84',
725 'gesl;': '\u22db\ufe00',
727 'Gfr;': '\U0001d50a',
728 'gfr;': '\U0001d524',
740 'gnapprox;': '\u2a8a',
746 'Gopf;': '\U0001d53e',
747 'gopf;': '\U0001d558',
749 'GreaterEqual;': '\u2265',
750 'GreaterEqualLess;': '\u22db',
751 'GreaterFullEqual;': '\u2267',
752 'GreaterGreater;': '\u2aa2',
753 'GreaterLess;': '\u2277',
754 'GreaterSlantEqual;': '\u2a7e',
755 'GreaterTilde;': '\u2273',
756 'Gscr;': '\U0001d4a2',
770 'gtquest;': '\u2a7c',
771 'gtrapprox;': '\u2a86',
774 'gtreqless;': '\u22db',
775 'gtreqqless;': '\u2a8c',
776 'gtrless;': '\u2277',
778 'gvertneqq;': '\u2269\ufe00',
779 'gvnE;': '\u2269\ufe00',
788 'harrcir;': '\u2948',
795 'heartsuit;': '\u2665',
799 'hfr;': '\U0001d525',
800 'HilbertSpace;': '\u210b',
801 'hksearow;': '\u2925',
802 'hkswarow;': '\u2926',
805 'hookleftarrow;': '\u21a9',
806 'hookrightarrow;': '\u21aa',
808 'hopf;': '\U0001d559',
810 'HorizontalLine;': '\u2500',
812 'hscr;': '\U0001d4bd',
816 'HumpDownHump;': '\u224e',
817 'HumpEqual;': '\u224f',
838 'ifr;': '\U0001d526',
854 'ImaginaryI;': '\u2148',
855 'imagline;': '\u2110',
856 'imagpart;': '\u2111',
860 'Implies;': '\u21d2',
864 'infintie;': '\u29dd',
869 'integers;': '\u2124',
870 'Integral;': '\u222b',
871 'intercal;': '\u22ba',
872 'Intersection;': '\u22c2',
873 'intlarhk;': '\u2a17',
874 'intprod;': '\u2a3c',
875 'InvisibleComma;': '\u2063',
876 'InvisibleTimes;': '\u2062',
881 'Iopf;': '\U0001d540',
882 'iopf;': '\U0001d55a',
889 'iscr;': '\U0001d4be',
891 'isindot;': '\u22f5',
909 'Jfr;': '\U0001d50d',
910 'jfr;': '\U0001d527',
912 'Jopf;': '\U0001d541',
913 'jopf;': '\U0001d55b',
914 'Jscr;': '\U0001d4a5',
915 'jscr;': '\U0001d4bf',
927 'Kfr;': '\U0001d50e',
928 'kfr;': '\U0001d528',
934 'Kopf;': '\U0001d542',
935 'kopf;': '\U0001d55c',
936 'Kscr;': '\U0001d4a6',
937 'kscr;': '\U0001d4c0',
941 'laemptyv;': '\u29b4',
950 'Laplacetrf;': '\u2112',
957 'larrbfs;': '\u291f',
962 'larrsim;': '\u2973',
968 'lates;': '\u2aad\ufe00',
975 'lbrksld;': '\u298f',
976 'lbrkslu;': '\u298d',
988 'ldrdhar;': '\u2967',
989 'ldrushar;': '\u294b',
993 'LeftAngleBracket;': '\u27e8',
994 'LeftArrow;': '\u2190',
995 'Leftarrow;': '\u21d0',
996 'leftarrow;': '\u2190',
997 'LeftArrowBar;': '\u21e4',
998 'LeftArrowRightArrow;': '\u21c6',
999 'leftarrowtail;': '\u21a2',
1000 'LeftCeiling;': '\u2308',
1001 'LeftDoubleBracket;': '\u27e6',
1002 'LeftDownTeeVector;': '\u2961',
1003 'LeftDownVector;': '\u21c3',
1004 'LeftDownVectorBar;': '\u2959',
1005 'LeftFloor;': '\u230a',
1006 'leftharpoondown;': '\u21bd',
1007 'leftharpoonup;': '\u21bc',
1008 'leftleftarrows;': '\u21c7',
1009 'LeftRightArrow;': '\u2194',
1010 'Leftrightarrow;': '\u21d4',
1011 'leftrightarrow;': '\u2194',
1012 'leftrightarrows;': '\u21c6',
1013 'leftrightharpoons;': '\u21cb',
1014 'leftrightsquigarrow;': '\u21ad',
1015 'LeftRightVector;': '\u294e',
1016 'LeftTee;': '\u22a3',
1017 'LeftTeeArrow;': '\u21a4',
1018 'LeftTeeVector;': '\u295a',
1019 'leftthreetimes;': '\u22cb',
1020 'LeftTriangle;': '\u22b2',
1021 'LeftTriangleBar;': '\u29cf',
1022 'LeftTriangleEqual;': '\u22b4',
1023 'LeftUpDownVector;': '\u2951',
1024 'LeftUpTeeVector;': '\u2960',
1025 'LeftUpVector;': '\u21bf',
1026 'LeftUpVectorBar;': '\u2958',
1027 'LeftVector;': '\u21bc',
1028 'LeftVectorBar;': '\u2952',
1033 'leqslant;': '\u2a7d',
1036 'lesdot;': '\u2a7f',
1037 'lesdoto;': '\u2a81',
1038 'lesdotor;': '\u2a83',
1039 'lesg;': '\u22da\ufe00',
1040 'lesges;': '\u2a93',
1041 'lessapprox;': '\u2a85',
1042 'lessdot;': '\u22d6',
1043 'lesseqgtr;': '\u22da',
1044 'lesseqqgtr;': '\u2a8b',
1045 'LessEqualGreater;': '\u22da',
1046 'LessFullEqual;': '\u2266',
1047 'LessGreater;': '\u2276',
1048 'lessgtr;': '\u2276',
1049 'LessLess;': '\u2aa1',
1050 'lesssim;': '\u2272',
1051 'LessSlantEqual;': '\u2a7d',
1052 'LessTilde;': '\u2272',
1053 'lfisht;': '\u297c',
1054 'lfloor;': '\u230a',
1055 'Lfr;': '\U0001d50f',
1056 'lfr;': '\U0001d529',
1062 'lharul;': '\u296a',
1069 'llcorner;': '\u231e',
1070 'Lleftarrow;': '\u21da',
1071 'llhard;': '\u296b',
1073 'Lmidot;': '\u013f',
1074 'lmidot;': '\u0140',
1075 'lmoust;': '\u23b0',
1076 'lmoustache;': '\u23b0',
1078 'lnapprox;': '\u2a89',
1087 'LongLeftArrow;': '\u27f5',
1088 'Longleftarrow;': '\u27f8',
1089 'longleftarrow;': '\u27f5',
1090 'LongLeftRightArrow;': '\u27f7',
1091 'Longleftrightarrow;': '\u27fa',
1092 'longleftrightarrow;': '\u27f7',
1093 'longmapsto;': '\u27fc',
1094 'LongRightArrow;': '\u27f6',
1095 'Longrightarrow;': '\u27f9',
1096 'longrightarrow;': '\u27f6',
1097 'looparrowleft;': '\u21ab',
1098 'looparrowright;': '\u21ac',
1100 'Lopf;': '\U0001d543',
1101 'lopf;': '\U0001d55d',
1102 'loplus;': '\u2a2d',
1103 'lotimes;': '\u2a34',
1104 'lowast;': '\u2217',
1106 'LowerLeftArrow;': '\u2199',
1107 'LowerRightArrow;': '\u2198',
1109 'lozenge;': '\u25ca',
1112 'lparlt;': '\u2993',
1114 'lrcorner;': '\u231f',
1116 'lrhard;': '\u296d',
1119 'lsaquo;': '\u2039',
1121 'lscr;': '\U0001d4c1',
1129 'lsquor;': '\u201a',
1130 'Lstrok;': '\u0141',
1131 'lstrok;': '\u0142',
1140 'lthree;': '\u22cb',
1141 'ltimes;': '\u22c9',
1142 'ltlarr;': '\u2976',
1143 'ltquest;': '\u2a7b',
1147 'ltrPar;': '\u2996',
1148 'lurdshar;': '\u294a',
1149 'luruhar;': '\u2966',
1150 'lvertneqq;': '\u2268\ufe00',
1151 'lvnE;': '\u2268\ufe00',
1156 'maltese;': '\u2720',
1159 'mapsto;': '\u21a6',
1160 'mapstodown;': '\u21a7',
1161 'mapstoleft;': '\u21a4',
1162 'mapstoup;': '\u21a5',
1163 'marker;': '\u25ae',
1164 'mcomma;': '\u2a29',
1169 'measuredangle;': '\u2221',
1170 'MediumSpace;': '\u205f',
1171 'Mellintrf;': '\u2133',
1172 'Mfr;': '\U0001d510',
1173 'mfr;': '\U0001d52a',
1179 'midcir;': '\u2af0',
1183 'minusb;': '\u229f',
1184 'minusd;': '\u2238',
1185 'minusdu;': '\u2a2a',
1186 'MinusPlus;': '\u2213',
1189 'mnplus;': '\u2213',
1190 'models;': '\u22a7',
1191 'Mopf;': '\U0001d544',
1192 'mopf;': '\U0001d55e',
1195 'mscr;': '\U0001d4c2',
1196 'mstpos;': '\u223e',
1199 'multimap;': '\u22b8',
1202 'Nacute;': '\u0143',
1203 'nacute;': '\u0144',
1204 'nang;': '\u2220\u20d2',
1206 'napE;': '\u2a70\u0338',
1207 'napid;': '\u224b\u0338',
1209 'napprox;': '\u2249',
1211 'natural;': '\u266e',
1212 'naturals;': '\u2115',
1215 'nbump;': '\u224e\u0338',
1216 'nbumpe;': '\u224f\u0338',
1218 'Ncaron;': '\u0147',
1219 'ncaron;': '\u0148',
1220 'Ncedil;': '\u0145',
1221 'ncedil;': '\u0146',
1223 'ncongdot;': '\u2a6d\u0338',
1229 'nearhk;': '\u2924',
1232 'nearrow;': '\u2197',
1233 'nedot;': '\u2250\u0338',
1234 'NegativeMediumSpace;': '\u200b',
1235 'NegativeThickSpace;': '\u200b',
1236 'NegativeThinSpace;': '\u200b',
1237 'NegativeVeryThinSpace;': '\u200b',
1238 'nequiv;': '\u2262',
1239 'nesear;': '\u2928',
1240 'nesim;': '\u2242\u0338',
1241 'NestedGreaterGreater;': '\u226b',
1242 'NestedLessLess;': '\u226a',
1244 'nexist;': '\u2204',
1245 'nexists;': '\u2204',
1246 'Nfr;': '\U0001d511',
1247 'nfr;': '\U0001d52b',
1248 'ngE;': '\u2267\u0338',
1251 'ngeqq;': '\u2267\u0338',
1252 'ngeqslant;': '\u2a7e\u0338',
1253 'nges;': '\u2a7e\u0338',
1254 'nGg;': '\u22d9\u0338',
1256 'nGt;': '\u226b\u20d2',
1259 'nGtv;': '\u226b\u0338',
1272 'nlE;': '\u2266\u0338',
1274 'nLeftarrow;': '\u21cd',
1275 'nleftarrow;': '\u219a',
1276 'nLeftrightarrow;': '\u21ce',
1277 'nleftrightarrow;': '\u21ae',
1279 'nleqq;': '\u2266\u0338',
1280 'nleqslant;': '\u2a7d\u0338',
1281 'nles;': '\u2a7d\u0338',
1283 'nLl;': '\u22d8\u0338',
1285 'nLt;': '\u226a\u20d2',
1288 'nltrie;': '\u22ec',
1289 'nLtv;': '\u226a\u0338',
1291 'NoBreak;': '\u2060',
1292 'NonBreakingSpace;': '\xa0',
1294 'nopf;': '\U0001d55f',
1298 'NotCongruent;': '\u2262',
1299 'NotCupCap;': '\u226d',
1300 'NotDoubleVerticalBar;': '\u2226',
1301 'NotElement;': '\u2209',
1302 'NotEqual;': '\u2260',
1303 'NotEqualTilde;': '\u2242\u0338',
1304 'NotExists;': '\u2204',
1305 'NotGreater;': '\u226f',
1306 'NotGreaterEqual;': '\u2271',
1307 'NotGreaterFullEqual;': '\u2267\u0338',
1308 'NotGreaterGreater;': '\u226b\u0338',
1309 'NotGreaterLess;': '\u2279',
1310 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1311 'NotGreaterTilde;': '\u2275',
1312 'NotHumpDownHump;': '\u224e\u0338',
1313 'NotHumpEqual;': '\u224f\u0338',
1315 'notindot;': '\u22f5\u0338',
1316 'notinE;': '\u22f9\u0338',
1317 'notinva;': '\u2209',
1318 'notinvb;': '\u22f7',
1319 'notinvc;': '\u22f6',
1320 'NotLeftTriangle;': '\u22ea',
1321 'NotLeftTriangleBar;': '\u29cf\u0338',
1322 'NotLeftTriangleEqual;': '\u22ec',
1323 'NotLess;': '\u226e',
1324 'NotLessEqual;': '\u2270',
1325 'NotLessGreater;': '\u2278',
1326 'NotLessLess;': '\u226a\u0338',
1327 'NotLessSlantEqual;': '\u2a7d\u0338',
1328 'NotLessTilde;': '\u2274',
1329 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1330 'NotNestedLessLess;': '\u2aa1\u0338',
1332 'notniva;': '\u220c',
1333 'notnivb;': '\u22fe',
1334 'notnivc;': '\u22fd',
1335 'NotPrecedes;': '\u2280',
1336 'NotPrecedesEqual;': '\u2aaf\u0338',
1337 'NotPrecedesSlantEqual;': '\u22e0',
1338 'NotReverseElement;': '\u220c',
1339 'NotRightTriangle;': '\u22eb',
1340 'NotRightTriangleBar;': '\u29d0\u0338',
1341 'NotRightTriangleEqual;': '\u22ed',
1342 'NotSquareSubset;': '\u228f\u0338',
1343 'NotSquareSubsetEqual;': '\u22e2',
1344 'NotSquareSuperset;': '\u2290\u0338',
1345 'NotSquareSupersetEqual;': '\u22e3',
1346 'NotSubset;': '\u2282\u20d2',
1347 'NotSubsetEqual;': '\u2288',
1348 'NotSucceeds;': '\u2281',
1349 'NotSucceedsEqual;': '\u2ab0\u0338',
1350 'NotSucceedsSlantEqual;': '\u22e1',
1351 'NotSucceedsTilde;': '\u227f\u0338',
1352 'NotSuperset;': '\u2283\u20d2',
1353 'NotSupersetEqual;': '\u2289',
1354 'NotTilde;': '\u2241',
1355 'NotTildeEqual;': '\u2244',
1356 'NotTildeFullEqual;': '\u2247',
1357 'NotTildeTilde;': '\u2249',
1358 'NotVerticalBar;': '\u2224',
1360 'nparallel;': '\u2226',
1361 'nparsl;': '\u2afd\u20e5',
1362 'npart;': '\u2202\u0338',
1363 'npolint;': '\u2a14',
1365 'nprcue;': '\u22e0',
1366 'npre;': '\u2aaf\u0338',
1368 'npreceq;': '\u2aaf\u0338',
1371 'nrarrc;': '\u2933\u0338',
1372 'nrarrw;': '\u219d\u0338',
1373 'nRightarrow;': '\u21cf',
1374 'nrightarrow;': '\u219b',
1376 'nrtrie;': '\u22ed',
1378 'nsccue;': '\u22e1',
1379 'nsce;': '\u2ab0\u0338',
1380 'Nscr;': '\U0001d4a9',
1381 'nscr;': '\U0001d4c3',
1382 'nshortmid;': '\u2224',
1383 'nshortparallel;': '\u2226',
1386 'nsimeq;': '\u2244',
1389 'nsqsube;': '\u22e2',
1390 'nsqsupe;': '\u22e3',
1392 'nsubE;': '\u2ac5\u0338',
1394 'nsubset;': '\u2282\u20d2',
1395 'nsubseteq;': '\u2288',
1396 'nsubseteqq;': '\u2ac5\u0338',
1398 'nsucceq;': '\u2ab0\u0338',
1400 'nsupE;': '\u2ac6\u0338',
1402 'nsupset;': '\u2283\u20d2',
1403 'nsupseteq;': '\u2289',
1404 'nsupseteqq;': '\u2ac6\u0338',
1411 'ntriangleleft;': '\u22ea',
1412 'ntrianglelefteq;': '\u22ec',
1413 'ntriangleright;': '\u22eb',
1414 'ntrianglerighteq;': '\u22ed',
1418 'numero;': '\u2116',
1420 'nvap;': '\u224d\u20d2',
1421 'nVDash;': '\u22af',
1422 'nVdash;': '\u22ae',
1423 'nvDash;': '\u22ad',
1424 'nvdash;': '\u22ac',
1425 'nvge;': '\u2265\u20d2',
1427 'nvHarr;': '\u2904',
1428 'nvinfin;': '\u29de',
1429 'nvlArr;': '\u2902',
1430 'nvle;': '\u2264\u20d2',
1432 'nvltrie;': '\u22b4\u20d2',
1433 'nvrArr;': '\u2903',
1434 'nvrtrie;': '\u22b5\u20d2',
1435 'nvsim;': '\u223c\u20d2',
1436 'nwarhk;': '\u2923',
1439 'nwarrow;': '\u2196',
1440 'nwnear;': '\u2927',
1454 'Odblac;': '\u0150',
1455 'odblac;': '\u0151',
1458 'odsold;': '\u29bc',
1462 'Ofr;': '\U0001d512',
1463 'ofr;': '\U0001d52c',
1475 'olcross;': '\u29bb',
1482 'Omicron;': '\u039f',
1483 'omicron;': '\u03bf',
1485 'ominus;': '\u2296',
1486 'Oopf;': '\U0001d546',
1487 'oopf;': '\U0001d560',
1489 'OpenCurlyDoubleQuote;': '\u201c',
1490 'OpenCurlyQuote;': '\u2018',
1498 'orderof;': '\u2134',
1503 'origof;': '\u22b6',
1505 'orslope;': '\u2a57',
1508 'Oscr;': '\U0001d4aa',
1519 'Otimes;': '\u2a37',
1520 'otimes;': '\u2297',
1521 'otimesas;': '\u2a36',
1527 'OverBar;': '\u203e',
1528 'OverBrace;': '\u23de',
1529 'OverBracket;': '\u23b4',
1530 'OverParenthesis;': '\u23dc',
1534 'parallel;': '\u2225',
1535 'parsim;': '\u2af3',
1538 'PartialD;': '\u2202',
1543 'permil;': '\u2030',
1545 'pertenk;': '\u2031',
1546 'Pfr;': '\U0001d513',
1547 'pfr;': '\U0001d52d',
1551 'phmmat;': '\u2133',
1555 'pitchfork;': '\u22d4',
1557 'planck;': '\u210f',
1558 'planckh;': '\u210e',
1559 'plankv;': '\u210f',
1561 'plusacir;': '\u2a23',
1563 'pluscir;': '\u2a22',
1564 'plusdo;': '\u2214',
1565 'plusdu;': '\u2a25',
1567 'PlusMinus;': '\xb1',
1570 'plussim;': '\u2a26',
1571 'plustwo;': '\u2a27',
1573 'Poincareplane;': '\u210c',
1574 'pointint;': '\u2a15',
1576 'popf;': '\U0001d561',
1586 'precapprox;': '\u2ab7',
1587 'preccurlyeq;': '\u227c',
1588 'Precedes;': '\u227a',
1589 'PrecedesEqual;': '\u2aaf',
1590 'PrecedesSlantEqual;': '\u227c',
1591 'PrecedesTilde;': '\u227e',
1592 'preceq;': '\u2aaf',
1593 'precnapprox;': '\u2ab9',
1594 'precneqq;': '\u2ab5',
1595 'precnsim;': '\u22e8',
1596 'precsim;': '\u227e',
1599 'primes;': '\u2119',
1602 'prnsim;': '\u22e8',
1604 'Product;': '\u220f',
1605 'profalar;': '\u232e',
1606 'profline;': '\u2312',
1607 'profsurf;': '\u2313',
1609 'Proportion;': '\u2237',
1610 'Proportional;': '\u221d',
1611 'propto;': '\u221d',
1613 'prurel;': '\u22b0',
1614 'Pscr;': '\U0001d4ab',
1615 'pscr;': '\U0001d4c5',
1618 'puncsp;': '\u2008',
1619 'Qfr;': '\U0001d514',
1620 'qfr;': '\U0001d52e',
1623 'qopf;': '\U0001d562',
1624 'qprime;': '\u2057',
1625 'Qscr;': '\U0001d4ac',
1626 'qscr;': '\U0001d4c6',
1627 'quaternions;': '\u210d',
1628 'quatint;': '\u2a16',
1630 'questeq;': '\u225f',
1636 'race;': '\u223d\u0331',
1637 'Racute;': '\u0154',
1638 'racute;': '\u0155',
1640 'raemptyv;': '\u29b3',
1645 'rangle;': '\u27e9',
1651 'rarrap;': '\u2975',
1653 'rarrbfs;': '\u2920',
1655 'rarrfs;': '\u291e',
1656 'rarrhk;': '\u21aa',
1657 'rarrlp;': '\u21ac',
1658 'rarrpl;': '\u2945',
1659 'rarrsim;': '\u2974',
1660 'Rarrtl;': '\u2916',
1661 'rarrtl;': '\u21a3',
1663 'rAtail;': '\u291c',
1664 'ratail;': '\u291a',
1666 'rationals;': '\u211a',
1674 'rbrksld;': '\u298e',
1675 'rbrkslu;': '\u2990',
1676 'Rcaron;': '\u0158',
1677 'rcaron;': '\u0159',
1678 'Rcedil;': '\u0156',
1679 'rcedil;': '\u0157',
1685 'rdldhar;': '\u2969',
1687 'rdquor;': '\u201d',
1691 'realine;': '\u211b',
1692 'realpart;': '\u211c',
1699 'ReverseElement;': '\u220b',
1700 'ReverseEquilibrium;': '\u21cb',
1701 'ReverseUpEquilibrium;': '\u296f',
1702 'rfisht;': '\u297d',
1703 'rfloor;': '\u230b',
1705 'rfr;': '\U0001d52f',
1709 'rharul;': '\u296c',
1713 'RightAngleBracket;': '\u27e9',
1714 'RightArrow;': '\u2192',
1715 'Rightarrow;': '\u21d2',
1716 'rightarrow;': '\u2192',
1717 'RightArrowBar;': '\u21e5',
1718 'RightArrowLeftArrow;': '\u21c4',
1719 'rightarrowtail;': '\u21a3',
1720 'RightCeiling;': '\u2309',
1721 'RightDoubleBracket;': '\u27e7',
1722 'RightDownTeeVector;': '\u295d',
1723 'RightDownVector;': '\u21c2',
1724 'RightDownVectorBar;': '\u2955',
1725 'RightFloor;': '\u230b',
1726 'rightharpoondown;': '\u21c1',
1727 'rightharpoonup;': '\u21c0',
1728 'rightleftarrows;': '\u21c4',
1729 'rightleftharpoons;': '\u21cc',
1730 'rightrightarrows;': '\u21c9',
1731 'rightsquigarrow;': '\u219d',
1732 'RightTee;': '\u22a2',
1733 'RightTeeArrow;': '\u21a6',
1734 'RightTeeVector;': '\u295b',
1735 'rightthreetimes;': '\u22cc',
1736 'RightTriangle;': '\u22b3',
1737 'RightTriangleBar;': '\u29d0',
1738 'RightTriangleEqual;': '\u22b5',
1739 'RightUpDownVector;': '\u294f',
1740 'RightUpTeeVector;': '\u295c',
1741 'RightUpVector;': '\u21be',
1742 'RightUpVectorBar;': '\u2954',
1743 'RightVector;': '\u21c0',
1744 'RightVectorBar;': '\u2953',
1746 'risingdotseq;': '\u2253',
1750 'rmoust;': '\u23b1',
1751 'rmoustache;': '\u23b1',
1758 'ropf;': '\U0001d563',
1759 'roplus;': '\u2a2e',
1760 'rotimes;': '\u2a35',
1761 'RoundImplies;': '\u2970',
1763 'rpargt;': '\u2994',
1764 'rppolint;': '\u2a12',
1766 'Rrightarrow;': '\u21db',
1767 'rsaquo;': '\u203a',
1769 'rscr;': '\U0001d4c7',
1774 'rsquor;': '\u2019',
1775 'rthree;': '\u22cc',
1776 'rtimes;': '\u22ca',
1780 'rtriltri;': '\u29ce',
1781 'RuleDelayed;': '\u29f4',
1782 'ruluhar;': '\u2968',
1784 'Sacute;': '\u015a',
1785 'sacute;': '\u015b',
1790 'Scaron;': '\u0160',
1791 'scaron;': '\u0161',
1795 'Scedil;': '\u015e',
1796 'scedil;': '\u015f',
1801 'scnsim;': '\u22e9',
1802 'scpolint;': '\u2a13',
1809 'searhk;': '\u2925',
1812 'searrow;': '\u2198',
1816 'seswar;': '\u2929',
1817 'setminus;': '\u2216',
1820 'Sfr;': '\U0001d516',
1821 'sfr;': '\U0001d530',
1822 'sfrown;': '\u2322',
1824 'SHCHcy;': '\u0429',
1825 'shchcy;': '\u0449',
1828 'ShortDownArrow;': '\u2193',
1829 'ShortLeftArrow;': '\u2190',
1830 'shortmid;': '\u2223',
1831 'shortparallel;': '\u2225',
1832 'ShortRightArrow;': '\u2192',
1833 'ShortUpArrow;': '\u2191',
1838 'sigmaf;': '\u03c2',
1839 'sigmav;': '\u03c2',
1841 'simdot;': '\u2a6a',
1849 'simplus;': '\u2a24',
1850 'simrarr;': '\u2972',
1852 'SmallCircle;': '\u2218',
1853 'smallsetminus;': '\u2216',
1854 'smashp;': '\u2a33',
1855 'smeparsl;': '\u29e4',
1860 'smtes;': '\u2aac\ufe00',
1861 'SOFTcy;': '\u042c',
1862 'softcy;': '\u044c',
1865 'solbar;': '\u233f',
1866 'Sopf;': '\U0001d54a',
1867 'sopf;': '\U0001d564',
1868 'spades;': '\u2660',
1869 'spadesuit;': '\u2660',
1872 'sqcaps;': '\u2293\ufe00',
1874 'sqcups;': '\u2294\ufe00',
1877 'sqsube;': '\u2291',
1878 'sqsubset;': '\u228f',
1879 'sqsubseteq;': '\u2291',
1881 'sqsupe;': '\u2292',
1882 'sqsupset;': '\u2290',
1883 'sqsupseteq;': '\u2292',
1885 'Square;': '\u25a1',
1886 'square;': '\u25a1',
1887 'SquareIntersection;': '\u2293',
1888 'SquareSubset;': '\u228f',
1889 'SquareSubsetEqual;': '\u2291',
1890 'SquareSuperset;': '\u2290',
1891 'SquareSupersetEqual;': '\u2292',
1892 'SquareUnion;': '\u2294',
1893 'squarf;': '\u25aa',
1896 'Sscr;': '\U0001d4ae',
1897 'sscr;': '\U0001d4c8',
1898 'ssetmn;': '\u2216',
1899 'ssmile;': '\u2323',
1900 'sstarf;': '\u22c6',
1904 'straightepsilon;': '\u03f5',
1905 'straightphi;': '\u03d5',
1909 'subdot;': '\u2abd',
1912 'subedot;': '\u2ac3',
1913 'submult;': '\u2ac1',
1916 'subplus;': '\u2abf',
1917 'subrarr;': '\u2979',
1918 'Subset;': '\u22d0',
1919 'subset;': '\u2282',
1920 'subseteq;': '\u2286',
1921 'subseteqq;': '\u2ac5',
1922 'SubsetEqual;': '\u2286',
1923 'subsetneq;': '\u228a',
1924 'subsetneqq;': '\u2acb',
1925 'subsim;': '\u2ac7',
1926 'subsub;': '\u2ad5',
1927 'subsup;': '\u2ad3',
1929 'succapprox;': '\u2ab8',
1930 'succcurlyeq;': '\u227d',
1931 'Succeeds;': '\u227b',
1932 'SucceedsEqual;': '\u2ab0',
1933 'SucceedsSlantEqual;': '\u227d',
1934 'SucceedsTilde;': '\u227f',
1935 'succeq;': '\u2ab0',
1936 'succnapprox;': '\u2aba',
1937 'succneqq;': '\u2ab6',
1938 'succnsim;': '\u22e9',
1939 'succsim;': '\u227f',
1940 'SuchThat;': '\u220b',
1952 'supdot;': '\u2abe',
1953 'supdsub;': '\u2ad8',
1956 'supedot;': '\u2ac4',
1957 'Superset;': '\u2283',
1958 'SupersetEqual;': '\u2287',
1959 'suphsol;': '\u27c9',
1960 'suphsub;': '\u2ad7',
1961 'suplarr;': '\u297b',
1962 'supmult;': '\u2ac2',
1965 'supplus;': '\u2ac0',
1966 'Supset;': '\u22d1',
1967 'supset;': '\u2283',
1968 'supseteq;': '\u2287',
1969 'supseteqq;': '\u2ac6',
1970 'supsetneq;': '\u228b',
1971 'supsetneqq;': '\u2acc',
1972 'supsim;': '\u2ac8',
1973 'supsub;': '\u2ad4',
1974 'supsup;': '\u2ad6',
1975 'swarhk;': '\u2926',
1978 'swarrow;': '\u2199',
1979 'swnwar;': '\u292a',
1983 'target;': '\u2316',
1987 'Tcaron;': '\u0164',
1988 'tcaron;': '\u0165',
1989 'Tcedil;': '\u0162',
1990 'tcedil;': '\u0163',
1994 'telrec;': '\u2315',
1995 'Tfr;': '\U0001d517',
1996 'tfr;': '\U0001d531',
1997 'there4;': '\u2234',
1998 'Therefore;': '\u2234',
1999 'therefore;': '\u2234',
2002 'thetasym;': '\u03d1',
2003 'thetav;': '\u03d1',
2004 'thickapprox;': '\u2248',
2005 'thicksim;': '\u223c',
2006 'ThickSpace;': '\u205f\u200a',
2007 'thinsp;': '\u2009',
2008 'ThinSpace;': '\u2009',
2010 'thksim;': '\u223c',
2017 'TildeEqual;': '\u2243',
2018 'TildeFullEqual;': '\u2245',
2019 'TildeTilde;': '\u2248',
2022 'timesb;': '\u22a0',
2023 'timesbar;': '\u2a31',
2024 'timesd;': '\u2a30',
2028 'topbot;': '\u2336',
2029 'topcir;': '\u2af1',
2030 'Topf;': '\U0001d54b',
2031 'topf;': '\U0001d565',
2032 'topfork;': '\u2ada',
2034 'tprime;': '\u2034',
2037 'triangle;': '\u25b5',
2038 'triangledown;': '\u25bf',
2039 'triangleleft;': '\u25c3',
2040 'trianglelefteq;': '\u22b4',
2041 'triangleq;': '\u225c',
2042 'triangleright;': '\u25b9',
2043 'trianglerighteq;': '\u22b5',
2044 'tridot;': '\u25ec',
2046 'triminus;': '\u2a3a',
2047 'TripleDot;': '\u20db',
2048 'triplus;': '\u2a39',
2050 'tritime;': '\u2a3b',
2051 'trpezium;': '\u23e2',
2052 'Tscr;': '\U0001d4af',
2053 'tscr;': '\U0001d4c9',
2058 'Tstrok;': '\u0166',
2059 'tstrok;': '\u0167',
2061 'twoheadleftarrow;': '\u219e',
2062 'twoheadrightarrow;': '\u21a0',
2070 'Uarrocir;': '\u2949',
2073 'Ubreve;': '\u016c',
2074 'ubreve;': '\u016d',
2082 'Udblac;': '\u0170',
2083 'udblac;': '\u0171',
2085 'ufisht;': '\u297e',
2086 'Ufr;': '\U0001d518',
2087 'ufr;': '\U0001d532',
2096 'ulcorn;': '\u231c',
2097 'ulcorner;': '\u231c',
2098 'ulcrop;': '\u230f',
2105 'UnderBrace;': '\u23df',
2106 'UnderBracket;': '\u23b5',
2107 'UnderParenthesis;': '\u23dd',
2109 'UnionPlus;': '\u228e',
2112 'Uopf;': '\U0001d54c',
2113 'uopf;': '\U0001d566',
2114 'UpArrow;': '\u2191',
2115 'Uparrow;': '\u21d1',
2116 'uparrow;': '\u2191',
2117 'UpArrowBar;': '\u2912',
2118 'UpArrowDownArrow;': '\u21c5',
2119 'UpDownArrow;': '\u2195',
2120 'Updownarrow;': '\u21d5',
2121 'updownarrow;': '\u2195',
2122 'UpEquilibrium;': '\u296e',
2123 'upharpoonleft;': '\u21bf',
2124 'upharpoonright;': '\u21be',
2126 'UpperLeftArrow;': '\u2196',
2127 'UpperRightArrow;': '\u2197',
2131 'Upsilon;': '\u03a5',
2132 'upsilon;': '\u03c5',
2134 'UpTeeArrow;': '\u21a5',
2135 'upuparrows;': '\u21c8',
2136 'urcorn;': '\u231d',
2137 'urcorner;': '\u231d',
2138 'urcrop;': '\u230e',
2142 'Uscr;': '\U0001d4b0',
2143 'uscr;': '\U0001d4ca',
2145 'Utilde;': '\u0168',
2146 'utilde;': '\u0169',
2154 'uwangle;': '\u29a7',
2155 'vangrt;': '\u299c',
2156 'varepsilon;': '\u03f5',
2157 'varkappa;': '\u03f0',
2158 'varnothing;': '\u2205',
2159 'varphi;': '\u03d5',
2161 'varpropto;': '\u221d',
2164 'varrho;': '\u03f1',
2165 'varsigma;': '\u03c2',
2166 'varsubsetneq;': '\u228a\ufe00',
2167 'varsubsetneqq;': '\u2acb\ufe00',
2168 'varsupsetneq;': '\u228b\ufe00',
2169 'varsupsetneqq;': '\u2acc\ufe00',
2170 'vartheta;': '\u03d1',
2171 'vartriangleleft;': '\u22b2',
2172 'vartriangleright;': '\u22b3',
2182 'Vdashl;': '\u2ae6',
2185 'veebar;': '\u22bb',
2187 'vellip;': '\u22ee',
2188 'Verbar;': '\u2016',
2192 'VerticalBar;': '\u2223',
2193 'VerticalLine;': '|',
2194 'VerticalSeparator;': '\u2758',
2195 'VerticalTilde;': '\u2240',
2196 'VeryThinSpace;': '\u200a',
2197 'Vfr;': '\U0001d519',
2198 'vfr;': '\U0001d533',
2200 'vnsub;': '\u2282\u20d2',
2201 'vnsup;': '\u2283\u20d2',
2202 'Vopf;': '\U0001d54d',
2203 'vopf;': '\U0001d567',
2206 'Vscr;': '\U0001d4b1',
2207 'vscr;': '\U0001d4cb',
2208 'vsubnE;': '\u2acb\ufe00',
2209 'vsubne;': '\u228a\ufe00',
2210 'vsupnE;': '\u2acc\ufe00',
2211 'vsupne;': '\u228b\ufe00',
2212 'Vvdash;': '\u22aa',
2213 'vzigzag;': '\u299a',
2216 'wedbar;': '\u2a5f',
2219 'wedgeq;': '\u2259',
2220 'weierp;': '\u2118',
2221 'Wfr;': '\U0001d51a',
2222 'wfr;': '\U0001d534',
2223 'Wopf;': '\U0001d54e',
2224 'wopf;': '\U0001d568',
2227 'wreath;': '\u2240',
2228 'Wscr;': '\U0001d4b2',
2229 'wscr;': '\U0001d4cc',
2234 'Xfr;': '\U0001d51b',
2235 'xfr;': '\U0001d535',
2245 'Xopf;': '\U0001d54f',
2246 'xopf;': '\U0001d569',
2247 'xoplus;': '\u2a01',
2248 'xotime;': '\u2a02',
2251 'Xscr;': '\U0001d4b3',
2252 'xscr;': '\U0001d4cd',
2253 'xsqcup;': '\u2a06',
2254 'xuplus;': '\u2a04',
2257 'xwedge;': '\u22c0',
2270 'Yfr;': '\U0001d51c',
2271 'yfr;': '\U0001d536',
2274 'Yopf;': '\U0001d550',
2275 'yopf;': '\U0001d56a',
2276 'Yscr;': '\U0001d4b4',
2277 'yscr;': '\U0001d4ce',
2283 'Zacute;': '\u0179',
2284 'zacute;': '\u017a',
2285 'Zcaron;': '\u017d',
2286 'zcaron;': '\u017e',
2291 'zeetrf;': '\u2128',
2292 'ZeroWidthSpace;': '\u200b',
2296 'zfr;': '\U0001d537',
2299 'zigrarr;': '\u21dd',
2301 'zopf;': '\U0001d56b',
2302 'Zscr;': '\U0001d4b5',
2303 'zscr;': '\U0001d4cf',
2309 import http.client as compat_http_client
2310 except ImportError: # Python 2
2311 import httplib as compat_http_client
2314 from urllib.error import HTTPError as compat_HTTPError
2315 except ImportError: # Python 2
2316 from urllib2 import HTTPError as compat_HTTPError
2319 from urllib.request import urlretrieve as compat_urlretrieve
2320 except ImportError: # Python 2
2321 from urllib import urlretrieve as compat_urlretrieve
2324 from html.parser import HTMLParser as compat_HTMLParser
2325 except ImportError: # Python 2
2326 from HTMLParser import HTMLParser as compat_HTMLParser
try:  # Python 2
    from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError:  # Python 3
    try:  # Python 3.0 - 3.4
        from html.parser import HTMLParseError as compat_HTMLParseError
    except ImportError:  # Python >= 3.5
        # HTMLParseError has been deprecated in Python 3.3 and removed in
        # Python 3.5. Introduce a dummy exception for Python >= 3.5 so that
        # exception handling stays compatible and uniform across versions.
        class compat_HTMLParseError(Exception):
            """Stand-in for the removed ``html.parser.HTMLParseError``."""
            pass
try:
    from subprocess import DEVNULL

    def compat_subprocess_get_DEVNULL():
        """Return the ``subprocess.DEVNULL`` sentinel (Python >= 3.3)."""
        return DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a fresh writable handle on ``os.devnull``.

        Older Pythons have no DEVNULL sentinel, so a real file object is
        opened on every call; the caller is responsible for closing it.
        """
        return open(os.path.devnull, 'w')
2348 import http.server as compat_http_server
2350 import BaseHTTPServer as compat_http_server
# Text (unicode) string type: ``unicode`` on Python 2, ``str`` on Python 3.
try:
    compat_str = unicode  # Python 2  # noqa: F821
except NameError:
    compat_str = str
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Matches maximal runs of ASCII characters; reuse the stdlib pattern
    # when the running urllib happens to expose it.
    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                else re.compile(r'([\x00-\x7f]+)'))

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            # (bare attribute access: raises AttributeError for non-strings)
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        for item in bits[1:]:
            try:
                decoded = compat_urllib_parse._hextochr[item[:2]]
            except KeyError:
                # Not a valid %xx escape: keep the text verbatim.
                res.append(b'%')
                res.append(item)
            else:
                res.append(decoded)
                res.append(item[2:])
        return b''.join(res)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            # Duck-type check only: raises AttributeError for non-strings.
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # Splitting on the capturing ASCII pattern yields alternating
        # non-ASCII (even index) and ASCII (odd index) chunks; only the
        # ASCII chunks can contain percent escapes.
        bits = _asciire.split(string)
        res = [bits[0]]
        for i in range(1, len(bits), 2):
            res.append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            res.append(bits[i + 1])
        return ''.join(res)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Like unquote(), but also replace plus signs by spaces, as required for
        unquoting HTML form values.

        unquote_plus('%7e/abc+def') -> '~/abc def'
        """
        string = string.replace('+', ' ')
        return compat_urllib_parse_unquote(string, encoding, errors)
try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
except ImportError:  # Python 2
    # Python 2 will choke in urlencode on mixture of byte and unicode strings.
    # Possible solutions are to either port it from python 3 with all
    # the friends or manually ensure input query contains only byte strings.
    # We will stick with latter thus recursively encoding the whole query.
    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
        """urlencode() that accepts a mix of byte and unicode strings.

        Every unicode string found in *query* (recursing through dicts,
        lists and tuples) is encoded with *encoding* before the query is
        handed to the stdlib urlencode.
        """
        def encode_elem(e):
            if isinstance(e, dict):
                return encode_dict(e)
            if isinstance(e, (list, tuple,)):
                encoded = encode_list(e)
                # Preserve the container type of the input.
                return tuple(encoded) if isinstance(e, tuple) else encoded
            if isinstance(e, compat_str):
                return e.encode(encoding)
            return e

        def encode_dict(d):
            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())

        def encode_list(items):
            return [encode_elem(e) for e in items]

        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler that serves ``data:`` URLs from the URL itself."""

        def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                # Strip the trailing ';base64' marker (7 characters).
                mediatype = mediatype[:-7]

            if not mediatype:
                # Default media type for data URLs.
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# Common base type of all string types: ``basestring`` on Python 2,
# plain ``str`` on Python 3.
try:
    compat_basestring = basestring  # Python 2  # noqa: F821
except NameError:
    compat_basestring = str

# Code point -> one-character text string: ``unichr`` on Python 2,
# ``chr`` on Python 3.
try:
    compat_chr = unichr  # Python 2  # noqa: F821
except NameError:
    compat_chr = chr
2498 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
2499 except ImportError: # Python 2.6
2500 from xml.parsers.expat import ExpatError as compat_xml_parse_error
etree = xml.etree.ElementTree


class _TreeBuilder(etree.TreeBuilder):
    """TreeBuilder whose doctype handler is a no-op, so DOCTYPEs are ignored."""

    def doctype(self, name, pubid, system):
        pass


if sys.version_info[0] >= 3:
    def compat_etree_fromstring(text):
        """Parse the XML document *text* and return its root Element."""
        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield every descendant of *root* in document order."""
            for el in root.findall('*'):
                yield el
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        if not parser:
            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Build an Element whose byte-string attribute values are decoded."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse the XML document *text*; byte-string text nodes and
        attribute values are decoded as UTF-8."""
        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
if hasattr(etree, 'register_namespace'):
    compat_etree_register_namespace = etree.register_namespace
else:
    def compat_etree_register_namespace(prefix, uri):
        """Register a namespace prefix.
        The registry is global, and any existing mapping for either the
        given prefix or the namespace URI will be removed.
        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
        attributes in this namespace will be serialized with prefix if possible.
        ValueError is raised if prefix is reserved or is invalid.
        """
        if re.match(r"ns\d+$", prefix):
            raise ValueError("Prefix format reserved for internal use")
        # Iterate over a snapshot: entries are deleted while scanning.
        for k, v in list(etree._namespace_map.items()):
            if k == uri or v == prefix:
                del etree._namespace_map[k]
        etree._namespace_map[uri] = prefix
if sys.version_info < (2, 7):
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
    # .//node does not match if a node is a direct child of . !
    def compat_xpath(xpath):
        """Return *xpath* as an ASCII byte string when it is unicode."""
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')
        return xpath
else:
    def compat_xpath(xpath):
        """Return *xpath* unchanged (no workaround needed on Python >= 2.7)."""
        return xpath
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a list of (name, value) text pairs.

        Fields are separated by '&' or ';'. Blank values are dropped unless
        *keep_blank_values* is true; malformed fields raise ValueError only
        when *strict_parsing* is true.
        """
        qs, _coerce_result = qs, compat_str
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError('bad query field: %r' % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to the
        list of its values, in order of appearance."""
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            parsed_result.setdefault(name, []).append(value)
        return parsed_result
# Under Jython os.name is 'java'; os._name holds the underlying OS name.
compat_os_name = os._name if os.name == 'java' else os.name


if compat_os_name == 'nt':
    def compat_shlex_quote(s):
        """Quote *s* for a Windows command line: wrap it in double quotes
        (escaping embedded ones) unless it only contains safe characters."""
        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
    try:
        from shlex import quote as compat_shlex_quote
    except ImportError:  # Python < 3.3
        def compat_shlex_quote(s):
            """Quote *s* for a POSIX shell using single quotes."""
            if re.match(r'^[-_\w./]+$', s):
                return s
            # Close the quote, emit a double-quoted single quote, reopen.
            return "'" + s.replace("'", "'\"'\"'") + "'"
try:
    # Probe: does shlex.split cope with a non-ASCII text string?
    args = shlex.split('中文')
    # Explicit raise rather than `assert`, so the probe still works when
    # Python runs with -O (which strips assert statements).
    if not (isinstance(args, list)
            and isinstance(args[0], compat_str)
            and args[0] == '中文'):
        raise AssertionError('shlex.split does not handle unicode strings')
    compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """shlex.split() that round-trips unicode input through UTF-8."""
        if isinstance(s, compat_str):
            s = s.encode('utf-8')
        return [arg.decode('utf-8') for arg in shlex.split(s, comments, posix)]
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser

    def compat_setenv(key, value, env=os.environ):
        """Set environment variable *key* to *value* in mapping *env*."""
        env[key] = value
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem
        encoding; *default* (or any empty value) is returned undecoded."""
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    def compat_setenv(key, value, env=os.environ):
        """Set *key* to *value* in *env*, encoding unicode strings with the
        filesystem encoding first."""
        def encode(v):
            from .utils import get_filesystem_encoding
            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
        env[encode(key)] = encode(value)

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if compat_os_name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i is the end of the "~" or "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif compat_os_name in ('nt', 'ce'):
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i is the end of the "~" or "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv returns None (it never raises KeyError) when
                # HOMEDRIVE is unset, so fall back to '' explicitly instead
                # of passing None to os.path.join.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
if sys.version_info < (3, 0):
    def compat_print(s):
        """Print text *s*, encoded with the preferred encoding; characters
        that cannot be encoded become XML character references."""
        from .utils import preferredencoding
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        """Print the text string *s* (must be a compat_str)."""
        assert isinstance(s, compat_str)
        print(s)
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        """getpass.getpass() that encodes a unicode *prompt* with the
        preferred encoding before passing it on."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
# Line-reading prompt function: ``raw_input`` on Python 2, ``input`` on
# Python 3.
try:
    compat_input = raw_input  # noqa: F821
except NameError:  # Python 3
    compat_input = input
# Python < 2.6.5 require kwargs to be bytes
try:
    def _testfunc(x):
        pass
    # Probe: raises TypeError where unicode keyword names are rejected.
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* whose keys are byte strings."""
        return dict((bytes(k), v) for k, v in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged (no workaround needed)."""
        return kwargs
# All built-in numeric types, for use with isinstance().
try:
    compat_numeric_types = (int, float, long, complex)  # noqa: F821
except NameError:  # Python 3
    compat_numeric_types = (int, float, complex)


# All built-in integer types, for use with isinstance().
try:
    compat_integer_types = (int, long)  # noqa: F821
except NameError:  # Python 3
    compat_integer_types = (int, )
if sys.version_info < (2, 7):
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (a ``(host, port)`` pair) and return the socket.

        Every address returned by getaddrinfo is tried in turn; the error
        of the last failed attempt is raised if none succeeds.
        """
        host, port = address
        err = None
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock
            except socket.error as exc:
                # Remember the failure and release the half-open socket
                # before trying the next candidate address.
                err = exc
                if sock is not None:
                    sock.close()
        if err is not None:
            raise err
        raise socket.error('getaddrinfo returns an empty list')
else:
    compat_socket_create_connection = socket.create_connection
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects unicode options.

    A throwaway parser is probed first; only when adding a text option
    raises TypeError is add_option replaced by a wrapper that encodes
    unicode arguments to ASCII byte strings.
    """
    op = optparse.OptionParser()
    og = optparse.OptionGroup(op, 'foo')
    try:
        og.add_option('-t')
    except TypeError:
        real_add_option = optparse.OptionGroup.add_option

        def _compat_add_option(self, *args, **kwargs):
            enc = lambda v: (
                v.encode('ascii', 'replace') if isinstance(v, compat_str)
                else v)
            bargs = [enc(a) for a in args]
            bkwargs = dict(
                (k, enc(v)) for k, v in kwargs.items())
            return real_add_option(self, *bargs, **bkwargs)
        optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return environment variable *name* as an int, or None when it is
        unset, empty or not a valid integer."""
        raw = compat_getenv(name)
        if not raw:
            return None
        try:
            return int(raw)
        except ValueError:
            # A malformed value is treated as unset, matching
            # shutil.get_terminal_size, instead of crashing the caller.
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        COLUMNS and LINES from the environment take precedence; missing or
        non-positive values are filled in from ``stty size`` and, if that
        fails, from *fallback*.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # `stty size` prints "<lines> <columns>".
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
try:
    # Probe: itertools.count only accepts `step` from Python 2.7 on.
    itertools.count(start=0, step=1)
    compat_itertools_count = itertools.count
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... forever."""
        n = start
        while True:
            yield n
            n += step
2886 if sys.version_info >= (3, 0):
2887 from tokenize import tokenize as compat_tokenize_tokenize
2889 from tokenize import generate_tokens as compat_tokenize_tokenize
try:
    # Probe: raises TypeError where struct rejects unicode format strings.
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
    # See https://bugs.python.org/issue19099
    def compat_struct_pack(spec, *args):
        """struct.pack() accepting a unicode format string."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def compat_struct_unpack(spec, *args):
        """struct.unpack() accepting a unicode format string."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)

    class compat_Struct(struct.Struct):
        """struct.Struct accepting a unicode format string."""

        def __init__(self, fmt):
            if isinstance(fmt, compat_str):
                fmt = fmt.encode('ascii')
            super(compat_Struct, self).__init__(fmt)
else:
    compat_struct_pack = struct.pack
    compat_struct_unpack = struct.unpack
    if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
        class compat_Struct(struct.Struct):
            """struct.Struct whose unpack() wraps its argument in a buffer."""

            def unpack(self, string):
                if not isinstance(string, buffer):  # noqa: F821
                    string = buffer(string)  # noqa: F821
                return super(compat_Struct, self).unpack(string)
    else:
        compat_Struct = struct.Struct
# Lazy (iterator-returning) zip on every supported Python version.
try:
    from future_builtins import zip as compat_zip
except ImportError:  # not 2.6+ or is 3.x
    try:
        from itertools import izip as compat_zip  # < 2.5 or 3.x
    except ImportError:
        # Python 3: the builtin zip is already lazy.
        compat_zip = zip
if sys.version_info < (3, 3):
    def compat_b64decode(s, *args, **kwargs):
        """base64.b64decode() that also accepts ASCII-only text strings."""
        if isinstance(s, compat_str):
            s = s.encode('ascii')
        return base64.b64decode(s, *args, **kwargs)
else:
    compat_b64decode = base64.b64decode
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
    # names, see the original PyPy issue [1] and the youtube-dl one [2].
    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
    # 2. https://github.com/rg3/youtube-dl/pull/4392
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """ctypes.WINFUNCTYPE() whose prototype coerces the function name in
        a ``(name, dll)`` tuple to ``str`` before binding it."""
        real = ctypes.WINFUNCTYPE(*args, **kwargs)

        def resf(tpl, *args, **kwargs):
            funcname, dll = tpl
            return real((str(funcname), dll), *args, **kwargs)

        return resf
else:
    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        """Thin wrapper around ctypes.WINFUNCTYPE()."""
        return ctypes.WINFUNCTYPE(*args, **kwargs)
2962 'compat_HTMLParseError',
2963 'compat_HTMLParser',
2967 'compat_basestring',
2971 'compat_ctypes_WINFUNCTYPE',
2972 'compat_etree_fromstring',
2973 'compat_etree_register_namespace',
2974 'compat_expanduser',
2975 'compat_get_terminal_size',
2978 'compat_html_entities',
2979 'compat_html_entities_html5',
2980 'compat_http_client',
2981 'compat_http_server',
2983 'compat_integer_types',
2984 'compat_itertools_count',
2986 'compat_numeric_types',
2992 'compat_shlex_quote',
2993 'compat_shlex_split',
2994 'compat_socket_create_connection',
2996 'compat_struct_pack',
2997 'compat_struct_unpack',
2998 'compat_subprocess_get_DEVNULL',
2999 'compat_tokenize_tokenize',
3000 'compat_urllib_error',
3001 'compat_urllib_parse',
3002 'compat_urllib_parse_unquote',
3003 'compat_urllib_parse_unquote_plus',
3004 'compat_urllib_parse_unquote_to_bytes',
3005 'compat_urllib_parse_urlencode',
3006 'compat_urllib_parse_urlparse',
3007 'compat_urllib_request',
3008 'compat_urllib_request_DataHandler',
3009 'compat_urllib_response',
3011 'compat_urlretrieve',
3012 'compat_xml_parse_error',
3015 'workaround_optparse_bug9161',