HEX
Server: nginx/1.28.3
System: Linux lightweb-s1 5.15.0-173-generic #183-Ubuntu SMP Fri Mar 6 13:29:34 UTC 2026 x86_64
User: dawonefr-98 (1071)
PHP: 8.3.30
Disabled: NONE
Upload Files
File: //lib/python3/dist-packages/html5lib/__pycache__/_inputstream.cpython-310.pyc
o

V=�^,~�@s�ddlmZmZmZddlmZddlmZmZddl	Z	ddl
Z
ddlmZm
Z
ddlZddlmZmZmZmZddlmZdd	lmZed
d�eD��Zedd�eD��Zed
d�eD��Zeeddg�BZdZejr�eddkrye�d�dks{J�e
�edd�e d�d�Z!ne
�e�Z!hd�Z"e
�d�Z#iZ$Gdd�de%�Z&dd�Z'Gdd�de%�Z(Gdd�de(�Z)Gdd�de*�Z+Gd d!�d!e%�Z,Gd"d#�d#e%�Z-d$d%�Z.dS)&�)�absolute_import�division�unicode_literals)�	text_type)�http_client�urllibN)�BytesIO�StringIO�)�EOF�spaceCharacters�asciiLetters�asciiUppercase)�_ReparseException)�_utilscC�g|]}|�d��qS��ascii��encode��.0�item�r�7/usr/lib/python3/dist-packages/html5lib/_inputstream.py�
<listcomp>�rcCrrrrrrrrrcCrrrrrrrrr�>�<u�[---Ÿ﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]����]z"\uD800-\uDFFF"> ���������	�
���
������������	�
���
���z[	-
 -/:-@\[-`{-~]c@sHeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dS)�BufferedStreamz�Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    cCs||_g|_ddg|_dS)Nrr)�stream�buffer�position)�selfrBrrr�__init__:szBufferedStream.__init__cCs<d}|jd|jd�D]}|t|�7}q||jd7}|S�Nrr
)rCrD�len)rE�pos�chunkrrr�tell?s
zBufferedStream.tellcCsd||��ksJ�|}d}t|j|�|kr+|t|j|�8}|d7}t|j|�|ks||g|_dSrG)�_bufferedBytesrHrCrD)rErI�offset�irrr�seekFs�zBufferedStream.seekcCsP|js|�|�S|jdt|j�kr#|jdt|jd�kr#|�|�S|�|�S)Nrr
r)rC�_readStreamrDrH�_readFromBuffer�rE�bytesrrr�readOs


zBufferedStream.readcCstdd�|jD��S)NcSsg|]}t|��qSr)rHrrrrrYsz1BufferedStream._bufferedBytes.<locals>.<listcomp>)�sumrC�rErrrrLX�zBufferedStream._bufferedBytescCs<|j�|�}|j�|�|jdd7<t|�|jd<|SrG)rBrTrC�appendrDrH)rErS�datarrrrP[s
zBufferedStream._readStreamcCs�|}g}|jd}|jd}|t|j�krc|dkrc|dksJ�|j|}|t|�|kr6|}|||g|_nt|�|}|t|�g|_|d7}|�||||��||8}d}|t|j�krc|dks|rm|�|�|��d�|�S)Nrr
�)rDrHrCrXrP�join)rErS�remainingBytes�rv�bufferIndex�bufferOffset�bufferedData�bytesToReadrrrrQbs(


�
zBufferedStream._readFromBufferN)�__name__�
__module__�__qualname__�__doc__rFrKrOrTrLrPrQrrrrrA3s		rAcKs�t|tj�st|tjj�rt|jtj�rd}nt|d�r%t|�d�t	�}nt|t	�}|rCdd�|D�}|r;t
d|��t|fi|��St|fi|��S)NFrTrcSsg|]	}|�d�r|�qS)�	_encoding)�endswith)r�xrrrr�sz#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
�
isinstancer�HTTPResponser�response�addbase�fp�hasattrrTr�	TypeError�HTMLUnicodeInputStream�HTMLBinaryInputStream)�source�kwargs�	isUnicode�	encodingsrrr�HTMLInputStream}s��

rvc@speZdZdZdZdd�Zdd�Zdd�Zd	d
�Zdd�Z	d
d�Z
ddd�Zdd�Zdd�Z
ddd�Zdd�ZdS)rp��Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i(cCsZtjsd|_ntd�dkr|j|_n|j|_dg|_td�df|_|�	|�|_
|��dS)�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu􏿿r
r�utf-8�certain)r�supports_lone_surrogates�reportCharacterErrorsrH�characterErrorsUCS4�characterErrorsUCS2�newLines�lookupEncoding�charEncoding�
openStream�
dataStream�reset)rErrrrrrF�s
zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)N�r)rJ�	chunkSize�chunkOffset�errors�prevNumLines�prevNumCols�_bufferedCharacterrVrrrr��s
zHTMLUnicodeInputStream.resetcCst|d�r	|}|St|�}|S�zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        rT)rnr	�rErrrBrrrr��s

�z!HTMLUnicodeInputStream.openStreamcCsZ|j}|�dd|�}|j|}|�dd|�}|dkr#|j|}||fS||d}||fS)N�
rrr
)rJ�countr��rfindr�)rErMrJ�nLines�positionLine�lastLinePos�positionColumnrrr�	_position�s

�z HTMLUnicodeInputStream._positioncCs|�|j�\}}|d|fS)z:Returns (line, col) of the current position in the stream.r
)r�r�)rE�line�colrrrrD�szHTMLUnicodeInputStream.positioncCs6|j|jkr|��stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r
)r�r��	readChunkrrJ)rEr��charrrrr��s

zHTMLUnicodeInputStream.charNcCs�|dur|j}|�|j�\|_|_d|_d|_d|_|j�|�}|j	r,|j	|}d|_	n|s0dSt
|�dkrWt|d�}|dksLd|krJdkrWnn|d|_	|dd�}|jr_|�|�|�
d	d
�}|�
dd
�}||_t
|�|_dS)
Nr�rFr
r�
��i��z
r��
T)�_defaultChunkSizer�r�r�r�rJr�r�rTr�rH�ordr|�replace)rEr�rY�lastvrrrr��s0
 


z HTMLUnicodeInputStream.readChunkcCs(ttt�|���D]}|j�d�q	dS)N�invalid-codepoint)�rangerH�invalid_unicode_re�findallr�rX)rErY�_rrrr}s�z*HTMLUnicodeInputStream.characterErrorsUCS4cCs�d}t�|�D]Q}|rqt|���}|��}t�|||d��r9t�|||d��}|tvr6|j	�
d�d}q|dkrP|dkrP|t|�dkrP|j	�
d�qd}|j	�
d�qdS)NF�r�Tr�i��r
)r��finditerr��group�startr�isSurrogatePair�surrogatePairToCodepoint�non_bmp_invalid_codepointsr�rXrH)rErY�skip�match�	codepointrI�char_valrrrr~#s"�z*HTMLUnicodeInputStream.characterErrorsUCS2Fc
Cszt||f}Wn4ty<	|D]
}t|�dksJ�qd�dd�|D��}|s-d|}t�d|�}t||f<Ynwg}	|�|j|j�}|durT|j|j	krSn*n|�
�}||j	krl|�|j|j|��||_n|�|j|jd��|��s|nq@d�|�}	|	S)	z� Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        T�r�cSsg|]}dt|��qS)z\x%02x)r�)r�crrrrHsz5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)
�charsUntilRegEx�KeyErrorr�r[�re�compiler�rJr�r��endrXr�)
rE�
characters�opposite�charsr��regexr]�mr��rrrr�
charsUntil:s:�	�
�
z!HTMLUnicodeInputStream.charsUntilcCsZ|tur)|jdkr||j|_|jd7_dS|jd8_|j|j|ks+J�dSdSrG)rr�rJr�)rEr�rrr�ungetis
�zHTMLUnicodeInputStream.unget�N)F)rbrcrdrer�rFr�r�r�rDr�r�r}r~r�r�rrrrrp�s 
&
/rpc@sReZdZdZ			ddd�Zdd�Zd	d
�Zddd�Zd
d�Zdd�Z	dd�Z
dS)rqrwN�windows-1252TcCsn|�|�|_t�||j�d|_d|_||_||_||_||_	||_
|�|�|_|jddus1J�|�
�dS)rxi�drN)r��	rawStreamrprF�numBytesMeta�numBytesChardet�override_encoding�transport_encoding�same_origin_parent_encoding�likely_encoding�default_encoding�determineEncodingr�r�)rErrr�r�r�r�r��
useChardetrrrrF�szHTMLBinaryInputStream.__init__cCs&|jdj�|jd�|_t�|�dS)Nrr�)r��
codec_info�streamreaderr�r�rpr�rVrrrr��szHTMLBinaryInputStream.resetcCsJt|d�r|}nt|�}z
|�|���W|Sty$t|�}Y|Swr�)rnrrOrK�	ExceptionrAr�rrrr��s
�
�z HTMLBinaryInputStream.openStreamcCs�|��df}|ddur|St|j�df}|ddur|St|j�df}|ddur,|S|��df}|ddur:|St|j�df}|ddurQ|dj�d�sQ|St|j�df}|ddur`|S|r�zddl	m
}Wn	tysYn@wg}|�}|js�|j
�|j�}t|t�s�J�|s�n
|�|�|�|�|jr||��t|jd�}|j
�d�|dur�|dfSt|j�df}|ddur�|Std�dfS)Nrzr�	tentativezutf-16)�UniversalDetector�encodingr�)�	detectBOMr�r�r��detectEncodingMetar��name�
startswithr��chardet.universaldetectorr��ImportError�doner�rTr�rirSrX�feed�close�resultrOr�)rE�chardetr�r��buffers�detectorrCr�rrrr��sV�

�z'HTMLBinaryInputStream.determineEncodingcCs�|jddks	J�t|�}|durdS|jdvr$td�}|dus"J�dS||jdkr5|jddf|_dS|j�d�|df|_|��td|jd|f��)Nr
rz��utf-16be�utf-16leryrzEncoding changed from %s to %s)r�r�r�r�rOr�r)rE�newEncodingrrr�changeEncodings

z$HTMLBinaryInputStream.changeEncodingc
Cs�tjdtjdtjdtjdtjdi}|j�d�}t|t	�sJ�|�
|dd��}d}|s?|�
|�}d}|s?|�
|dd	��}d	}|rK|j�|�t|�S|j�d
�dS)z�Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Noneryr�r�zutf-32lezutf-32be�N�r�r)
�codecs�BOM_UTF8�BOM_UTF16_LE�BOM_UTF16_BE�BOM_UTF32_LE�BOM_UTF32_BEr�rTrirS�getrOr�)rE�bomDict�stringr�rOrrrr�s&�
zHTMLBinaryInputStream.detectBOMcCsV|j�|j�}t|t�sJ�t|�}|j�d�|��}|dur)|jdvr)t	d�}|S)z9Report the encoding declared by the meta element
        rNr�ry)
r�rTr�rirS�EncodingParserrO�getEncodingr�r�)rErC�parserr�rrrr�3sz(HTMLBinaryInputStream.detectEncodingMeta)NNNNr�T)T)rbrcrdrerFr�r�r�r�r�r�rrrrrqzs
�*
>"rqc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zeee
�Z
dd�Zee�Zefdd�Zdd�Zdd�Zdd�ZdS)�
EncodingBytesz�String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedcCst|t�sJ�t�||���Sr�)rirS�__new__�lower�rE�valuerrrr�FszEncodingBytes.__new__cCs
d|_dS)Nr)r�r�rrrrFJs
zEncodingBytes.__init__cCs|Sr�rrVrrr�__iter__NszEncodingBytes.__iter__cCs<|jd}|_|t|�krt�|dkrt�|||d�S)Nr
r�r�rH�
StopIterationro�rE�prrr�__next__QszEncodingBytes.__next__cCs|��Sr�)r�rVrrr�nextYszEncodingBytes.nextcCs@|j}|t|�krt�|dkrt�|d|_}|||d�SrGr�r�rrr�previous]szEncodingBytes.previouscCs|jt|�kr	t�||_dSr��r�rHr�)rErDrrr�setPositionfs
zEncodingBytes.setPositioncCs&|jt|�kr	t�|jdkr|jSdS)NrrrVrrr�getPositionks

zEncodingBytes.getPositioncCs||j|jd�S�Nr
)rDrVrrr�getCurrentByteurWzEncodingBytes.getCurrentBytecCsR|j}|t|�kr$|||d�}||vr||_|S|d7}|t|�ks	||_dS)zSkip past a list of charactersr
N�rDrHr��rEr�r�r�rrrr�zs�zEncodingBytes.skipcCsR|j}|t|�kr$|||d�}||vr||_|S|d7}|t|�ks	||_dSrrrrrr�	skipUntil�s�zEncodingBytes.skipUntilcCs(|�||j�}|r|jt|�7_|S)z�Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r�rDrH)rErSr]rrr�
matchBytes�szEncodingBytes.matchBytescCs6z|�||j�t|�d|_WdStyt�w)z�Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchr
T)�indexrDrHr��
ValueErrorr�rRrrr�jumpTo�s��zEncodingBytes.jumpToN)rbrcrdrer�rFr�r�r�r�rr�propertyrDr�currentByte�spaceCharactersBytesr�rrrrrrrr�Bs"	
	r�c@sXeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)r�z?Mini parser for detecting character encoding from meta elementscCst|�|_d|_dS)z3string - the data to work on for encoding detectionN)r�rYr��rErYrrrrF�s

zEncodingParser.__init__c
Cs�d|jvrdSd|jfd|jfd|jfd|jfd|jfd|jff}|jD]?}d}z|j�d�Wnty<Y|j	Sw|D]\}}|j�|�r\z|�}Wn
ty[d}Ynwq?|sc|j	Sq$|j	S)	Ns<metas<!--s</s<!s<?rTF)
rY�
handleComment�
handleMeta�handlePossibleEndTag�handleOther�handlePossibleStartTagrr�rr�)rE�methodDispatchr��keepParsing�key�methodrrrr��s@
�
����zEncodingParser.getEncodingcC�|j�d�S)zSkip over commentss-->�rYrrVrrrr�szEncodingParser.handleCommentcCs�|jjtvrdSd}d}	|��}|durdS|ddkr/|ddk}|r.|dur.||_dSn?|ddkrG|d}t|�}|durF||_dSn'|ddkrntt|d��}|��}|durnt|�}|durn|rl||_dS|}q
)	NTFrs
http-equivr
scontent-type�charsetscontent)	rYr
r�getAttributer�r��ContentAttrParserr��parse)rE�	hasPragma�pendingEncoding�attr�tentativeEncoding�codec�
contentParserrrrr�s@���zEncodingParser.handleMetacCs
|�d�S)NF)�handlePossibleTagrVrrrr�s
z%EncodingParser.handlePossibleStartTagcCst|j�|�d�S)NT)r�rYr%rVrrrr�s

z#EncodingParser.handlePossibleEndTagcCsj|j}|jtvr|r|��|��dS|�t�}|dkr#|��dS|��}|dur3|��}|dus+dS)NTr)rYr
�asciiLettersBytesr�rr�spacesAngleBracketsr)rE�endTagrYr�r!rrrr%�s

��z EncodingParser.handlePossibleTagcCr)NrrrVrrrrszEncodingParser.handleOthercCs�|j}|�ttdg�B�}|dust|�dksJ�|dvrdSg}g}	|dkr+|r+n0|tvr4|��}n'|dvr?d�|�dfS|tvrK|�|���n|durQdS|�|�t	|�}q$|dkrj|�
�d�|�dfSt	|�|��}|d	vr�|}	t	|�}||kr�t	|�d�|�d�|�fS|tvr�|�|���n|�|�qy|d
kr�d�|�dfS|tvr�|�|���n|dur�dS|�|�	t	|�}|tvr�d�|�d�|�fS|tvr�|�|���n|dur�dS|�|�q�)z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None�/Nr
)rNT�=)r)rrZ)�'�"r)rYr�r�	frozensetrHr[�asciiUppercaseBytesrXr�r�r�r')rErYr��attrName�	attrValue�	quoteCharrrrrsn
�
�


�zEncodingParser.getAttributeN)
rbrcrdrerFr�rrrrr%rrrrrrr��s$r�c@seZdZdd�Zdd�ZdS)rcCst|t�sJ�||_dSr�)rirSrYrrrrrFas
zContentAttrParser.__init__cCszy|j�d�|jjd7_|j��|jjdksWdS|jjd7_|j��|jjdvrS|jj}|jjd7_|jj}|j�|�rP|j||jj�WSWdS|jj}z|j�t�|j||jj�WWStyy|j|d�YWSwty�YdSw)Nrr
r*)r,r+)rYrrDr�r
rrr�)rE�	quoteMark�oldPositionrrrres2

��zContentAttrParser.parseN)rbrcrdrFrrrrrr`srcCs\t|t�rz|�d�}Wn
tyYdSw|dur,zt�|�WSty+YdSwdS)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.rN)rirS�decode�UnicodeDecodeError�webencodings�lookup�AttributeError)r�rrrr��s
��r�)/�
__future__rrr�sixr�	six.movesrrr�r��iorr	r6�	constantsrrr
rrr�rr-rr&r.r'�invalid_unicode_no_surrogater{r�r��evalr�r��ascii_punctuation_rer��objectrArvrprqrSr�r�rr�rrrr�<module>sJ��

JgIb='