
    s<gv.                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ  ee
      dedefd       Z ee
      dedefd       Z ee
      dedee   fd       Z ee
      dedefd       Z  ee
      dedefd       Z! ee
      dedefd       Z" ee
      dedefd       Z# ee
      dedefd       Z$ ee
      dedefd       Z% ee
      dedefd       Z& ee
      dedefd       Z' ee
      dedefd       Z( ee
      dedefd       Z) ee
      dedefd       Z* ee
      dedefd       Z+ ee
      dedefd       Z, e e-e      
      dedefd       Z. ee
      dedefd       Z/dAd e0d!e1dee   fd"Z2 ed#
      d$edefd%       Z3d e0deee   e0f   fd&Z4d'edefd(Z5dBd)ed*edefd+Z6d,edee   fd-Z7d.ed/ede8fd0Z9d.ed/edefd1Z:d2ejv                  d3fd$ed4e1d5eddfd6Z<	 dCd7e0d8ed9e=d:e1d;ed<ed=e0d>ed?ee   deeddf   fd@Z>y)D    N)IncrementalDecoder)aliases)	lru_cache)findall)	GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize	characterreturnc                     	 t        j                  |       }d|v xs( d|v xs" d|v xs d|v xs d|v xs d|v xs
 d|v xs d	|v S # t        $ r Y yw xY w)
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVEunicodedataname
ValueErrorr   descriptions     V/home/kevstigneev/proxy/myenv/lib/python3.12/site-packages/charset_normalizer/utils.pyis_accentuatedr       s    &++I6 	# 	,;&	,[(	, {*	, +		,
 ;&	, K'	, +	  s   A 	AAc                     t        j                  |       }|s| S |j                  d      }t        t	        |d   d            S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr*   (   sA    !//	:J!'',Es58R !!    c                 b    t        |       }t        j                         D ]  \  }}||v s|c S  y)zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger2   3   s=    
 YM!8!>!>!@ 
II% r+   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFLATINr   r   s     r   is_latinr5   A   s8    &++I6 k!!      	''c                 Z    t        j                  |       }d|v ryt        |       }|yd|v S )NPTFPunctuationr   categoryr2   r   character_categorycharacter_ranges      r   is_punctuationr?   J   s=    )229=
  %29%=OO++r+   c                 p    t        j                  |       }d|v sd|v ryt        |       }|yd|v xr |dk7  S )NSNTFFormsLor:   r<   s      r   	is_symbolrE   Y   sP    )229=
  C+=$=%29%=Oo%D*<*DDr+   c                 2    t        |       }|yd|v xs d|v S )NF	EmoticonsPictographs)r2   )r   r>   s     r   is_emoticonrI   h   s*    %29%=O/)M]o-MMr+   c                 j    | j                         s| dv ryt        j                  |       }d|v xs |dv S )N>      ｜+<>TZ>   PcPdPo)isspacer   r;   )r   r=   s     r   is_separatorrT   r   sB    i+AA)229=$$P(:>P(PPr+   c                 D    | j                         | j                         k7  S N)islowerisupperr   s    r   is_case_variablerZ   |   s    )"3"3"555r+   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFCJKr   r   character_names     r   is_cjkr_      s8    $)))4 N""  r6   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHIRAGANAr   r]   s     r   is_hiraganarb      8    $)))4 ''  r6   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFKATAKANAr   r]   s     r   is_katakanarf      rc   r6   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHANGULr   r]   s     r   	is_hangulri      8    $)))4 ~%%  r6   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFTHAIr   r]   s     r   is_thairm      s8    $)))4 ^##  r6   c                 T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFARABICr   r]   s     r   	is_arabicrp      rj   r6   c                 `    	 t        j                  |       }d|v xr d|v S # t        $ r Y yw xY w)NFro   zISOLATED FORMr   r]   s     r   is_arabic_isolated_formrr      sB    $)))4 ~%K/^*KK  s   ! 	--r0   c                 4     t         fdt        D              S )Nc              3   &   K   | ]  }|v  
 y wrV    ).0keywordr0   s     r   	<genexpr>z-is_unicode_range_secondary.<locals>.<genexpr>   s     Tw*$Ts   )anyr   )r0   s   `r   is_unicode_range_secondaryrz      s    T4STTTr+   c                 j    | j                         du xr  | j                         du xr | dk7  xr | dk7  S )NFu   ﻿)rS   isprintablerY   s    r   is_unprintabler~      sL     	u$ 	"!!#u,	"	" !	r+   sequencesearch_zonec           	      r   t        | t              st        t        |       }t	        t
        | dt        ||       j                  dd            }t        |      dk(  ry|D ]T  }|j                         j                  dd      }t        j                         D ]  \  }}||k(  r|c c S ||k(  s|c c S  V y)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r.   )r   r   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s     h&x=G ',3w,-44WX4NG
 7|q% 
%/557??SI
 .5]]_ 	%)NM!33$$ 22$$		%
% r+      r   c                     | dv xs< t        t        j                  dj                  |             j                  t
              S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r   )r   s    r   is_multi_byte_encodingr      sG    
  
 
  
 5 5d ;<OO#
r+   c                     t         D ]>  }t         |   }t        |t              r|g}|D ]  }| j                  |      s||fc c S  @ y)z9
    Identify and extract SIG/BOM in given sequence.
    )Nr+   )r   r   r   
startswith)r   iana_encodingmarksmarks       r   identify_sig_or_bomr     s[    
 ( ++9-+HeU#GE 	+D""4($d**	++ r+   r   c                 
    | dvS )N>   r   r   ru   )r   s    r   should_strip_sig_or_bomr     s     444r+   cp_namestrictc                     | j                         j                  dd      } t        j                         D ]  \  }}| ||fv s|c S  |rt	        dj                  |             | S )Nr   r   z Unable to retrieve IANA for '{}')r   r   r   r.   r   r   )r   r   r   r   s       r   	iana_namer   "  sj    mmo%%c3/G
 *1 !%~}55  ! ;BB7KLLNr+   decoded_sequencec                 x    t               }| D ]!  }t        |      }||j                  |       # t        |      S rV   )setr2   addlist)r   rangesr   r>   s       r   
range_scanr   2  sC    uF% $	)6y)A"

?#$ <r+   iana_name_aiana_name_bc                    t        |       st        |      ryt        j                  dj                  |             j                  }t        j                  dj                  |            j                  } |d      } |d      }d}t        d      D ]7  }t        |g      }|j                  |      |j                  |      k(  s3|dz  }9 |dz  S )	Ng        r   r   r   r      r      )r   r   r   r   r   ranger   r   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr   @  s    k*.D[.Q''k*  ''k*   )9D(9D!"3Z '$aSz;;}%])CC!Q&!'
 !3&&r+   c                 ,    | t         v xr |t         |    v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   X  s%     	-- 	?1+>>r+   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringc                     t        j                  |       }|j                  |       t        j                         }|j	                  t        j
                  |             |j                  |       y rV   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlers        r   set_logging_handlerr   c  sU    
 t$F
OOE##%G**=9:
gr+   	sequencesr   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadc	              #     K   |r|du r|D ]  }	||	|	|z    }
|
s y |
  y |D ]  }	|	|z   }|t        |       dz   kD  r| |	|	|z    }|r	|du r||z   }|j                  ||rdnd      }
|r[|	dkD  rVt        |d      }|rH|
d | |vrAt        |	|	dz
  d	      D ].  }| || }|r	|du r||z   }|j                  |d      }
|
d | |v s. n |
  y w)
NF   r   r   r   r   r#      )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r   cut_sequence_chunksr   p  sK     0E9 	A#AJ7EK		  #	AJI3y>A--$QZ8L#(8E(A*\9 ''#8xh ( E %Q.1*b.A& $556oM"1a!eR0 	"'09'=/4D4M+6+EL , 3 3M( 3 S !8"89_L!	" KG#	s   B>C
C)i    )TrV   )?r   r   r   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   r   r	   r
   r   r   _multibytecodecr   constantr   r   r   r   r   r   strboolr    r*   r2   r5   r?   rE   rI   rT   rZ   r_   rb   rf   ri   rm   rp   rr   r   rz   r~   r   r'   r   r   r   r   r   r   floatr   r   INFOr   r   r   ru   r+   r   <module>r      s      % %   ? ? 7  *+c d  ," *+"S "S " ," *+
S 
Xc] 
 ,
 *+" " " ," *+,c ,d , ,, *+E E E ,E *+N3 N4 N ,N *+QC QD Q ,Q *+6 6 6 ,6 *+#c #d # ,# *+(3 (4 ( ,( *+(3 (4 ( ,( *+& & & ,& *+$s $t $ ,$ *+& & & ,& *+Ls Lt L ,L 3./0U3 U4 U 1U *+c d  ,U  QT @ 3   (% E(3-2F,G $53 54 5s D C   c 's ' ' '0s    %D



 
 
	
, &*555 5 	5
 5 5 5  5 c]5 sD$5r+   