
    s<g0                         d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZmZmZ  G d	 d
      Z G d d      Zeeef   Zee   Z G d d      Zy)    )aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   (   e Zd Z	 	 d%dededededddee   d	ee   fd
Zde	defdZ
de	defdZedefd       ZdefdZdefdZd&dZedefd       Zedee   fd       Zedefd       Zedefd       Zedee   fd       Zedefd       Zedefd       Zedefd       Zedefd       Zedefd       Zedefd       Zeded    fd       Zedefd       Zedee   fd        Zedee   fd!       Z d'd"edefd#Z!edefd$       Z"y)(CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadpreemptive_declarationc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        y )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r   s           W/home/kevstigneev/proxy/myenv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s_      '.'6,5%348+-,/"04/3&56L$    otherreturnc                     t        |t              s)t        |t              rt        |      | j                  k(  S y| j                  |j                  k(  xr | j
                  |j
                  k(  S )NF)
isinstancer   strr   encodingfingerprintr,   r0   s     r-   __eq__zCharsetMatch.__eq__(   sV    %.%% '4==88}}.X43C3CuGXGX3XXr/   c                    t        |t              st        t        | j                  |j                  z
        }t        | j
                  |j
                  z
        }|dk  r|dkD  r| j
                  |j
                  kD  S |dk  rS|dk  rNt        | j                        t        k\  r| j                  |j                  k  S | j                  |j                  kD  S | j                  |j                  k  S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r3   r   
ValueErrorabschaos	coherencelenr    r   multi_byte_usage)r,   r0   chaos_differencecoherence_differences       r-   __lt__zCharsetMatch.__lt__/   s     %."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r/   c                 \    dt        t        |             t        | j                        z  z
  S )Ng      ?)r>   r4   rawr,   s    r-   r?   zCharsetMatch.multi_byte_usageE   s"    c#d)ns488}455r/   c                 ~    | j                   &t        | j                  | j                  d      | _         | j                   S )Nstrict)r*   r4   r    r!   rE   s    r-   __str__zCharsetMatch.__str__I   s.    <<t}}dnnhGDL||r/   c                 N    dj                  | j                  | j                        S )Nz<CharsetMatch '{}' bytes({})>)formatr5   r6   rE   s    r-   __repr__zCharsetMatch.__repr__O   s    .55dmmTEUEUVVr/   c                     t        |t              r|| k(  r$t        dj                  |j                              d |_        | j                  j                  |       y )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r3   r   r:   rJ   	__class__r*   r&   appendr7   s     r-   add_submatchzCharsetMatch.add_submatchR   sO    %.%4-MTTOO  E"r/   c                     | j                   S N)r!   rE   s    r-   r5   zCharsetMatch.encoding]   s    ~~r/   c                     g }t        j                         D ]G  \  }}| j                  |k(  r|j                  |       '| j                  |k(  s7|j                  |       I |S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr5   rN   )r,   also_known_asups       r-   encoding_aliaseszCharsetMatch.encoding_aliasesa   s^    
 $&MMO 	(DAq}}!$$Q'!#$$Q'		(
 r/   c                     | j                   S rQ   r$   rE   s    r-   bomzCharsetMatch.bomn       ###r/   c                     | j                   S rQ   rY   rE   s    r-   byte_order_markzCharsetMatch.byte_order_markr   r[   r/   c                 F    | j                   D cg c]  }|d   	 c}S c c}w )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        r   r#   )r,   es     r-   r   zCharsetMatch.languagesv   s      #oo.!...s   c                    | j                   shd| j                  v ryddlm}m} t        | j                        r || j                        n || j                        }t        |      dk(  sd|v ry|d   S | j                   d   d   S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r#   could_be_from_charsetcharset_normalizer.cdrd   re   r   r5   r>   )r,   rd   re   r   s       r-   languagezCharsetMatch.language~   s      $444  X *$--8 &dmm4'6  9~"my&@ Q<q!!$$r/   c                     | j                   S rQ   )r"   rE   s    r-   r<   zCharsetMatch.chaos   s    $$$r/   c                 @    | j                   sy| j                   d   d   S )Nr   r   r   r_   rE   s    r-   r=   zCharsetMatch.coherence   s     q!!$$r/   c                 6    t        | j                  dz  d      S Nd      )ndigits)roundr<   rE   s    r-   percent_chaoszCharsetMatch.percent_chaos   s    TZZ#%q11r/   c                 6    t        | j                  dz  d      S rm   )rq   r=   rE   s    r-   percent_coherencezCharsetMatch.percent_coherence   s    T^^c)155r/   c                     | j                   S )z+
        Original untouched bytes.
        )r    rE   s    r-   rD   zCharsetMatch.raw   s    
 }}r/   c                     | j                   S rQ   )r&   rE   s    r-   submatchzCharsetMatch.submatch   s    ||r/   c                 2    t        | j                        dkD  S Nr   )r>   r&   rE   s    r-   has_submatchzCharsetMatch.has_submatch   s    4<< 1$$r/   c                     | j                   | j                   S t        |       D cg c]  }t        |       }}t        t	        |D ch c]  }|s|	 c}            | _         | j                   S c c}w c c}w rQ   )r%   r4   r   sortedlist)r,   chardetected_rangesrs       r-   	alphabetszCharsetMatch.alphabets   sw    +''' -0I0
$(M$0
 0
  &d+L!!A+L&MN###0
 ,Ms   A0A5A5c                 p    | j                   g| j                  D cg c]  }|j                   c}z   S c c}w )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        )r!   r&   r5   )r,   ms     r-   rg   z"CharsetMatch.could_be_from_charset   s,     t||"D!1::"DDD"Ds   3r5   c                 4     j                    j                   |k7  rq| _         t               } j                  < j                  j                         dvr t	        t
         fd|dd d      }||dd z   }|j                  |d       _         j                  S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8utf_8c                     | j                   | j                         d   | j                         d    j                  | j                         d   t	        j
                              S )Nr   r   )stringspanreplacegroupsr   r)   )r   r,   s    r-   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sK    ahhqvvx{QVVXa[AII
1y1F1F'G r/   i    r   r   )r)   r4   r+   lowerr   r   encoder(   )r,   r5   decoded_stringpatched_headers   `   r-   outputzCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "0.2G!G#1#8#89#MD ###r/   c                 P    t        | j                               j                         S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrE   s    r-   r6   zCharsetMatch.fingerprint   s    
 dkkm$..00r/   )NN)r0   r   r1   N)r   )#__name__
__module____qualname__bytesr4   floatboolr   r.   objectr8   rB   propertyr?   rH   rK   rO   r5   r
   rW   rZ   r]   r   ri   r<   r=   rr   rt   rD   rw   rz   r   rg   r   r6    r/   r-   r   r      s    *.04MM M 	M
 M &M "#M !)M8YF Yt Y(F (t (, 6% 6 6 W# W	# #   
$s) 
 
 $T $ $ $ $ $ /49 / / %# % %6 %u % % %5 % %
 2u 2 2 65 6 6 U   $~.   %d % % 	$49 	$ 	$ EtCy E E$s $ $8 1S 1 1r/   r   c                       e Zd ZdZddeee      fdZdee   fdZ	de
eef   defdZdefd	Zdefd
ZdeddfdZded   fdZded   fdZy)CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 8    |rt        |      | _        y g | _        y rQ   )r|   _results)r,   r   s     r-   r.   zCharsetMatches.__init__   s    ?FF7OBr/   r1   c              #   8   K   | j                   E d {    y 7 wrQ   r   rE   s    r-   __iter__zCharsetMatches.__iter__   s     ==  s   itemc                     t        |t              r| j                  |   S t        |t              r/t	        |d      }| j                  D ]  }||j
                  v s|c S  t        )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r3   intr   r4   r   rg   KeyError)r,   r   results      r-   __getitem__zCharsetMatches.__getitem__   s`    
 dC ==&&dC T5)D-- "6777!M" r/   c                 ,    t        | j                        S rQ   r>   r   rE   s    r-   __len__zCharsetMatches.__len__  s    4==!!r/   c                 2    t        | j                        dkD  S ry   r   rE   s    r-   __bool__zCharsetMatches.__bool__  s    4==!A%%r/   c                    t        |t              s-t        dj                  t	        |j
                                    t        |j                        t        k  rW| j                  D ]H  }|j                  |j                  k(  s|j                  |j                  k(  s7|j                  |        y | j                  j                  |       t        | j                        | _	        y)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r3   r   r:   rJ   r4   rM   r>   rD   r   r   r6   r<   rO   rN   r|   )r,   r   matchs      r-   rN   zCharsetMatches.append  s    
 $-?FF'  txx=++ $$(8(88U[[DJJ=V&&t, 	T"t}}-r/   r   c                 :    | j                   sy| j                   d   S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rE   s    r-   bestzCharsetMatches.best(  s     }}}}Qr/   c                 "    | j                         S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rE   s    r-   firstzCharsetMatches.first0  s     yy{r/   rQ   )r   r   r   __doc__r   r
   r   r.   r	   r   r   r   r4   r   r   r   r   rN   r   r   r   r/   r-   r   r      s    
Ol); < O!(<0 !c3h L " "&$ &.< .D .( h~.  x/ r/   r   c                       e Zd Zdedee   dee   dee   dedee   deded	ed
ee   defdZe	de
eef   fd       ZdefdZy)CliDetectionResultpathr5   rW   alternative_encodingsri   r   r   r<   r=   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        y rQ   )r   r   r5   rW   r   ri   r   r   r<   r=   r   )r,   r   r5   rW   r   ri   r   r   r<   r=   r   r   s               r-   r.   zCliDetectionResult.__init__<  sV     	+7'/+;0E"%$-$2!
 )".r/   r1   c                     | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  dS )Nr   r5   rW   r   ri   r   r   r<   r=   r   r   r   rE   s    r-   __dict__zCliDetectionResult.__dict__V  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r/   c                 2    t        | j                  dd      S )NT   )ensure_asciiindent)r   r   rE   s    r-   to_jsonzCliDetectionResult.to_jsonf  s    T]]a@@r/   N)r   r   r   r4   r   r
   r   r   r.   r   r   r   r   r   r   r/   r-   r   r   ;  s    // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ /4 
$sCx. 
 
A Ar/   r   N)encodings.aliasesr   hashlibr   jsonr   rer   typingr   r   r	   r
   r   r   r   constantr   r   utilsr   r   r   r   r   r4   r   CoherenceMatchr   r   r   r/   r-   <module>r      sd    %    D D D G C Cf1 f1R@ @F sEz"' ,A ,Ar/   