o
    fm:                     @   s  d Z ddlmZ ddlZddlmZ dZdZdZd	Z	e
d
Zdd Zdd Zdd Zdd Zdd ZG dd deZdd ZG dd deZG dd deZe ZG dd deZG dd  d eZG d!d" d"eZG d#d$ d$eZG d%d& d&eZe Zd'e_ d(e_d)d* Zd+d, Zd-d. Z d/d0 Z!e!d1Z"d2e"_ d3e"_dDd4d5Z#d6d7 Z$d8d9 Z%d:d; Z&d<d= Z'eeZ(d>e(_ d?e(_eeZ)d@e)_ dAe)_ee	Z*dBe*_ dCe*_dS )Ez.
Python Lexical Analyser

Regular Expressions
    )absolute_importN   )Errorsiboleoleof
c                 C   s   t | }|  d}t|}g }||k rPt|| }|d }|d7 }||k rB|t|| krB|d7 }|d7 }||k rB|t|| ks.|| || ||k s|S )z
    Return a list of character codes consisting of pairs
    [code1a, code1b, code2a, code2b,...] which cover all
    the characters in |s|.
    r   r   )listsortlenordappend)s	char_listinresultcode1code2 r   O/var/www/html/mig_web/myenv/lib/python3.10/site-packages/Cython/Plex/Regexps.pychars_to_ranges   s"   

	r   c                 C   L   t | td}t|tdd }||k r$tdtd }|| || fS dS )z
    If the range of characters from code1 to code2-1 includes any
    lower case letters, return the corresponding upper case range.
    azr   ANmaxr   minr   r   code3code4dr   r   r   uppercase_range4      r#   c                 C   r   )z
    If the range of characters from code1 to code2-1 includes any
    upper case letters, return the corresponding lower case range.
    r   Zr   r   Nr   r   r   r   r   lowercase_rangeB   r$   r&   c                    s&    fddt dt dD }t| S )z
    Given a list of codes as returned by chars_to_ranges, return
    an RE which will match a character in any of the ranges.
    c                    s"   g | ]}t  |  |d   qS )r   )	CodeRange).0r   	code_listr   r   
<listcomp>U   s   " zCodeRanges.<locals>.<listcomp>r      )ranger   Alt)r*   re_listr   r)   r   
CodeRangesP   s   r0   c                 C   s>   | t   kr
|k rn ntt| t ttt d |S t| |S )z
    CodeRange(code1, code2) is an RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|.
    r   )nl_coder.   RawCodeRange
RawNewliner   r   r   r   r   r'   Y   s   

r'   c                   @   sd   e Zd ZdZdZdZdZdd Zdd Zdd	 Z	d
d Z
dd Zdd Zdd Zdd Zdd ZdS )REa  RE is the base class for regular expression constructors.
    The following operators are defined on REs:

         re1 + re2         is an RE which matches |re1| followed by |re2|
         re1 | re2         is an RE which matches either |re1| or |re2|
    r   Nc                 C   s   t d| jj )aM  
        This method should add states to |machine| to implement this
        RE, starting at |initial_state| and ending at |final_state|.
        If |match_bol| is true, the RE must be able to match at the
        beginning of a line. If nocase is true, upper and lower case
        letters should be treated as equivalent.
        z %s.build_machine not implemented)NotImplementedError	__class____name__)selfmachineinitial_statefinal_state	match_bolnocaser   r   r   build_machinev   s   	zRE.build_machinec                 C   s"   |  }|| ||| |S )z~
        Given a state |s| of machine |m|, return a new state
        reachable from |s| on character |c| or epsilon.
        )	new_statelink_toadd_transition)r9   mr;   cr   r   r   r   	build_opt   s   
zRE.build_optc                 C   
   t | |S N)Seqr9   otherr   r   r   __add__      
z
RE.__add__c                 C   rF   rG   )r.   rI   r   r   r   __or__   rL   z	RE.__or__c                 C   s   | j r| j S |  S rG   )strcalc_strr9   r   r   r   __str__   s   z
RE.__str__c                 C   s    t |ts| ||d d S d S )NzPlex.RE instance)
isinstancer5   
wrong_typer9   numvaluer   r   r   check_re   s   
zRE.check_rec                 C   s&   t |t dkr| ||d d S d S )N string)typerS   rT   r   r   r   check_string   s   zRE.check_stringc                 C   s8   |  || t|dkrtd|| jjt|f d S )Nr   zOInvalid value for argument %d of Plex.%s.Expected a string of length 1, got: %s)r[   r   r   PlexValueErrorr7   r8   reprrT   r   r   r   
check_char   s   zRE.check_charc                 C   sH   t |tjkrd|jj|jjf }nt |j}td|| jj||f )Nz%s.%s instancez<Invalid type for argument %d of Plex.%s (expected %s, got %s)rZ   typesInstanceTyper7   
__module__r8   r   PlexTypeError)r9   rU   rV   expectedgotr   r   r   rS      s   
zRE.wrong_type)r8   ra   __qualname____doc__nullablematch_nlrN   r?   rE   rK   rM   rQ   rW   r[   r^   rS   r   r   r   r   r5   j   s    
r5   c                 C   s>   t | dkrtt| t| d }nt| }dt|  |_|S )z;
    Char(c) is an RE which matches the character |c|.
    r   zChar(%s))r   r'   r   SpecialSymbolr]   rN   )rD   r   r   r   r   Char   s
   rj   c                   @   s<   e Zd ZdZdZdZdZdZdZdd Z	dd Z
dd	 ZdS )
r2   z
    RawCodeRange(code1, code2) is a low-level RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|, where the range
    does not include newline. For internal use only.
    r   Nc                 C   s&   ||f| _ t||| _t||| _d S rG   )r-   r#   r&   )r9   r   r   r   r   r   __init__   s   
zRawCodeRange.__init__c                 C   sX   |r	|  ||t}|| j| |r(| jr|| j| | jr*|| j| d S d S d S rG   )rE   BOLrB   r-   r#   r&   r9   rC   r;   r<   r=   r>   r   r   r   r?      s   zRawCodeRange.build_machinec                 C   s   d| j | jf S )NzCodeRange(%d,%d)r4   rP   r   r   r   rO      s   zRawCodeRange.calc_str)r8   ra   re   rf   rg   rh   r-   r#   r&   rk   r?   rO   r   r   r   r   r2      s    
r2   c                   @   s    e Zd ZdZdZdZdd ZdS )_RawNewlinezd
    RawNewline is a low-level RE which matches a newline character.
    For internal use only.
    r   r   c                 C   s8   |r	|  ||t}|  ||t}|ttd f| d S Nr   )rE   rl   EOLrB   r1   )r9   rC   r;   r<   r=   r>   r   r   r   r   r?      s   z_RawNewline.build_machineN)r8   ra   re   rf   rg   rh   r?   r   r   r   r   rn      s
    rn   c                   @   s,   e Zd ZdZdZdZdZdd Zdd ZdS )ri   zx
    SpecialSymbol(sym) is an RE which matches the special input
    symbol |sym|, which is one of BOL, EOL or EOF.
    r   Nc                 C   s
   || _ d S rG   )sym)r9   rq   r   r   r   rk      rL   zSpecialSymbol.__init__c                 C   s.   |r| j tkr| ||t}|| j | d S rG   )rq   rp   rE   rl   rB   rm   r   r   r   r?     s   zSpecialSymbol.build_machine)	r8   ra   re   rf   rg   rh   rq   rk   r?   r   r   r   r   ri      s    ri   c                   @   (   e Zd ZdZdd Zdd Zdd ZdS )	rH   z]Seq(re1, re2, re3...) is an RE which matches |re1| followed by
    |re2| followed by |re3|...c                 G   sz   d}t |D ]\}}| || |o|j}q|| _|| _t|}d}|r8|d8 }|| }|jr2d}n|js6n|s$|| _d S )Nr   r   )	enumeraterW   rg   r/   r   rh   )r9   r/   rg   r   rerh   r   r   r   rk     s$   
zSeq.__init__c                 C   s   | j }t|dkr|| d S |}t|}t|D ]$\}	}
|	|d k r)| }n|}|
||||| |}|
jp=|o=|
j}qd S Nr   r   )r/   r   rA   rs   r@   r?   rh   rg   )r9   rC   r;   r<   r=   r>   r/   s1r   r   rt   s2r   r   r   r?   !  s   
zSeq.build_machinec                 C      dd tt| j S )NzSeq(%s),joinmaprN   r/   rP   r   r   r   rO   1     zSeq.calc_strNr8   ra   re   rf   rk   r?   rO   r   r   r   r   rH   
  s
    rH   c                   @   rr   )	r.   zRAlt(re1, re2, re3...) is an RE which matches either |re1| or
    |re2| or |re3|...c                 G   s~   || _ d}d}g }g }d}|D ]!}| || |jr"|| d}n|| |jr,d}|d7 }q|| _|| _|| _|| _d S ru   )r/   rW   rg   r   rh   nullable_resnon_nullable_res)r9   r/   rg   rh   r   r   r   rt   r   r   r   rk   9  s&   



zAlt.__init__c                 C   s\   | j D ]}|||||| q| jr*|r| ||t}| jD ]}||||d| qd S d S )Nr   )r   r?   r   rE   rl   )r9   rC   r;   r<   r=   r>   rt   r   r   r   r?   O  s   

zAlt.build_machinec                 C   rx   )NzAlt(%s)ry   rz   rP   r   r   r   rO   X  r}   zAlt.calc_strNr~   r   r   r   r   r.   5  s
    	r.   c                   @   rr   )	Rep1z@Rep1(re) is an RE which matches one or more repetitions of |re|.c                 C   s&   |  d| || _|j| _|j| _d S ro   )rW   rt   rg   rh   )r9   rt   r   r   r   rk   _  s   zRep1.__init__c                 C   sN   |  }|  }|| | j||||p| jj| || || d S rG   )r@   rA   rt   r?   rh   )r9   rC   r;   r<   r=   r>   rv   rw   r   r   r   r?   e  s   

zRep1.build_machinec                 C   s
   d| j  S )NzRep1(%s)rt   rP   r   r   r   rO   m  rL   zRep1.calc_strNr~   r   r   r   r   r   \  s
    r   c                   @   s0   e Zd ZdZdZdZdd Zdd Zdd ZdS )	
SwitchCasez
    SwitchCase(re, nocase) is an RE which matches the same strings as RE,
    but treating upper and lower case letters according to |nocase|. If
    |nocase| is true, case is ignored, otherwise it is not.
    Nc                 C   s    || _ || _|j| _|j| _d S rG   )rt   r>   rg   rh   )r9   rt   r>   r   r   r   rk   z  s   zSwitchCase.__init__c                 C   s   | j ||||| j d S rG   )rt   r?   r>   rm   r   r   r   r?     s   zSwitchCase.build_machinec                 C   s   | j rd}nd}d|| jf S )NNoCaseCasez%s(%s))r>   rt   )r9   namer   r   r   rO     s   zSwitchCase.calc_str)	r8   ra   re   rf   rt   r>   rk   r?   rO   r   r   r   r   r   q  s    r   z8
    Empty is an RE which matches the empty string.
    Emptyc                 C   s$   t ttt|  }dt|  |_|S )z@
    Str1(s) is an RE which matches the literal string |s|.
    Str(%s))rH   tupler|   rj   r]   rN   r   r   r   r   r   Str1  s   r   c                  G   sD   t | dkrt| d S tttt|  }ddtt|  |_|S )z
    Str(s) is an RE which matches the literal string |s|.
    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
    r   r   r   ry   )r   r   r.   r   r|   r{   r]   rN   )strsr   r   r   r   Str  s
   r   c                 C   s   t t| }dt|  |_|S )zH
    Any(s) is an RE which matches any character in the string |s|.
    zAny(%s))r0   r   r]   rN   r   r   r   r   Any  s   r   c                 C   s:   t | }|dt  |t t|}dt|  |_|S )zp
    AnyBut(s) is an RE which matches any character (including
    newline) which is not in the string |s|.
    r   z
AnyBut(%s))r   insertmaxintr   r0   r]   rN   )r   rangesr   r   r   r   AnyBut  s   
r   rX   zT
    AnyChar is an RE which matches any single character (including a newline).
    AnyCharc              	   C   s   |rt t| t|d }d| |f |_|S g }tdt| dD ]}|t t| | t| |d  d  q t| }dt|  |_|S )a  
    Range(c1, c2) is an RE which matches any single character in the range
    |c1| to |c2| inclusive.
    Range(s) where |s| is a string of even length is an RE which matches
    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
    r   zRange(%s,%s)r   r,   z	Range(%s))r'   r   rN   r-   r   r   r.   r]   )rv   rw   r   r   r   r   r   r   Range  s   *r   c                 C   s   t | t}d|  |_|S )zI
    Opt(re) is an RE which matches either |re| or the empty string.
    zOpt(%s))r.   r   rN   rt   r   r   r   r   Opt  s   

r   c                 C   s   t t| }d|  |_|S )zJ
    Rep(re) is an RE which matches zero or more repetitions of |re|.
    zRep(%s))r   r   rN   r   r   r   r   Rep  s   
r   c                 C      t | ddS )z
    NoCase(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as equivalent.
    r   r>   r   r   r   r   r   r     s   r   c                 C   r   )z
    Case(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as distinct, i.e. it cancels the effect
    of any enclosing NoCase().
    r   r   r   r   r   r   r   r     s   r   z=
    Bol is an RE which matches the beginning of a line.
    Bolz7
    Eol is an RE which matches the end of a line.
    Eolz9
    Eof is an RE which matches the end of the file.
    EofrG   )+rf   
__future__r   r_   rX   r   r   rl   rp   EOFr   r1   r   r#   r&   r0   r'   objectr5   rj   r2   rn   r3   ri   rH   r.   r   r   r   rN   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sj    	O+'"		
		
