
    *'h&                     h    d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	  G d de      Z
 G d d	e
      Zy)
z
This module is for codecs only.

While the codec implementation can contain details of the PDF specification,
the module should not do any PDF parsing.
    N)ABCabstractmethod)DictList)logger_warningc                   D    e Zd ZdZededefd       Zededefd       Zy)Codecz#Abstract base class for all codecs.datareturnc                      y)z
        Encode the input data.

        Args:
            data: Data to encode.

        Returns:
            Encoded data.

        N selfr
   s     \/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/pypdf/_codecs/_codecs.pyencodezCodec.encode           c                      y)z
        Decode the input data.

        Args:
            data: Data to decode.

        Returns:
            Decoded data.

        Nr   r   s     r   decodezCodec.decode   r   r   N)__name__
__module____qualname____doc__r   bytesr   r   r   r   r   r	   r	      sG    -
5 
U 
 
 
5 
U 
 
r   r	   c                       e Zd ZdZdZdZdZdZddZdd	Z	d
e
de
fdZdee   de
fdZddZd
e
defdZd
e
de
fdZde
deddfdZy)LzwCodecz2Lempel-Ziv-Welch (LZW) adaptive compression codec.   i  	      r   Nc                     t        d      D ci c]  }t        |g      | c}| _        | j                  dz   | _        | j
                  | _        d| j                  z  dz
  | _        yc c}w )z>Initialize the encoding table and state to initial conditions.r      N)ranger   encoding_table
EOD_MARKER	next_codeINITIAL_BITS_PER_CODEbits_per_codemax_code_valuer   is     r   _initialize_encoding_tablez#LzwCodec._initialize_encoding_table5   s_    HMc
0S1sQ0S1,!77 D$6$66!; 1Ts   A'c                     | xj                   dz  c_         | j                   | j                  kD  rG| j                  | j                  k  r-| xj                  dz  c_        d| j                  z  dz
  | _        yyy)z5Update bits_per_code and max_code_value if necessary.r!   N)r%   r(   r'   MAX_BITS_PER_CODE)r   s    r   _increase_next_codezLzwCodec._increase_next_code<   si    !NNT000""T%;%;;!##$(:(:#:a"?D < 1r   r
   c                    g }|j                  | j                         | j                          d}|D ]  }|t        |g      z   }|| j                  v r|}#|j                  | j                  |          | j
                  d| j                  z  dz
  k  r*| j
                  | j                  |<   | j                          n+|j                  | j                         | j                          t        |g      } |r|j                  | j                  |          |j                  | j                         | j                  |      S )z
        Encode data using the LZW compression algorithm.

        Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
        r   r!   )
appendCLEAR_TABLE_MARKERr+   r   r#   r%   r-   r.   r$   _pack_codes_into_bytes)r   r
   result_codescurrent_sequencebytenext_sequences         r   r   zLzwCodec.encodeF   s0    #% 	D334'') 	1D,udV}<M 3 33#0  ##D$7$78H$IJ >>a4+A+A&AQ%FF9=D''6,,. !''(?(?@335 $)$= )	1.  3 34D EFDOO,**<88r   codesc                    | j                          d}d}t               }|D ]  }|| j                  z  |z  }|| j                  z  }|dk\  r"|dz  }|j                  ||z	  dz         |dk\  r"|| j                  k(  r| j                          k|| j
                  k(  r{| j                           |dkD  r|j                  |d|z
  z  dz         t        |      S )z
        Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
        The bit-width starts at 9 bits and expands as needed.
        r         )r+   	bytearrayr'   r0   r1   r$   r.   r   )r   r7   bufferbits_in_bufferoutputcodes         r   r2   zLzwCodec._pack_codes_into_bytesp   s    
 	'') 	+D 2 22d:Fd000N
 !A%!#v74?@ !A% t...//1(((*!	+& AMM6a.&89TABV}r   c                    d| j                   z  dz
  | _        t        | j                        D cg c]  }t	        |g       c}dg| j                  | j                  z
  dz   z  z   | _        | j                  dz   | _        d| _        y c c}w )Nr!   r   r   )	r-   r(   r"   r1   r   decoding_tabler$   _table_index_bits_to_getr)   s     r   _initialize_decoding_tablez#LzwCodec._initialize_decoding_table   s     D$:$::a?389P9P3QRauaSzRV
  4#:#::Q>V@ @ !OOa/	 Ss   Bc                    |  	 | j                   | j                  k  rk| j                  dz  || j                     dz  z  | _        | xj                  dz  c_        | xj                   dz  c_         | j                   | j                  k  rk| j                  | j                   | j                  z
  z	  | j                  | j                  dz
     z  }| xj                   | j                  z  c_         |S # t
        $ r | j                  cY S w xY w)Nr9   r:   r!   r   )
_next_bitsrC   
_next_data_byte_pointer
_and_table
IndexErrorr$   )r   r
   r?   s      r   _next_code_decodezLzwCodec._next_code_decode   s    	#//D$5$55#'??a#7++,t3# ""a'"1$ //D$5$55 DOOd6G6G$GH 1 1A 567D OOt000OK 	#??"	#s   BC, A#C, ,DDc                    g d| _         d| _        d| _        d| _        d| _        d| _        t        j                         }| j                          d| _        d| _        d| _        | j                  }	 | j                  |      }|| j                  k(  r	 |j                         S || j                  k(  rb| j                          | j                  |      }|| j                  k(  r	 |j                         S |j                  | j                  |          |}n|| j                  k  rT| j                  |   }|j                  |       || j                  k7  r"| j                  | j                  |   |d          |}nW| j                  |   | j                  |   dd z   }|j                  |       | j                  | j                  |   |d          |}^)z
        The following code was converted to Python from the following code:
        https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
        )      i  r   r   Nr!   )rI   rB   rC   rH   rG   rF   ioBytesIOrD   r1   rK   r$   writerA   _add_entry_decodegetvalue)r   r
   output_streamold_coder?   strings         r   r   zLzwCodec.decode   s   
 2

'')**))$/Dt&0 %%''- t...//1--d34??*$ %%''# ##D$7$7$=>))),,T2##F+t666**4+>+>x+H&QR)T ''1D4G4G4QRTST4UU  ##F+&&t':':8'DfQiP3 r   
old_stringnew_charc                 \   |t        |g      z   }| j                  | j                  kD  rt        dt               y || j
                  | j                  <   | xj                  dz  c_        | j                  dk(  rd| _        y | j                  dk(  rd| _        y | j                  dk(  rd| _        y y )	Nz#Ignoring too large LZW table index.r!   rM   
   rN      rO   r   )r   rB   r(   r   r   rA   rC   )r   rX   rY   
new_strings       r   rS   zLzwCodec._add_entry_decode   s    %
"33
t222@(K1;D--.Q # "D$& "D$& "D 'r   )r   N)r   r   r   r   r1   r$   r&   r-   r+   r.   r   r   r   intr2   rD   rK   r   rS   r   r   r   r   r   -   s    <J<@(95 (9U (9T DI  %  D#e # #d/(5 /(U /(b#E #S #T #r   r   )r   rP   abcr   r   typingr   r   pypdf._utilsr   r	   r   r   r   r   <module>rb      s2    
 #  'C <^#u ^#r   