
    @'h                     ~    d Z ddlZddlZddlmZ ddlmZmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ  G d	 d
e      Zy)z3Slides parser.

Contains parsers for .pptx files.

    N)Path)DictListOptional)AbstractFileSystem)
BaseReader)Document)infer_torch_devicec            
       X    e Zd ZdZddZdedefdZ	 	 ddedee	   d	ee
   dee   fd
Zy)
PptxReaderzOPowerpoint parser.

    Extract text, caption images, and specify slides.

    returnNc                     	 ddl }ddlm} ddlm} ddlm}m}m} |j                  d      }|j                  d      }|j                  d      }	|||	d| _        y# t        $ r t        d      w xY w)	zInit parser.r   NImagePresentation)AutoTokenizerVisionEncoderDecoderModelViTFeatureExtractorz{Please install extra dependencies that are required for the PptxReader: `pip install torch transformers python-pptx Pillow`z$nlpconnect/vit-gpt2-image-captioning)feature_extractormodel	tokenizer)torchPILr   pptxr   transformersr   r   r   ImportErrorfrom_pretrainedparser_config)
selfr   r   r   r   r   r   r   r   r   s
             k/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/llama_index/readers/file/slides/base.py__init__zPptxReader.__init__   s    	!)  *992
 0??2
 "112
	
 "3"
#  	F 	s   A A0tmp_image_filec                    ddl m} | j                  d   }| j                  d   }| j                  d   }t               }|j	                  |       d}d}||d}	|j                  |      }
|
j                  d	k7  r|
j                  d	
      }
 ||
gd      j                  }|j	                  |      } |j                  |fi |	}|j                  |d      }|d   j                         S )zGenerate text caption of image.r   r   r   r   r         )
max_length	num_beamsRGB)modept)imagesreturn_tensorsT)skip_special_tokens)r   r   r   r
   toopenr*   convertpixel_valuesgeneratebatch_decodestrip)r    r#   r   r   r   r   devicer'   r(   
gen_kwargsi_imager2   
output_idspredss                 r!   caption_imagezPptxReader.caption_image;   s    ""7+ ../BC&&{3	#%
	$.YG
**^,<<5 oo5o1G(9T

, 	 $v.#U^^L?J?
&&zt&LQx~~    file
extra_infofsc                    ddl m} |r#|j                  |      5 } ||      }ddd       n ||      }d}t        j                        D ]  \  }}	|d| dz  }|	j
                  D ]  }
t        |
d      r|
j                  }|j                  }t        j                  dd	
      }	 |j                  |       |j                          |d| j                  |j                         dz  }t        j                   |j                         t        |
d      s||
j"                   dz  }  t%        ||xs i       gS # 1 sw Y   xY w# t        j                   |j                         w xY w)zParse file.r   r   N z	

Slide #z: 
imagewbF)deletez	
 Image: z

text
)rE   metadata)r   r   r0   	enumerateslidesshapeshasattrrB   blobtempfileNamedTemporaryFilewritecloser;   nameosunlinkrE   r	   )r    r=   r>   r?   r   fpresentationresultislideshaperB   image_bytess                r!   	load_datazPptxReader.load_dataX   sS    	& /!+A/ / (-L!,"5"56 	0HAuA3d++F 05'*!KKE"'**K 33DGA*,	Jt/A/A!&&/I.J$"OO		!&&)5&)B//F0	0& fz/?R@AA1/ /& 		!&&)s   	D<%AE	<E	!E*)r   N)NN)__name__
__module____qualname____doc__r"   strr;   r   r   r   r   r   r	   r[    r<   r!   r   r      sg     
D C  C  @ &*+/	"B"B TN"B '(	"B
 
h"Br<   r   )r_   rR   rM   pathlibr   typingr   r   r   fsspecr   llama_index.core.readers.baser   llama_index.core.schemar	   llama_index.core.utilsr
   r   ra   r<   r!   <module>rh      s7    
   ' ' % 4 , 5hB hBr<   