
    @'h                         d Z ddlZddlmZ ddlmZmZmZmZ ddl	m
Z
 ddlmZ ddlmZ  ej                  e      Z G d d	e      Zy)
z6Mbox parser.

Contains simple parser for mbox files.

    N)Path)AnyDictListOptional)AbstractFileSystem)
BaseReader)Documentc                        e Zd ZU dZdZeed<   deddededed	ed
df
 fdZ		 	 dde
dee   dee   d
ee   fdZ xZS )
MboxReaderzMbox parser.

    Extract messages from mailbox files.
    Returns string including date, subject, sender, receiver and
    content for each message.

    zMDate: {_date}
From: {_from}
To: {_to}
Subject: {_subject}
Content: {_content}DEFAULT_MESSAGE_FORMATr   )	max_countmessage_formatargsr   r   kwargsreturnNc                ~    	 ddl m} t        |   |i | || _        || _        y# t        $ r t        d      w xY w)zInit params.r   BeautifulSoupz@`beautifulsoup4` package not found: `pip install beautifulsoup4`N)bs4r   ImportErrorsuper__init__r   r   )selfr   r   r   r   r   	__class__s         i/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/llama_index/readers/file/mbox/base.pyr   zMboxReader.__init__#   sO    	) 	$)&)",  	R 	s   ' <file
extra_infofsc                    ddl }ddlm} ddlm} ddlm} |rt        j                  d       d}g }	 ||      j                  }
|j                  ||
      }t        |      D ]"  \  }}	 |}|j                         r^|j                         D ]J  }|j                         }t        |j!                  d	            }|d
k(  s3d|vs8|j#                  d      } n n|j#                  d      } |      }dj%                  |j'                         j)                               }| j*                  j-                  |d   |d   |d   |d   |      }|	j/                  |       |dz  }| j2                  dkD  s|| j2                  k\  s# n |	D cg c]  }t5        ||xs i        c}S # t0        $ r%}t        j                  d| d|        Y d}~rd}~ww xY wc c}w )zParse file into string.r   N)BytesParser)defaultr   zyfs was specified but MboxReader doesn't support loading from fsspec filesystems. Will load from local filesystem instead.)policy)factoryzContent-Dispositionz
text/plain
attachmentT)decode datefromtosubject)_date_from_to_subject_contentzFailed to parse message:
z
 with exception    )textmetadata)mailboxemail.parserr!   email.policyr"   r   r   loggerwarningparsembox	enumerateis_multipartwalkget_content_typestrgetget_payloadjoinget_textsplitr   formatappend	Exceptionr   r
   )r   r   r   r   r4   r!   r"   r   iresultsbytes_parserr:   __msgmsgpartctypecdispocontentsoupstripped_content
msg_stringeresults                           r   	load_datazMboxReader.load_data6   s    	,(%NNT
 "'288||D,|7 ! "	GAtY+/##% #
 " $ 5 5 7!$TXX.C%D!E L0\5O&*&6&6d&6&CG!" "ooTo:G %W-#&88DMMO,A,A,C#D !0077f+f+D	 ^- 8 
 z*
 FA~~!a4>>&9E"	H PWWVfz/?R@WW  Y!;D6ASTUSVWXXY Xs,   +AF( F(BF(G(	G1GG)NN)__name__
__module____qualname____doc__r   r?   __annotations__r   intr   r   r   r   r   r   r
   rW   __classcell__)r   s   @r   r   r      s    	 C  4	-- - 	-
 - 
-, &*+/	?X?X TN?X '(	?X
 
h?X    r   )r[   loggingpathlibr   typingr   r   r   r   fsspecr   llama_index.core.readers.baser	   llama_index.core.schemar
   	getLoggerrX   r7   r    r_   r   <module>rh      sC      , , % 4 ,			8	$cX cXr_   