
    'Gga                        d dl Z d dlZd dlZd dlmZmZmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d d	l+m,Z, d d
l-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5  ed      Z6ee7ee8ef   Z9ee)ee   f   Z: ee#jv                  e jx                        Z= G d d      Z> G d de,      Z?e@dk(  rd dlAmBZB  eB         eeC      j                  j                  dz  dz  ZE e?       ZF	 eFj                  d      ZHeHj                  eEdz  d z        ZLeFj                  eHj                          eOeL       yy# eI$ r eFj                  ddddiddidd      ZHY _w xY w)!    N)BufferedIOBaseBufferedReaderBytesIO)Path)ListOptionalTypeUnion	CoroutineAnyTypeVar)	BaseModel)ExtractAgentExtractAgentCreateExtractConfig
ExtractJobExtractJobCreate
ExtractRunExtractSchemaValidateRequestExtractAgentUpdateFileExtractMode
StatusEnumProjectExtractTargetLlamaExtractSettingsPaginatedExtractRunsResponse)AsyncLlamaCloud)JSONObjectTypeaugment_async_errors)BaseComponent)run_jobs)FieldPrivateAttr)DEFAULT_BASE_URL)ThreadPoolExecutorT)extraction_targetextraction_modec                      e Zd ZdZ	 	 	 	 	 	 	 	 	 d-dededee   dee   deded	ed
e	de	dee	   dee
   fdZdeeeef   defdZedefd       Zedefd       Zedefd       Zej*                  deddfd       Zedefd       Zej*                  deddfd       ZdedefdZdedee   fdZd.dZdeee e   f   d e!dee"e e"   f   fd!Z#deee e   f   dee"e e"   f   fd"Z$deee e   f   deee e   f   fd#Z%deee e   f   deee e   f   fd$Z&dede"fd%Z'dedefd&Z(d'eddfd(Z)	 d/d)ed*ede*fd+Z+defd,Z,y)0ExtractionAgentzTClass representing a single extraction agent with methods for extraction operations.Nclientagent
project_idorganization_idcheck_intervalmax_timeoutnum_workersshow_progressverboseverifyhttpx_timeoutc                 "   || _         || _        || _        || _        || _        || _        || _        || _        |
| _        || _	        |	| _
        d | _        d | _        t        t        dt        j                          xs ddz               | _        y )N
         max_workers)_client_agent_project_id_organization_idr0   r1   r2   r3   r5   r6   _verbose_data_schema_configr&   minos	cpu_count_thread_pool)selfr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   s               /home/kushmeetdev/apache_webroot/langgraph_flaskproject/venv/lib/python3.12/site-packages/llama_cloud_services/extract/extract.py__init__zExtractionAgent.__init__1   s     % /,&&**9=37.B!41 9:
    cororeturnc                 r     dt         f fd} j                  j                  |      j                         S )=Run coroutine in a separate thread to avoid event loop issuesrM   c                  R    dt         ffd} t        j                   |              S )NrM   c                    K   j                   j                  j                  } t        j                  j
                  j                        4 d {   }|j                   j                  _        	  d {   | j                   j                  _        cd d d       d {    S 7 V7 27 	# | j                   j                  _        w xY w# 1 d {  7  sw Y   y xY ww)Nr5   timeout)r=   _client_wrapperhttpx_clienthttpxAsyncClientr5   r6   )original_clientr,   rL   rH   s     rI   wrapped_corozFExtractionAgent._run_in_thread.<locals>.run_coro.<locals>.wrapped_coroT   s     "&,,">">"K"K !,,;; .. 
T 
T @FDLL00=T%)z ET44A
T 
T 
T  *
T ET44A
T 
T 
Tsr   AC%B*C%C4B09B,:B0=CC%$B.%C%,B0.C%0CCC"CC"C%r'   asynciorunrY   rL   rH   s    rI   run_coroz0ExtractionAgent._run_in_thread.<locals>.run_coroS   s$    T T" ;;|~..rK   r'   rG   submitresultrH   rL   r^   s   `` rI   _run_in_threadzExtractionAgent._run_in_threadP   s1    	/! 	/(   ''188::rK   c                 .    | j                   j                  S N)r>   idrH   s    rI   rf   zExtractionAgent.idi   s    {{~~rK   c                 .    | j                   j                  S re   )r>   namerg   s    rI   ri   zExtractionAgent.namem   s    {{rK   c                 ^    | j                   s| j                  j                  S | j                   S re   )rB   r>   data_schemarg   s    rI   rk   zExtractionAgent.data_schemaq   s%    .2.?.?t{{&&VTEVEVVrK   rk   c                 B   t        |t              r|}n<t        |t              r!t        |t              r|j                         }nt        d      | j                  | j                  j                  j                  t        |                  }|j                  | _        y )N;data_schema must be either a dictionary or a Pydantic model)rk   request)
isinstancedicttype
issubclassr   model_json_schema
ValueErrorrc   r=   llama_extractvalidate_extraction_schemar   rk   rB   )rH   rk   processed_schemavalidated_schemas       rI   rk   zExtractionAgent.data_schemau   s     k4(*T*z+y/Q*<<>M   ..LL&&AA4AQR B 

 -88rK   c                 ^    | j                   s| j                  j                  S | j                   S re   )rC   r>   configrg   s    rI   r{   zExtractionAgent.config   s!    )-t{{!!G4<<GrK   r{   c                     || _         y re   )rC   )rH   r{   s     rI   r{   zExtractionAgent.config   s	    rK   
file_inputc                   K   t        |t              r|}nJt        |t              rt        |      }n.t        |t        t
        f      rt        |d      }nt        d      	 | j                  j                  j                  | j                  |       d{   t        |t              r|j                          S S 7 %# t        |t              r|j                          w w xY ww)zUpload a file for extraction.rbzJfile_input must be either a file path string, file bytes, or buffer object)r.   upload_fileN)rp   r   bytesr   strr   openru   r=   filesr   r?   r   close)rH   r}   r   s      rI   _upload_filezExtractionAgent._upload_file   s     j.1$K
E*!*-K
S$K0z40K\ 	$++77++ 8   +~6!!# 7	 +~6!!# 7s0   AC#!4B= B;B= "C#;B= =#C  C#job_idc                 @  K   t        j                         }d}	 t        j                  | j                         d{    |dz  }| j
                  j                  j                  |       d{   }|j                  t        j                  k(  r.| j
                  j                  j                  |       d{   S |j                  t        j                  k(  rXt        j                         }||z
  | j                  kD  rt        d|       | j                  r|dz  dk(  rt!        dd	d
       t#        j$                  d| d|j                   d|j&                          | j
                  j                  j                  |       d{   S 7 X7 (7 7 w)z5Wait for and return the results of an extraction job.r   TNr9   r   z#Timeout while extracting the file: r8   . )endflushzFailure in job: z
, status: z	, error: )timeperf_counterr[   sleepr0   r=   rv   get_jobstatusr   SUCCESSget_run_by_job_idPENDINGr1   	ExceptionrA   printwarningswarnerror)rH   r   starttriesjobr   s         rI   _wait_for_job_resultz$ExtractionAgent._wait_for_job_result   sx    !!#-- 3 3444QJE22:: ;  C zzZ///!\\77II! J    z111'');!1!11#&I&$RSS==URZ1_#2T2&vhjIcii[Y "\\77II! J   + 5
sG   :FF2F/F0A
F:F;CFFFFFFc           	          | j                  | j                  j                  j                  | j                  t        | j                  | j                                    | _        y)zPersist the extraction agent's schema and config to the database.

        Returns:
            ExtractionAgent: The updated extraction agent
        )rk   r{   )extraction_agent_idro   N)	rc   r=   rv   update_extraction_agentrf   r   rk   r{   r>   rg   s    rI   savezExtractionAgent.save   sR     ))LL&&>>$(GG* $ 0 0;; ? 
rK   r   extract_settingsc                    K   t        |t              s|g}d}nd}|D cg c]  } j                  |       }}t               5  t	        | j
                  d j                         d {   }d d d        dt        dt        f fd}D cg c]
  } ||       }}t               5  t	        | j
                  d j                         d {   }	d d d         j                  rRt        |	      D ]C  \  }}
t        |t        t        f      rt        |      nd	}t        d
| d|
j                          E |r	d   S 	S c c}w 7 # 1 sw Y   xY wc c}w 7 # 1 sw Y   xY ww)NTFUploading filesworkersdescr3   filerM   c                 "  K   j                   j                  j                  t        j                  | j                  j
                  j                               d {   }j                  |j                         d {   S 7 '7 w)Nr   file_iddata_schema_overrideconfig_override)
job_creater   )r=   rv   run_job_test_userr   rf   rk   r{   r   )r   
job_queuedr   rH   s     rI   run_jobz7ExtractionAgent._queue_extraction_test.<locals>.run_job   s}     #||99KK+(, GG)-)9)9$(KK	 "2  L   J 22:==AAA Bs$   A B#B$"BBBBzRunning extraction jobs<bytes/buffer> Queued file extraction for file  under job_id r   )rp   listr   r    r"   r2   r3   r   r   rA   zipr   r   r   rf   )rH   r   r   single_filer   upload_tasksuploaded_filesr   	job_tasksextract_jobsr   	file_reprs   ` `         rI   _queue_extraction_testz&ExtractionAgent._queue_extraction_test   s~    
 %&GEKK<ABD))$/BB!# 	#+((&"00	$ N	
	B 
	B 
	B 0>>tWT]>	>!# 	!)((."00	" L	 == 5 	c!+D3+!>CIDT  6ykPSPVPVxX	 #.|A?<?O C	 	( ?	 	sp   E,EE,'E,E-E1E,EE,+'E EE A4E,EEE,E  E)%E,c                 ^  K   	 t        |t              s|g}d}nd}|D cg c]  }| j                  |       }}t               5  t	        || j
                  d| j                         d{   }ddd       D cg c]]  }| j                  j                  j                  t        | j                  |j                  | j                  | j                              _ }}t               5  t	        || j
                  d| j                         d{   }ddd       | j                  rRt        |      D ]C  \  }}t        |t         t"        f      rt!        |      nd	}	t%        d
|	 d|j                          E |rd   S S c c}w 7 # 1 sw Y   xY wc c}w 7 # 1 sw Y   xY ww)z
        Queue multiple files for extraction.

        Args:
            files (Union[FileInput, List[FileInput]]): The files to extract

        Returns:
            Union[ExtractJob, List[ExtractJob]]: The queued extraction jobs
        TFr   r   Nr   rn   zCreating extraction jobsr   r   r   r   )rp   r   r   r    r"   r2   r3   r=   rv   r   r   rf   rk   r{   rA   r   r   r   r   )
rH   r   r   r   r   r   r   r   r   r   s
             rI   queue_extractionz ExtractionAgent.queue_extraction  s     	C%&GEKK<ABD))$/BB!# 	#+((&"00	$ N	" '

  LL&&..((, GG)-)9)9$(KK	 / 

	 

 "# 	!)((/"00	" L	 == 5 	c!+D3+!>CIDT  6ykPSPVPVxX	 #.|A?<?K C	 	

	 	sq   F-FF-'F+F
,F0F-<A"FF-*'F!FF!A4F-
FFF-F!!F*&F-c                 t  K   t        |t              s|g}d}nd}| j                  |       d{   }|D cg c]  }| j                  |j                         }}t               5  t        || j                  d| j                         d{   }ddd       |rd   S S 7 sc c}w 7 # 1 sw Y   xY ww)a  Asynchronously extract data from one or more files using this agent.

        Args:
            files (Union[FileInput, List[FileInput]]): The files to extract

        Returns:
            Union[ExtractRun, List[ExtractRun]]: The extraction results
        TFNzExtracting filesr   r   )	rp   r   r   r   rf   r    r"   r2   r3   )rH   r   r   jobsr   result_tasksresultss          rI   aextractzExtractionAgent.aextractB  s      %&GEKK **511EIJc11#&&9JJ!# 	$(('"00	 G	 )wqz5g5 2J	 	sJ   -B8B#B8"B%B8&'B,B*B,B8%B8*B,,B51B8c                 B    | j                  | j                  |            S )a  Synchronously extract data from one or more files using this agent.

        Args:
            files (Union[FileInput, List[FileInput]]): The files to extract

        Returns:
            Union[ExtractRun, List[ExtractRun]]: The extraction results
        )rc   r   )rH   r   s     rI   extractzExtractionAgent.extracta  s     ""4==#788rK   c                 l    | j                  | j                  j                  j                  |            S )z
        Get the extraction job for a given job_id.

        Args:
            job_id (str): The job_id to get the extraction job for

        Returns:
            ExtractJob: The extraction job
        r   )rc   r=   rv   r   rH   r   s     rI   get_extraction_jobz"ExtractionAgent.get_extraction_jobn  s-     ""4<<#=#=#E#EV#E#TUUrK   c                 l    | j                  | j                  j                  j                  |            S )z
        Get the extraction run for a given job_id.

        Args:
            job_id (str): The job_id to get the extraction run for

        Returns:
            ExtractRun: The extraction run
        r   )rc   r=   rv   r   r   s     rI   get_extraction_run_for_jobz*ExtractionAgent.get_extraction_run_for_jobz  s7     ""LL&&88 9 
 	
rK   run_idc                 n    | j                  | j                  j                  j                  |             y)zxDelete an extraction run by ID.

        Args:
            run_id (str): The ID of the extraction run to delete
        )r   N)rc   r=   rv   delete_extraction_run)rH   r   s     rI   r   z%ExtractionAgent.delete_extraction_run  s-     	LL&&<<F<K	
rK   pagelimitc                     | j                  | j                  j                  j                  | j                  ||z  |            S )zList extraction runs for the extraction agent.

        Returns:
            PaginatedExtractRunsResponse: Paginated list of extraction runs
        )r   skipr   )rc   r=   rv   list_extract_runsrf   )rH   r   r   s      rI   list_extraction_runsz$ExtractionAgent.list_extraction_runs  sE     ""LL&&88$(GGE\ 9 
 	
rK   c                 <    d| j                    d| j                   dS )NzExtractionAgent(id=z, name=))rf   ri   rg   s    rI   __repr__zExtractionAgent.__repr__  s    $TWWIWTYYKqAArK   )	NNr9     r:   TFT<   )rM   N)r   d   )-__name__
__module____qualname____doc__r   CloudExtractAgentr   r   intboolfloatrJ   r   r   r'   rc   propertyrf   ri   rq   rk   setterSchemaInputr   r{   	FileInputr   r   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   r   r   r    rK   rI   r+   r+   .   s   ^ %))-"!%)+

 !
 SM	

 "#
 
 
 
 
 
 
  
>;9S#q[#9 ;a ;2 C    c     WT W W 9{ 9t 9 9$ H H H ]]] t  $Y $4 $* *9M <
 2@YY/02@ /2@ 
z4
++	,	2@h9@YY/09@ 
z4
++	,9@v69d9o566	z4
++	,6>99d9o569	z4
++	,9
V 
V 
V
 
 
 
C 
D 
 +.

$'
	%
 B# BrK   r+   c                       e Zd ZU dZ ed      Zeed<    ed      Zeed<    edd	      Z	e
ed
<    edd	      Ze
ed<    edddd      Ze
ed<    edd	      Zeed<    edd	      Zeed<    edd	      Zee   ed<    edd	      Zee   ed<    e       Zeed<    e       Zeed <    e       Zee   ed!<    e       Zee   ed"<   	 	 	 	 	 	 	 	 	 	 	 d3dee   dee   d
e
de
de
ded$ee   d%ee   dee   dee   def fd&Zd'eeeef   d(efd)Z	 d4d*ed+e d,ee!   d(e"fd-Z#	 	 d5d*ee   d.ee   d(e"fd/Z$d(e%e"   fd0Z&d1ed(d#fd2Z' xZ(S )6LlamaExtractz:Factory class for creating and managing extraction agents.z%The API key for the LlamaExtract API.)descriptionapi_keyz%The base URL of the LlamaExtract API.base_urlr9   z;The interval in seconds to check if the extraction is done.)defaultr   r0   r   zDThe maximum timeout in seconds to wait for the extraction to finish.r1   r:   r   r8   zAThe number of workers to use sending API requests for extraction.)r   gtltr   r2   Tz-Show progress when extracting multiple files.r3   Fz*Show verbose output when extracting files.r4   zSimple SSL verification option.r5   r   zTimeout for the httpx client.r6   _async_clientrG   r?   r@   Nr.   r/   c                 $   |s#t        j                  dd       }|t        d      |st        j                  dd       xs t        }t        |   |||||||	|
|	       t        j                  |	|
      | _        |	| _	        |
| _
        t        | j                  | j                  | j                        | _        t        t!        dt        j"                         xs dd	z         
      | _        |st        j                  dd       }|svt'        d       | j)                  | j                  j*                  j-                               }|D cg c]  }|j.                  s| }}|st        d      |d   j0                  }|| _        || _        y c c}w )NLLAMA_CLOUD_API_KEYzThe API key is required.LLAMA_CLOUD_BASE_URL)	r   r   r0   r1   r2   r3   r5   r6   r4   rR   )tokenr   rU   r8   r9   r:   r;   LLAMA_CLOUD_PROJECT_IDz1No project_id provided, fetching default project.z6No default project found. Please provide a project_id.r   )rE   getenvru   r%   superrJ   rV   rW   _httpx_clientr5   r6   r   r   r   r   r&   rD   rF   rG   r   rc   projectslist_projects
is_defaultrf   r?   r@   )rH   r   r   r0   r1   r2   r3   r.   r/   r5   r6   r4   r   pdefault_project	__class__s                  rI   rJ   zLlamaExtract.__init__  s    ii 5t<G !;<<yy!7>RBRH)##'' 	 
	
 #..fmT*,,,]]++

 /B!41 9:
 #;TBJIJ*.*=*=&&//==?+ /7"G!,,1"G"G&$P  -Q/22
% / #Hs   
FFrL   rM   c                 r     dt         f fd} j                  j                  |      j                         S )rO   rM   c                  R    dt         ffd} t        j                   |              S )NrM   c                    K   j                   J d       t        j                  j                  j                        4 d {   } | j
                  j                  _        	  d {   j                   j
                  j                  _        cd d d       d {    S 7 `7 <7 	# j                   j
                  j                  _        w xY w# 1 d {  7  sw Y   y xY ww)Nz"httpx_client should be initializedrR   )r   rV   rW   r5   r6   r   rT   rU   )r,   rL   rH   s    rI   rY   zCLlamaExtract._run_in_thread.<locals>.run_coro.<locals>.wrapped_coro  s     &&28782 !,,;; ..   FLD&&66C%)z !.. **::G    * !.. **::G  sr   AC,B'C,
C'B-,B)-B-0%CC,!B+"C,)B-+C,-'CCC)C C)%C,rZ   r]   s    rI   r^   z-LlamaExtract._run_in_thread.<locals>.run_coro  s"     & ;;|~..rK   r_   rb   s   `` rI   rc   zLlamaExtract._run_in_thread  s1    	/! 	/.   ''188::rK   ri   rk   r{   c                    |H|j                   t        j                  k(  r1t        j                  d       t        j
                  |_         nt        }t        |t              r|}n,t        |t              r|j                         }nt        d      | j                  | j                  j                  j!                  | j"                  | j$                  t'        |||                  }t)        | j                  || j"                  | j$                  | j*                  | j,                  | j.                  | j0                  | j2                  	      S )a]  Create a new extraction agent.

        Args:
            name (str): The name of the extraction agent
            data_schema (SchemaInput): The data schema for the extraction agent
            config (Optional[ExtractConfig]): The extraction config for the agent

        Returns:
            ExtractionAgent: The created extraction agent
        z?ACCURATE extraction mode is deprecated. Using BALANCED instead.rm   )ri   rk   r{   )r.   r/   ro   	r,   r-   r.   r/   r0   r1   r2   r3   r4   )r)   r   ACCURATEr   r   BALANCEDDEFAULT_EXTRACT_CONFIGrp   rq   rs   r   rt   ru   rc   r   rv   create_extraction_agentr?   r@   r   r+   r0   r1   r2   r3   r4   )rH   ri   rk   r{   r-   s        rI   create_agentzLlamaExtract.create_agent'  s&     %%)=)==U *5)=)=&+Fk4(%KY/%779KM  ##,,DD++ $ 5 5* +! E 

 %%'' 11..((((,,LL

 
	
rK   rf   c                 4   ||t        j                  d       |r6| j                  | j                  j                  j                  |            }nN|rA| j                  | j                  j                  j                  || j                              }nt        d      t        | j                  || j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                        S )a  Get extraction agents by name or extraction agent ID.

        Args:
            name (Optional[str]): Filter by name
            extraction_agent_id (Optional[str]): Filter by extraction agent ID

        Returns:
            ExtractionAgent: The extraction agent
        zJBoth name and extraction_agent_id are provided. Using extraction_agent_id.r   )ri   r.   z4Either name or extraction_agent_id must be provided.)r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   )r   r   rc   r   rv   get_extraction_agentget_extraction_agent_by_namer?   ru   r+   r@   r0   r1   r2   r3   r4   r5   r6   )rH   ri   rf   r-   s       rI   	get_agentzLlamaExtract.get_agenta  s
    >d.MM\ ''""00EE(* F E ''""00MM#// N E STT%%'' 11..((((,,LL;;,,
 	
rK   c                 n   | j                  | j                  j                  j                  | j                              }|D cg c]f  }t        | j                  || j                  | j                  | j                  | j                  | j                  | j                  | j                  	      h c}S c c}w )z%List all available extraction agents.)r.   r  )rc   r   rv   list_extraction_agentsr?   r+   r@   r0   r1   r2   r3   r4   )rH   agentsr-   s      rI   list_agentszLlamaExtract.list_agents  s    $$,,CC++ D 
$  
  ))++ $ 5 5#22 ,, ,,"00

 	
 
s   A+B2agent_idc                 n    | j                  | j                  j                  j                  |             y)zzDelete an extraction agent by ID.

        Args:
            agent_id (str): ID of the extraction agent to delete
        r  N)rc   r   rv   delete_extraction_agent)rH   r  s     rI   delete_agentzLlamaExtract.delete_agent  s5     	,,DD$, E 	
rK   )NNr9   r   r:   TNNTr   Fre   )NN))r   r   r   r   r#   r   r   __annotations__r   r0   r   r1   r2   r3   r   r4   r5   r   r6   r   r$   r   r   rG   r&   r?   r@   rJ   r   r   r'   rc   r   r   r+   r	  r  r   r  r  __classcell__)r   s   @rI   r   r     su   D%LMGSM&MNHcNQNC  ZK  W	K   "QM4  #OGT  #"CFHTN  &+ ?&M8E?  &1]M?2'2}L$4!,K#.&1mhsm3 "&"&"$()-!%)+=0#=0 3-=0 	=0
 =0 =0 =0 SM=0 "#=0 =0  =0 =0~;9S#q[#9 ;a ;@ +/	8
8
 !8
 '	8

 
8
x # 0
sm0
 SM0
 
	0
d
T/2 
.

S 

T 

rK   r   __main__)load_dotenvtestsdataz
test-agent)ri   objectrr   string)titlesummary)rr   
propertiesslidezconocophilips.pdf)Pr[   rE   r   ior   r   r   pathlibr   typingr   r   r	   r
   r   r   r   r   rV   pydanticr   llama_cloudr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   llama_cloud.clientr   "llama_cloud_services.extract.utilsr   r    llama_index.core.schemar!   llama_index.core.async_utilsr"    llama_index.core.bridge.pydanticr#   r$   llama_index.core.constantsr%   concurrent.futuresr&   r'   r   r   r   r   PER_DOCr  r  r+   r   r   dotenvr  __file__parentdata_dir	extractorr  r-   r   r	  r   r   r  rf   r   r   rK   rI   <module>r5     s    	  6 6  G G G       " / S 1 1 ? 7 1CL#tUN23	NDO34&#++(( wB wBtL
= L
^ z"MH~$$++g5>HI
###6 mmHw.1DDEG588$	'N-   

&& $h/ &1	


s   E% %!F	F	