
    @'h                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlZd d	lmZ d d
lmZmZ d dl m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ee6e7ef   Z8dZ9dZ:dZ;dZ<de6dee6   dee6   de6fdZ= G d de6e	      Z> G d de'      Z?y)    N)asynccontextmanager)deepcopy)Enum)BufferedIOBase)PathPurePathPurePosixPath)AnyAsyncGeneratorDictListOptionalTupleUnion)urlparse)AbstractFileSystem)asyncio_runrun_jobs)FieldPrivateAttrfield_validator)DEFAULT_BASE_URL)BasePydanticReader)get_default_fs)Document)	JobResult)SUPPORTED_FILE_TYPES
ResultTypeParsingModeFailedPageModenest_asyncio_errnest_asyncio_msgmake_api_requestz
---
z./api/parsing/job/{job_id}/result/{result_type}z/api/parsing/job/{job_id}z/api/parsing/uploadbase_urlorganization_id
project_idreturnc           
          i }|r||d<   |r||d<   |r=| dz   dj                  |j                         D cg c]  \  }}| d|  c}}      z   S | S c c}}w )Nr%   r&   ?&=)joinitems)r$   r%   r&   query_paramskvs         f/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/llama_cloud_services/parse/base.py	build_urlr2   -   sl     L*9&'%/\"#~ASASAU)VAQCq*)V WWWO *Ws   Ac                       e Zd ZdZdZdZdZy)BackoffPatternzBackoff pattern for polling.constantlinearexponentialN)__name__
__module____qualname____doc__CONSTANTLINEAREXPONENTIAL     r1   r4   r4   <   s    &HFKr@   r4   c                   (   e Zd ZU dZ eddd      Zeed<    eed      Z	eed	<    ed
d      Z
ee   ed<    ed
d      Zee   ed<    edd      Zeed<    eej                   d      Zeed<    edd      Zeed<    ed
d      Zeej*                     ed<    edd      Zeed<    edd      Zeed<    eddd d!"      Zeed#<    eej6                  d$      Zeed%<    edd&      Zeed'<    edd(      Zeed)<    edd*      Zeed+<    ed,d-      Z ee   ed.<    ed,d/      Z!ee   ed0<    ed,d1      Z"ee   ed2<    ed
d3      Z#ee   ed4<    ed,d5      Z$ee   ed6<    ed,d7      Z%ee   ed8<    ed
d9      Z&ee   ed:<    ed
d;      Z'ee   ed<<    ed
d=      Z(ee   ed><    ed
d?      Z)ee   ed@<    ed
dA      Z*ee   edB<    ed
dC      Z+ee   edD<    ed
dE      Z,ee-   edF<    ed
dG      Z.ee-   edH<    ed
dI      Z/ee-   edJ<    ed
dK      Z0ee-   edL<    ed,dM      Z1ee   edN<    ed,dO      Z2ee   edP<    ed,dQ      Z3ee   edR<    ed,dS      Z4ee   edT<    ed,dU      Z5ee   edV<    ed,dW      Z6ee   edX<    ed,dY      Z7ee   edZ<    ed,d[      Z8ee   ed\<    ed,d]      Z9ee   ed^<    ed,d_      Z:ee   ed`<    ed,da      Z;ee   edb<    ed,dc      Z<ee   edd<    ed,de      Z=ee   edf<    ed
dg      Z>ee   edh<    ed,di      Z?ee   edj<    ed
dk      Z@ee   edl<    ed,dm      ZAee   edn<    ed
do      ZBee-   edp<    ed
dq      ZCee-   edr<    edsdt      ZDee   edu<    ed
dv      ZEee   edw<    ed
dx      ZFee   edy<    ed,dz      ZGee   ed{<    ed
d|      ZHee   ed}<    ed
d~      ZIee   ed<    ed,d      ZJee   ed<    ed
d      ZKee-   ed<    ed
d      ZLee   ed<    ed
d      ZMee   ed<    ed
d      ZNee   ed<    ed
d      ZOeePeQef      ed<    ed,d      ZRee   ed<    ed
d      ZSee   ed<    ed,d      ZTee   ed<    ed
d      ZUeeV   ed<    ed
d      ZWee   ed<    ed
d      ZXee   ed<    ed,d      ZYee   ed<    ed,d      ZZee   ed<    ed,d      Z[ee   ed<    ed,d      Z\ee   ed<    ed,d      Z]ee   ed<    ed,d      Z^ee   ed<    ed,d      Z_ee   ed<    ed
d      Z`ee   ed<    ed
d      Zaee   ed<    ed
d      Zbee   ed<    ed
d      Zcee   ed<    ed,d      Zdee   ed<    ed
d      Zeee   ed<    ed
d      Zfee   ed<    ed
d      Zgee   ed<    ed
d      Zhee   ed<    ed
d      Ziee   ed<    ed
d      Zjee   ed<    ed
d      Zkee   ed<    ed
d      Zlee   ed<    ed
d      Zmee   ed<    ed,d¬      Znee   ed<    ed
dĬ      Zoee   ed<    ed,dƬ      Zpee   ed<    eddȬ      Zqee   ed<    ed,dʬ      Zree   ed<    esdddͫ      etdedefdЄ              Zu esd	ddͫ      etdedefdф              Zv ewd
d,ҫ      ZxePej*                  d
f   ed<   eydej*                  fdԄ       Zze{de|ej*                  d
f   fdՄ       Z}de~defdׄZde~defd؄Z	 	 dde~dee   dee   defd܄Zde-de-fdބZ	 dded%ed+edeeef   fdZ	 	 	 dde~dee   dee   d%ee   deeeeef   f   f
dZ	 	 	 dde~dee   dee   d+edee   f
dZ	 	 ddePee~   e~f   dee   dee   dee   fdZ	 	 ddePee~   e~f   dee   dee   dee   fdZ	 	 ddePee~   e~f   dee   dee   dePed   df   fdZ	 	 ddePee~   e~f   dee   dee   dePed   df   fdZ	 dde~dee   dee   fdZ	 ddePee~   e~f   dee   dee   fdZ	 ddePee~   e~f   dee   dee   fdZ	 ddePee~   e~f   dee   dee   fdZdee   dededee   fdZdee   dedee   fdZdee   dedee   fdZdee   dedee   fdZdee   dedee   fdZdee   dedee   fdZdee   dedee   fdZdee   dee   fdZy
)
LlamaParsezA smart-parser for files. z#The API key for the LlamaParse API.T)defaultdescriptionvalidate_defaultapi_keyz&The base URL of the Llama Parsing API.)rD   rE   r$   Nz+The organization ID for the LlamaParse API.r%   z&The project ID for the LlamaParse API.r&      z8The interval in seconds to check if the parsing is done.check_intervalzcControls the backoff pattern when retrying failed requests: 'constant', 'linear', or 'exponential'.backoff_pattern   zNMaximum interval in seconds between polling attempts when checking job status.max_check_intervalz2A custom HTTPX client to use for sending requests.custom_clientz?Whether or not to ignore and skip errors raised during parsing.ignore_errorsi  zAThe maximum timeout in seconds to wait for the parsing to finish.max_timeout   r      z>The number of workers to use sending API requests for parsing.)rD   gtltrE   num_workerszThe result type for the parser.result_typez*Show progress when parsing multiple files.show_progressz1Whether to split by page using the page separatorsplit_by_pagez-Whether to print the progress of the parsing.verboseFzNIf set to true, LlamaParse will try to detect long table and adapt the output.adaptive_long_tablez:Annotate links found in the document to extract their URL.annotate_linkszIf set to true, the parser will automatically select the best mode to extract text from documents based on the rules provide. Will use the 'accurate' default mode by default and will upgrade page that match the rule to Premium mode.	auto_modezA JSON string containing the configuration for the auto mode. If set, the parser will use the provided configuration for the auto mode.auto_mode_configuration_jsonzdIf auto_mode is set to true, the parser will upgrade the page that contain an image to Premium mode."auto_mode_trigger_on_image_in_pagezcIf auto_mode is set to true, the parser will upgrade the page that contain a table to Premium mode."auto_mode_trigger_on_table_in_pagezdIf auto_mode is set to true, the parser will upgrade the page that contain the text to Premium mode.!auto_mode_trigger_on_text_in_pagezdIf auto_mode is set to true, the parser will upgrade the page that match the regexp to Premium mode.#auto_mode_trigger_on_regexp_in_pagezAzure Openai API Versionazure_openai_api_versionzAzure Openai Deployment Nameazure_openai_deployment_namezAzure Openai Endpointazure_openai_endpointzAzure Openai Keyazure_openai_keyzThe bottom margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page height.bbox_bottomzThe left margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page width.	bbox_leftzThe right margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page width.
bbox_rightzThe top margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page height.bbox_topzaIf set to true, the parser will output compact markdown table (without trailing spaces in cells).compact_markdown_tablezhParse documents continuously, leading to better results on documents where tables span across two pages.continuous_modezbDisable the OCR on the document. LlamaParse will only extract the copyable text from the document.disable_ocrz]If set to true, the parser will not extract images from the document. Make the parser faster.disable_image_extractionzIf set to true, the document will not be cached. This mean that you will be re-charged it you reprocess them as they will not be cached.do_not_cachezIf set to true, the parser will keep column in the text according to document layout. Reduce reconstruction accuracy, and LLM's/embedings performances in most case.do_not_unroll_columnszEIf set to true, the parser will extract/tag charts from the document.extract_chartszgIf set to true, the parser will extract the layout information of the document. Cost 1 credit per page.extract_layoutzNote: Non compatible with gpt-4o. If set to true, the parser will use a faster mode to extract text from documents. This mode will skip OCR of images, and table/heading reconstruction.	fast_modez2Whether to guess the sheet names of the xlsx file.guess_xlsx_sheet_nameszmIf set to true, when parsing HTML the parser will consider all elements display not element as display block.html_make_all_elements_visiblezgIf set to true, when parsing HTML the parser will remove fixed elements. Useful to hide cookie banners.html_remove_fixed_elementszsIf set to true, when parsing HTML the parser will remove navigation elements. Useful to hide menus, header, footer.html_remove_navigation_elementszX(optional) If set with input_url will use the specified http proxy to download the file.
http_proxyznIf set to true, the parser will ignore document elements for layout detection and only rely on a vision model.-ignore_document_elements_for_layout_detectionz@The region of the input S3 bucket if input_s3_path is specified.input_s3_regionzIf set to true, the cache will be ignored and the document re-processes. All document are kept in cache for 48hours after the job was completed to avoid processing the same document twice.invalidate_cachezjThe extra time in seconds to wait for the parsing to finish per page. Get added to job_timeout_in_seconds.*job_timeout_extra_time_per_page_in_secondszThe maximum timeout in seconds to wait for the parsing to finish. Override default timeout of 30 minutes. Minimum is 120 seconds.job_timeout_in_secondsenz"The language of the text to parse.languagezeThe separator to use to split the header of the markdown table into multiple lines. Default is: <br/>)markdown_table_multiline_header_separatorzThe maximum number of pages to extract text from documents. If set to 0 or not set, all pages will be that should be extracted will be extracted (can work in combination with targetPages).	max_pagesz\If set to true, the parser will also output a PDF of the document. (except for spreadsheets)output_pdf_of_documentzAn S3 path prefix to store the output of the parsing job. If set, the parser will upload the output to S3. The bucket need to be accessible from the LlamaIndex organization.output_s3_path_prefixzHThe AWS region of the output S3 bucket defined in output_s3_path_prefix.output_s3_regionzFIf set to true, the parser will output tables as HTML in the markdown.output_tables_as_HTMLzThe error tolerance for the number of pages with error in a doc (percentage express as 0-1). If we fail to parse a greater percentage of pages than the tolerance value we fail the job.page_error_tolerancezA templated prefix to add to the beginning of each page. If it contain `{page_number}`, it will be replaced by the page number.page_prefixzA templated  page separator to use to split the text.  If it contain `{page_number}`,it will be replaced by the next page number. If not set will the default separator '\n---\n' will be used.page_separatorzA templated suffix to add to the beginning of each page. If it contain `{page_number}`, it will be replaced by the page number.page_suffixzBThe parsing mode to use, see ParsingMode enum for possible values 
parse_modez(Use our best parser mode if set to True.premium_modezThe preset to use for the parser. If set, the parser will use the preset configuration. See LlamaParse documentation for available presets. Preset override most other parameters.presetz1Preserve grid alignment across page in text mode.&preserve_layout_alignment_across_pageszThe mode to use to replace the failed page, see FailedPageMode enum for possible value. If set, the parser will replace the failed page with the specified mode. If not set, the default mode (raw_text) will be used.replace_failed_page_modezYA prefix to add before error message in failed pages. If not set, no prefix will be used.-replace_failed_page_with_error_message_prefixzXA suffix to add after error message in failed pages. If not set, no suffix will be used.-replace_failed_page_with_error_message_suffixzlIf set to true, the parser will ignore diagonal text (when the text rotation in degrees modulo 90 is not 0).skip_diagonal_textzvIf set to true, the parser will extract sub-tables from the spreadsheet when possible (more than one table per sheet).spreadsheet_extract_sub_tableszfIf set to true, the parser will fail if it can't extract text from a document because of a buggy font.strict_mode_buggy_fontzTIf set to true, the parser will fail if it can't extract an image from the document.strict_mode_image_extractionzPIf set to true, the parser will fail if it can't OCR an image from the document.strict_mode_image_ocrzdIf set to true, the parser will fail if it can't reconstruct a table or a heading from the document.strict_mode_reconstructionzYIf set to true, the parser will output structured data based on the provided JSON Schema.structured_outputzA JSON Schema to use to structure the output of the parsing job. If set, the parser will output structured data based on the provided JSON Schema.structured_output_json_schemazThe named JSON Schema to use to structure the output of the parsing job. For convenience / testing, LlamaParse provides a few named JSON Schema that can be used directly. Use 'imFeelingLucky' to let llamaParse dream the schema."structured_output_json_schema_namezPThe system prompt. Replace llamaParse default system prompt, may impact accuracysystem_promptz*String to append to default system prompt.system_prompt_appendz8Whether to take screenshot of each page of the document.take_screenshotzThe target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0target_pagesz7The user prompt. Replace llamaParse default user promptuser_promptz#The API key for the multimodal API.vendor_multimodal_api_keyz-The model name for the vendor multimodal API.vendor_multimodal_model_namez<A URL that needs to be called at the end of the parsing job.webhook_urlzoThe bounding box to use to extract text from documents describe as a string containing the bounding box marginsbounding_boxzThe complemental formatting instruction for the parser. Tell llamaParse how some thing should to be formatted, while retaining the markdown output.#complemental_formatting_instructionzfThe content guideline for the parser. Tell LlamaParse how the content should be changed / transformed.content_guideline_instructionzThe Formatting instruction for the parser. Override default llamaParse behavior. In most case you want to use complemental_formatting_instruction instead.formatting_instructionz2Whether to use gpt-4o extract text from documents.
gpt4o_modez;The API key for the GPT-4o API. Lowers the cost of parsing.gpt4o_api_keyzcAllow the parsing instruction to also format the output. Disable to have a cleaner markdown output.is_formatting_instructionz'The parsing instruction for the parser.parsing_instructionz)Whether to use the vendor multimodal API.use_vendor_multimodal_modelbefore)modecheck_fieldsr0   r'   c                 V    |s&ddl } |j                  dd      }|t        d      |S |S )zValidate the API key.r   NLLAMA_CLOUD_API_KEYzThe API key is required.)osgetenv
ValueError)clsr0   r   rG   s       r1   validate_api_keyzLlamaParse.validate_api_key  s8     bii 5t<G !;<<Nr@   c                 J    t        j                  dd      }|xs
 |xs t        S )zValidate the base URL.LLAMA_CLOUD_BASE_URLN)r   r   r   )r   r0   urls      r1   validate_base_urlzLlamaParse.validate_base_url  s&     ii.5+a+++r@   )rD   init_aclientc                 8   | j                   s'| j                  xs t        j                         | _         | j                  | j                   _        d| j
                   | j                   j                  d<   | j                  | j                   _        | j                   S )NzBearer Authorization)	r   rM   httpxAsyncClientr$   rG   headersrO   timeout)selfs    r1   aclientzLlamaParse.aclient  sq    }} ..E%2C2C2EDM "&3:4<<.1Io. $ 0 0}}r@   c                   K   | j                   | j                    yt        j                  | j                        4 d{   }| ddd      d{    y7 7 # 1 d{  7  sw Y   yxY ww)z&Create a context for the HTTPX client.Nr   )rM   r   r   rO   )r   clients     r1   client_contextzLlamaParse.client_context  sj      )$$$((1A1AB  f      sH   AA7AA7A"A7A A7 A7"A4(A+)A40A7	file_pathc                     t        |t              sy	 t        |      }t        |j                  dv |j
                  |j                  g      S # t        $ r Y yw xY w)zCheck if the input is a valid URL.

        This method checks for:
        - Proper URL scheme (http/https)
        - Valid URL structure
        - Network location (domain)
        F)httphttps)
isinstancestrr   allschemenetloc	Exception)r   r   results      r1   _is_input_urlzLlamaParse._is_input_url  sa     )S)
	i(FMM%66MMMM   		s   8A 	AAc                 F    t        |t              r|j                  d      S y)zoCheck if the input is a valid URL.

        This method checks for:
        - Proper S3 scheme (s3://)
        zs3://F)r   r   
startswith)r   r   s     r1   
_is_s3_urlzLlamaParse._is_s3_url  s"     i%''00r@   
file_input
extra_infofsc                   K   d }d }| j                  |      r|nd }| j                  |      r|nd }t        |t        t        f      r6|rd|vrt        d      |d   }t        j                  |      d   }	d|||	fi}n|d }n|d }nt        |t        t        t        t        f      rt        |      }
t        j                  j                  |
      d   j                         }|t         vrt#        dt          d|       t        j                  |
      d   }	|xs
 t%               }|j'                  |d      }dt        j                  j)                  |
      ||	fi}nt        d	      i }d
|d<   | j*                  r| j*                  |d<   | j,                  r| j,                  |d<   | j.                  r| j.                  |d<   | j0                  | j0                  |d<   | j2                  r| j2                  |d<   | j4                  r| j4                  |d<   | j6                  | j6                  |d<   | j8                  | j8                  |d<   | j:                  | j:                  |d<   | j<                  | j<                  |d<   | j>                  | j>                  |d<   | j@                  | j@                  |d<   | jB                  | jB                  |d<   | jD                  | jD                  |d<   | jF                  | jF                  |d<   | jH                  | jH                  |d<   | jJ                  r| jJ                  |d<   | jL                  rtO        d       | jL                  |d<   | jP                  rtO        d       | jP                  |d <   | jR                  r| jR                  |d!<   | jT                  r| jT                  |d"<   | jV                  r| jV                  |d#<   | jX                  r| jX                  |d$<   | jZ                  r| jZ                  |d%<   | j\                  r| j\                  |d&<   | j^                  r| j^                  |d'<   | j`                  r| j`                  |d(<   | jb                  rtO        d)       | jb                  |d*<   | jd                  r| jd                  |d+<   | jf                  r| jf                  |d,<   | jh                  r| jh                  |d-<   | jj                  r| jj                  |d.<   | jl                  | jl                  |d/<   | jn                  r| jn                  |d0<   |d }t        |      |d1<   |d }t        |      |d2<   | jp                  | jp                  |d3<   | jr                  r| jr                  |d4<   | jt                  rtO        d)       | jt                  |d5<   | jv                  | jv                  |d6<   | jx                  | jx                  |d7<   | jz                  r| jz                  |d8<   | j|                  | j|                  |d9<   | j~                  r| j~                  |d:<   | j                  | j                  |d;<   | j                  | j                  |d<<   | j                  r| j                  |d=<   | j                  | j                  |d><   | j                  | j                  |d?<   | j                  | j                  |d@<   | j                  | j                  |dA<   | j                  rtO        dB       | j                  |dC<   | j                  r| j                  |dD<   | j                  r| j                  |dE<   | j                  r| j                  |dF<   | j                  | j                  |dG<   | j                  | j                  j                  |dH<   | j                  | j                  |dI<   | j                  | j                  |dJ<   | j                  r| j                  |dK<   | j                  r| j                  |dL<   | j                  r| j                  |dM<   | j                  r| j                  |dN<   | j                  r| j                  |dO<   | j                  r| j                  |dP<   | j                  r| j                  |dQ<   | j                  | j                  |dR<   | j                  | j                  |dS<   | j                  | j                  |dT<   | j                  | j                  |dU<   | j                  r| j                  |dV<   | j                  | j                  |dW<   | j                  | j                  |dX<   | j                  r| j                  |dY<   | j                  | j                  |dZ<   | j                  | j                  |d[<   | j                  | j                  |d\<   | j                  | j                  |d]<   | j                  | j                  |d^<   | j                  r| j                  |d_<   | j                  | j                  |d`<   	 t        t        | j                  | j                        }t        | j                  da|| j                  ||b       d {   }|j                          |j                         dc   ||j                          S S 7 :# t        j                  $ r*}dd|j                  j                   }t#        |      |d }~ww xY w# ||j                          w w xY ww)eN	file_name;file_name must be provided in extra_info when passing bytesr   filerH   z8Currently, only the following file types are supported: z
Current file type: rbzJfile_input must be either a file path string, file bytes, or buffer objectTfrom_python_packagerY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   zWARNING: complemental_formatting_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead.r   zWARNING: content_guideline_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead.r   rj   rk   rl   rm   rn   ro   rp   rq   zWARNING: formatting_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead.r   rr   rs   rt   ru   rv   rw   	input_urlinput_s3_pathrx   ry   r   rz   r{   r}   r   r   r   r   r   r   r   r   r   zkWARNING: parsing_instruction is deprecated. Use system_prompt, system_prompt_append or user_prompt instead.r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   POST)r   filesdataidzFailed to parse the file: )tr   r   r   bytesr   r   	mimetypes
guess_typer   r   r	   r   r   pathsplitextlowerr   r   r   openbasenamerY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   r   printr   rj   rk   rl   rm   rn   ro   rp   rq   r   rr   rs   rt   ru   rv   rw   rx   ry   r   rz   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   valuer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r2   JOB_UPLOAD_ROUTEr%   r&   r#   r   rO   raise_for_statusjsoncloser   HTTPStatusErrorresponsetext)r   r   r   r   r   file_handler   r   r   	mime_typer   file_extr   r   resperrmsgs                    r1   _create_jobzLlamaParse._create_job  s     "&"4"4Z"@Jd	&*ooj&A
tj5."9:J!> Q  #;/I!,,Y7:IiY?@E"E&E
S$x$HIJIww''	215;;=H33NOcNd e**25  ",,Y7:I '~'B''*d3Kbgg..y9;	RSE\   "&*"###*.*B*BD&'%)%8%8D!">> $D,,8373T3TD/022 77 4 22 77 4 11= 66 3 33? 88 5 ((4/3/L/LD+,,,8373T3TD/0%%1,0,F,FD()  ,'+'<'<D#$'"&"2"2D>>% $D??&!%D==$#}}D&&-1-H-HD)*33 t
 88 5 -- n 594V4VD01&*&:&:D"#"&"2"2D((/3/L/LD+,#'#4#4D %%,0,F,FD()%)%8%8D!"%)%8%8D!">> $D&& g .2-H-HD)*&&-1-H-HD)*..595X5XD12**151P1PD-.// 44 1 ??&!%D== BB ?  E #ID$E$'$6D!+&*&:&:D"#  '+'<'<D#$)) g 150N0ND,-::F ?? < &&2-1-H-HD)*==#}}D>>% $D&&-1-H-HD)*%%1,0,F,FD()  ,'+'<'<D#$%%,0,F,FD()$$0+/+D+DD'('"&"2"2D *%)%8%8D!"'"&"2"2D##} +/*B*BD&'??!%D#'#4#4D 66 ;; 8 ;;"![[DN((4/3/L/L/R/RD+,==I BB ? ==I BB ? "")-)@)@D%&..595X5XD12&&-1-H-HD)*,,373T3TD/0%%,0,F,FD()**151P1PD-.!!(,(>(>D$%--9484V4VD0122> 77 4 )$($6$6D!$$0+/+D+DD'(&*&:&:D"#(#'#4#4D '"&"2"2D++262R2RD./))5040N0ND,-,,8373T3TD/0'"&"2"2D99E >> ;
 (#'#4#4D ??!%D)$($6$6D!
	$,d.B.BDOOTC)$,,TM]M]ejquvvD!!#99;t$
 &!!# ' w $$ 	*.s||/@/@.ABCC.c)	* &!!# 'sI   h&l)Aj3 6j17&j3 l1j3 3k0%k++k00k3 3llcurrent_intervalc                 @   | j                   t        j                  k(  r|S | j                   t        j                  k(  r"t	        |dz   t        | j                              S | j                   t        j                  k(  r"t	        |dz  t        | j                              S |S )zCalculate the next backoff interval based on the backoff pattern.

        Args:
            current_interval: The current interval in seconds

        Returns:
            The next interval in seconds
        rH      )rJ   r4   r<   r=   minfloatrL   r>   )r   r   s     r1   _calculate_backoffzLlamaParse._calculate_backoffp  s     >#:#::##!!^%:%::'!+U43J3J-KLL!!^%?%??'!+U43J3J-KLLr@   job_idc           	        K   t        j                          }d}d}t        | j                        }| j                  }	 	 t	        j
                  |       d {    |dz  }|j                  t        j                  |             d {   }	|	j                          |	j                         }
|
d   }|dk(  r>|j                  t        j                  ||             d {   }|j                         S |dk(  r^t        j                          }||z
  | j                  kD  rt        d	|       |r|d
z  dk(  rt        ddd       | j                  |      }n=|
j                  dd      }|
j                  dd      }d| d| d| d| }t        |      	 [7 B7 7 # t         j"                  t         j$                  t         j&                  t         j(                  t         j*                  t         j,                  t         j.                  f$ rp}|dz  }t        j                          }||z
  | j                  kD  rt        d	|       ||r|d
z  dk(  rt        d| dd       | j                  |      }Y d }~d }~ww xY ww)Nr   TrH   )r  statusSUCCESS)r  rU   PENDINGz Timeout while parsing the file: 
   .rC   )endflush
error_codezNo error code founderror_messagezNo error message foundzJob ID: z failed with status: z, Error code: z, Error message: zHTTP error: z...)r	  )timer   rI   r   asynciosleepgetJOB_STATUS_ROUTEformatr   r   JOB_RESULT_URLrO   r   r   r   r   ConnectError	ReadError
WriteErrorConnectTimeoutReadTimeoutWriteTimeoutr   )r   r  rU   rX   starttrieserror_countr   r   r   result_jsonr  parsed_resultr  r
  r  exception_strr   s                     r1   _get_job_resultzLlamaParse._get_job_result  s     		"'(;(;"<
 2Mmm$4555
%zz*:*A*A*A*PQQ'')$kkm$X.Y&*0**&--V-U+ %M )--//y())+CU{T%5%55'*J6((STT52:?cr6'+'>'>?O'P$!,?T!UJ$/OO')A%M #6(*?x H''1l2CM?T " $M22 %) 5Q%, ""  $$!!""%% M q iik;!1!11#:6(C urzQ&se3/" $(#:#:;K#L )Mst   ;JF  F1F  F	AF   F!F  4J5B F  JF  F  F   A.I>A&I94J9I>>Jc                   K   | j                  |||       d{   }| j                  rt        d|z         | j                  ||xs | j                  j
                  | j                         d{   }||fS 7 `7 
w)z-Create one parse job and wait for the result.r   r   Nz(Started parsing the file under job_id %s)rX   )r   rX   r   r  rU   r   )r   r   r   r   rU   r  r   s          r1   
_parse_onezLlamaParse._parse_one  s      ''	jR'PP<<<vEF++K94#3#3#9#94<< , 
 
 v~ Q
s"   A?A;AA?2A=3	A?=A?c                 r  K   	 | j                  |||       d{   \  }}t        || j                  j                     |xs i       g}| j                  r| j                  |      S |S 7 O# t        $ r>}t        |t              r|nd}	t        d|	 d|       | j                  rg cY d}~S |d}~ww xY ww)Load data from the input path.r!  Nr   metadata<bytes/buffer>Error while parsing the file '':)r"  r   rU   r   rW   _get_sub_docsr   r   r   r   rN   )
r   r   r   r   rX   _job_idr   docse	file_reprs
             r1   _aload_datazLlamaParse._aload_data  s     	$(OOjR %4 % OGV  0 0 6 67'-2D !!))$//  	%/	3%?	EUI29+R@!D!!		sW   B7A- A+AA- (B7)A- *B7+A- -	B461B/'B4(B7-B//B44B7c           
      \  K   t        |t        t        t        t        t
        f      r'| j                  |||| j                         d{   S t        |t              r|D cg c]0  }| j                  |||| j                  xr | j                         2 }}	 t        || j                  d| j                         d{   }|D cg c]  }|D ]  }|  c}}S t        d      7 c c}w 7 /c c}}w # t        $ r'}	t        t        |	      v rt        t              |	d}	~	ww xY ww)r$  )r   r   rX   NParsing filesworkersdescrV   :The input file_path must be a string or a list of strings.)r   r   r	   r   r   r   r/  rX   listrV   r   rT   RuntimeErrorr!   r"   r   )
r   r   r   r   fjobsresultssublistitemr-  s
             r1   
aload_datazLlamaParse.aload_data  s@     i#}dE>!RS))jR *    	4( #    ) LLC1C1C-C	 ! D  ( ,,("&"4"4	!  -4HHHHH L ; I #s1v-&'788G	sl   AD,
C*D,#5C,D,&C9 C1	C9 C3C9 D,,D,1C9 3C9 9	D)"D$$D))D,c                     	 t        | j                  |||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)r$  r   N)r   r=  r7  r!   r   r"   r   r   r   r   r-  s        r1   	load_datazLlamaParse.load_data  sN    	ty*LMM 	3q6)"#344		    	A"A

Ar   c                   K   t        |t        t        t        t        t
        f      rt        |t        t
        f      r|rd|vrt        d      |d   }nt        |      }| j                  |||t        j                  j                         d{   \  }}t        |||| j                  | j                  | j                  | j                  xs t               S t        |t"              r*g }|D ]X  }t        |t        t
        f      r&|rd|vrt        d      |j%                  |d          ?|j%                  t        |             Z 	 t'        |D cg c]/  }| j                  |||t        j                  j                        1 c}| j(                  d| j*                         d{   }	t-        |	      D 
cg c]M  \  }
\  }}t        |||
   || j                  | j                  | j                  | j                  xs t               O c}}}
S t        d      7 c c}w 7 }c c}}}
w # t.        $ r'}t0        t        |      v rt/        t2              |d}~ww xY ww)	az  
        Parse the file and return a JobResult object instead of Document objects.

        This method is similar to aload_data but returns JobResult objects that provide
        direct access to the various output formats (text, markdown, json, etc.)

        Args:
            file_path: Path to the file to parse. Can be a string, path, bytes, file-like object, or a list of these.
            extra_info: Additional metadata to include in the result.
            fs: Optional filesystem to use for reading files.

        Returns:
            JobResult object or list of JobResult objects if multiple files were provided
        r   r   )r   r   rU   N)r  r   
job_resultrG   r$   r   r   zGetting job resultsr2  r5  )r   r   r	   r   r   r   r   r"  r   JSONr   r   rG   r$   r   r   _DEFAULT_SEPARATORr6  appendr   rT   rV   	enumerater7  r!   r"   )r   r   r   r   r   r  rD  
file_namesr8  job_resultsir-  s               r1   aparsezLlamaParse.aparse)  s`    * i#}dE>!RS)e^%<=![
%B$U  '{3		N	'+%&OO11	 (7 ( "FJ #%||#22H6H  	4(J .a%!89%J)F(Y  %%j&=>%%c!f-."$, "+  '1!(2(=(=	 (  !,,."&"4"4% 4 4=[3I  0/FJ %",Q-#- $!%#||'+':':'P>P & L A"<    #s1v-&'788G	sh   BI$H B9I$	H1 4H#"H1 *H(+H1 >AH*H1 I$#H1 *H1 1	I!:"II!!I$c                     	 t        | j                  |||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)ay  
        Parse the file and return a JobResult object instead of Document objects.

        This method is similar to load_data but returns JobResult objects that provide
        direct access to the various output formats (text, markdown, json, etc.)

        Args:
            file_path: Path to the file to parse. Can be a string, path, bytes, file-like object, or a list of these.
            extra_info: Additional metadata to include in the result.
            fs: Optional filesystem to use for reading files.

        Returns:
            JobResult object or list of JobResult objects if multiple files were provided
        r?  N)r   rL  r7  r!   r   r"   r@  s        r1   parsezLlamaParse.parse  sN    (	t{{9jR{HII 	3q6)"#344		rB  c                 j  K   	 | j                  ||t        j                  j                         d{   \  }}||d<   t	        |t
        t        f      st        |      |d<   |gS 7 3# t        $ r>}t	        |t              r|nd}t        d| d|       | j                  rg cY d}~S |d}~ww xY ww)r$  )r   rU   Nr  r   r'  r(  r)  )r"  r   rE  r   r   r   r   r   r   r   rN   )r   r   r   r  r   r-  r.  s          r1   
_aget_jsonzLlamaParse._aget_json  s     	#'??%&OO11 $3 $ NFF
  &F8i%)@A&))n{#8O  	%/	3%?	EUI29+R@!D!!		sJ   B3/A) A'2A) &B3'A) )	B021B+#B0$B3)B++B00B3c                   K   t        |t        t        f      r| j                  ||       d{   S t        |t              re|D cg c]  }| j                  ||       }}	 t        || j                  d| j                         d{   }|D cg c]  }|D ]  }|  c}}S t        d      7 c c}w 7 /c c}}w # t        $ r'}t        t        |      v rt        t              |d}~ww xY ww)r$  )r   Nr1  r2  r5  )r   r   r   rP  r6  r   rT   rV   r7  r!   r"   r   )	r   r   r   r8  r9  r:  r;  r<  r-  s	            r1   	aget_jsonzLlamaParse.aget_json  s      i#t-zJJJ	4(GPQ!DOOA*O=QDQ ( ,,("&"4"4	!  -4HHHHH L ' KQ I #s1v-&'788G	si   -C6B4C6B6"C6%&C B;	C B=%C (C66C6;C =C 	C3"C..C33C6c                     	 t        | j                  ||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)zParse the input path.N)r   rR  r7  r!   r   r"   )r   r   r   r-  s       r1   get_json_resultzLlamaParse.get_json_result  sJ    	t~~iDEE 	3q6)"#344		    	A"AAc                 &    | j                  ||      S )r$  )rT  )r   r   r   s      r1   get_jsonzLlamaParse.get_json  s     ##Iz::r@   json_resultdownload_path	asset_keyc                   K   t         j                  j                  |      st        j                  |       | j                  }	 g }|D ]R  }|d   }|d   D ]A  }| j
                  r(t        d|j                          d|d    d||           ||   D ]  }	|	d   }
t         j                  j                  || d|
       }|j                  d	      s|j                  d
      s|d	z  }||	d<   ||	d<   |j                  dd      |	d<   |d   |	d<   t        |d      5 }| j                   d| d|
 }t        |d|| j                         d{   }|j                          |j!                  |j"                         ddd       |j%                  |	        D U |S 7 S# 1 sw Y   'xY w# t&        $ r*}t        d| d|       | j(                  rg cY d}~S |d}~ww xY ww)z:Download assets (images or charts) from the parsed result.r  pagesz> z
 for page pagez: name-z.pngz.jpgr   r   Noriginal_file_pathpage_numberwb/api/parsing/job/z/result/image/GETr   zError while downloading z from the parsed result:)r   r   existsmakedirsr   rX   r   
capitalizer,   endswithr  r   r$   r#   rO   r   writecontentrG  r   rN   )r   rX  rY  rZ  r   assetsr   r  r]  asset
asset_name
asset_pathr8  	asset_urlr   r-  s                   r1   aget_assetszLlamaParse.aget_assets  s    
 ww~~m,KK&)	F%  -)"7O -D|| !5!5!7 8
4<.PRSWXaSbRcd "&i -%*6]
 &(WW\\)fXQzl+C&

  *226:#-#6#6v#> *f 4
(2f*0h6<jjd6S23/3F|m,!*d3 2q+/==/9J6(R`ak`l(mI)9 &y$BRBR* $D !113GGDLL12 e,3-- -B M$2 2  	,YK7OPRST!!		sg   AG$CF. 1F"	F /F"	<#F. G$ F"	"F+'F. .	G!7GG!G$GG!!G$c                    K   	 | j                  ||d       d{   S 7 # t        $ r&}t        d|       | j                  rg cY d}~S |d}~ww xY ww)'Download images from the parsed result.imagesNzError while downloading images:rp  r   r   rN   r   rX  rY  r-  s       r1   aget_imageszLlamaParse.aget_images(  S     	))+}hOOOO 	3Q7!!		B   A! ! A! 	AAAA	AAAc                    K   	 | j                  ||d       d{   S 7 # t        $ r&}t        d|       | j                  rg cY d}~S |d}~ww xY ww)'Download charts from the parsed result.chartsNzError while downloading charts:rt  ru  s       r1   aget_chartszLlamaParse.aget_charts5  rw  rx  c                     	 t        | j                  ||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)rr  N)r   rv  r7  r!   r   r"   ru  s       r1   
get_imageszLlamaParse.get_imagesB  L    	t//]KLL 	3q6)"#344		rU  c                     	 t        | j                  ||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)rz  N)r   r|  r7  r!   r   r"   ru  s       r1   
get_chartszLlamaParse.get_chartsL  r  rU  c                   K   t         j                  j                  |      st        j                  |       | j                  }	 g }|D ]  }|d   }| j
                  rt        d       t         j                  j                  || d      }i }||d<   ||d<   |j                  dd      |d<   t        |d      5 }	| j                   d	| d
}
t        |d|
| j                         d{   }|j                          |	j                  |j                         ddd       |j!                  |        |S 7 L# 1 sw Y    xY w# t"        $ r&}t        d|       | j$                  rg cY d}~S |d}~ww xY ww)%Download xlsx from the parsed result.r  z> XLSXz.xlsxr   r   Nr`  rb  rc  z/result/raw/xlsxrd  r   zError while downloading xlsx:)r   r   re  rf  r   rX   r   r,   r  r   r$   r#   rO   r   ri  rj  rG  r   rN   )r   rX  rY  r   	xlsx_listr   r  	xlsx_pathxlsxr8  xlsx_urlresr-  s                r1   	aget_xlsxzLlamaParse.aget_xlsxV  sl    
 ww~~m,KK& 	I% ')<<(OGGLL6(%8HI	(V!'X-3ZZT-J)*)T* )a==/):6(BRS  !1x9I9I! C ((*GGCKK()   &-'. 	) )  	115!!		sg   AE3A3E 7/D5&D3'/D5E 2E33D55D>	:E 	E0
E+#E0$E3)E++E00E3c                     	 t        | j                  ||            S # t        $ r'}t        t	        |      v rt        t
              |d}~ww xY w)r  N)r   r  r7  r!   r   r"   ru  s       r1   get_xlsxzLlamaParse.get_xlsx  sJ    	t~~k=IJJ 	3q6)"#344		rU  r,  c                     g }| j                   xs t        }|D ]U  }|j                  j                  |      }|D ]3  }t	        |t        |j                              }|j                  |       5 W |S )z$Split docs into pages, by separator.r%  )r   rF  r   splitr   r   r&  rG  )r   r,  sub_docs	separatordoc
doc_chunks	doc_chunksub_docs           r1   r*  zLlamaParse._get_sub_docs  sv    ''=+=	 	)C	2J' )	""%cll3 ()	) r@   )NN)F)NNN)NNF)N)r8   r9   r:   r;   r   rG   r   __annotations__r   r$   r%   r   r&   rI   intr4   r=   rJ   rL   rM   r   r   rN   boolrO   rT   r   TXTrU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   r   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   propertyr   r   r   r   	FileInputr   r   dictr   r   r   r   r
   r  r   r"  r   r   r/  r=  rA  rL  rN  rP  rR  rT  rW  rp  rv  r|  r~  r  r  r  r*  r?   r@   r1   rB   rB   D   s   # 9GS 
  <Hc  &+A&OXc]  !&<!J   NNC 
 ',%%y'O^  $d 
 27"V2M8E--.   UM4  WK  T	K  $,MK   "NM4   GM4  "QGT 
 +0d+$  &+P&NHTN  !& !Ix~  38 ^3 (3-  :?z:&  :?y:&  8=z8%x}  :?z:'#  /4"</hsm  38"@3 (3-  ,1"9,8C=  ',"4'hsm  $) w$K%  "' t"Ix  #( u#J  !& t!Hhuo  .3w.HTN  ',~'OXd^  #(x#K$  05s0htn  $) _$L(4.  -2 {-8D>  &+[&NHTN  &+}&NHTN  !& O!Ix~ 
 .3H.HTN  6; D6"HTN  27}2  7< J7#Xd^  !&n!J  EJ EE18D>  &+V&OXc]  (- S(htn  CH AC.  /4 X/HUO  $"FHhsm  @E{@-x}   % S Ix}  .3r.HTN  ,1 D,8C=  ',^'hsm  -2\-8D>  -2 O-(5/  "' V"K#  %* X%NHSM  "' V"K#  5:X5J{C/01  $)>$L(4.  " IFHSM  >CG>*HTN  :? m:h~6  DIoD18C=  DInD18C=  */ C*  6; M6"HTN 
 .3|.HTN 
 49j4 (4. 
 -2f-8D> 
 27z2 
 ).o)x~  49 i4!8C=  9> z9&  $)f$M8C=  +0@+(3-  ',N'OXd^  #( d#L(3-  "'M"K#  0590x}  38C3 (3-  "'R"K#  #( F#L(3-  :? j:'#  49|4!8C=  -2 q-HSM  "'H"J  $)Q$M8C=  16y1x~  */ I*#  38?3$ 
 YXDA
 
 
  B
 ZhTB,# ,# ,  C,
 0;4e/THeE%%t+,T**    nU5F5F5L&M  y T ,I $  &*+/	T$T$ TNT$ '(	T$
 
T$l
 5  U  $ >C?M?M(+?M6:?M	c3h?MH &*+/%) TN '(	
 c] 
sDcN"	#& &*+/ TN '(	
  
hD &*+/	'i)34' TN' '(	'
 
h'X &*+/	i)34 TN '(	
 
h$ &*+/	ai)34a TNa '(	a
 
tK +-	.aL &*+/	i)34 TN '(	
 
tK +-	.: BF"08	d6 &*i)34 TN 
d	B &*i)34 TN 
d	" &*;i)34; TN; 
d	;2:2692FI2	d2h:69	d:69	dd4j  d d4j  d (:(69(	d(TDJ s tDz $x. T(^ r@   rB   )@r  r   r   r  
contextlibr   copyr   enumr   ior   pathlibr   r   r	   typingr
   r   r   r   r   r   r   urllib.parser   r   fsspecr   llama_index.core.async_utilsr   r    llama_index.core.bridge.pydanticr   r   r   llama_index.core.constantsr   llama_index.core.readers.baser   "llama_index.core.readers.file.baser   llama_index.core.schemar    llama_cloud_services.parse.typesr    llama_cloud_services.parse.utilsr   r   r   r    r!   r"   r#   r   r   r  rF  r  r  r   r2   r4   rB   r?   r@   r1   <module>r     s      	  *    1 1 J J J !  % > P P 7 < = , 6   #un,-	  B. ( $,SM?G} S$  S# Sr@   