
    'hj                       d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZmZ d dlmZ  ej<                  e      Z 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z! G d
 dee      Z"y)    )annotationsN)IterableMappingSequence)AnyLiteralOptionalUnioncast)
Embeddings)from_envget_pydantic_field_namessecret_from_env)	BaseModel
ConfigDictField	SecretStrmodel_validator)Selfc                
   t        |       D cg c]  }g  }}t        |       D cg c]  }g  }}t        t        |            D ]S  }|rt        ||         dk(  r|||      j                  ||          |||      j                  t        ||                U g }	t        |       D ]  }||   }
t        |
      dk(  r|	j                  d        (t        |
      dk(  r|	j                  |
d          Kt        ||         }t	        |
 D cg c]$  }t        d t	        |||         D              |z  & }}t        d |D              dz  }|	j                  |D cg c]  }||z  	 c}        |	S c c}w c c}w c c}w c c}w )N   r   c              3  ,   K   | ]  \  }}||z    y wN ).0valweights      g/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/langchain_openai/embeddings/base.py	<genexpr>z6_process_batched_chunked_embeddings.<locals>.<genexpr>?   s       #V &Ls   c              3  &   K   | ]	  }|d z    yw)   Nr   )r   r   s     r   r   z6_process_batched_chunked_embeddings.<locals>.<genexpr>I   s     6sCF6   g      ?)rangelenappendsumzip)	num_textstokensbatched_embeddingsindices
skip_empty_resultsnum_tokens_in_batchi
embeddings_resulttotal_weight	embeddingaverage	magnituder   s                   r   #_process_batched_chunked_embeddingsr7      s    5:)4D'Eq'EG'E
 9>i8H+I1B+I+I3w<  ?#0349
""#5a#89GAJ'..s6!9~>	? /1J9  D%,QZw<1 d#\Qgaj) 2156L "%g   '*96I!6L'M  	G  6g66#=I'B3sYBCA DD c (F
 ,J< Cs   	E1	E6)E;F 
c                     e Zd ZU dZ edd      Zded<    edd      Zded<   dZd	ed
<   dZ	ded<   	 eZ
ded<    e edd      d      Zded<   	  ed edd            Zded<   	  e edd            Zded<    e edd            Zded<   dZded<   	  ed  ed!d            Zd"ed#<   	  ed$ ed%d&gd            Zded'<   	 dZd(ed)<   dZd*ed+<   d,Zded-<   	 d.Zded/<   	  edd01      Zd2ed3<   	 dZded4<   dZd5ed6<   	 dZded7<   	 d8Zd5ed9<   	  ee      Zd:ed;<   	 d8Zd5ed<<   	 dZ d=ed><   dZ!d?ed@<   dAZ"dedB<   	 dCZ#dedD<   	 dZ$dEedF<   	 dZ%dEedG<   	 dZ&d5edH<   	  e'dIddJK      Z( e)dLM      e*dZdN              Z+ e)dOM      d[dP       Z,e-d\dQ       Z.	 	 	 	 	 	 d]dRZ/ddS	 	 	 	 	 	 	 	 	 d^dTZ0ddS	 	 	 	 	 	 	 	 	 d^dUZ1	 d_	 	 	 	 	 	 	 d`dVZ2	 d_	 	 	 	 	 	 	 d`dWZ3dadXZ4dadYZ5y)bOpenAIEmbeddingsu	  OpenAI embedding model integration.

    Setup:
        Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``.

        .. code-block:: bash

            pip install -U langchain_openai
            export OPENAI_API_KEY="your-api-key"

    Key init args — embedding params:
        model: str
            Name of OpenAI model to use.
        dimensions: Optional[int] = None
            The number of dimensions the resulting output embeddings should have.
            Only supported in `text-embedding-3` and later models.

    Key init args — client params:
        api_key: Optional[SecretStr] = None
            OpenAI API key.
        organization: Optional[str] = None
            OpenAI organization ID. If not passed in will be read
            from env var OPENAI_ORG_ID.
        max_retries: int = 2
            Maximum number of retries to make when generating.
        request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None
            Timeout for requests to OpenAI completion API

    See full list of supported init args and their descriptions in the params section.

    Instantiate:
        .. code-block:: python

            from langchain_openai import OpenAIEmbeddings

            embed = OpenAIEmbeddings(
                model="text-embedding-3-large"
                # With the `text-embedding-3` class
                # of models, you can specify the size
                # of the embeddings you want returned.
                # dimensions=1024
            )

    Embed single text:
        .. code-block:: python

            input_text = "The meaning of life is 42"
            vector = embeddings.embed_query("hello")
            print(vector[:3])

        .. code-block:: python

            [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]

    Embed multiple texts:
        .. code-block:: python

            vectors = embeddings.embed_documents(["hello", "goodbye"])
            # Showing only the first 3 coordinates
            print(len(vectors))
            print(vectors[0][:3])

        .. code-block:: python

            2
            [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]

    Async:
        .. code-block:: python

            await embed.aembed_query(input_text)
            print(vector[:3])

            # multiple:
            # await embed.aembed_documents(input_texts)

        .. code-block:: python

            [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
    NT)defaultexcluder   clientasync_clientztext-embedding-ada-002strmodelOptional[int]
dimensionszOptional[str]
deploymentOPENAI_API_VERSION)r:   api_version)default_factoryaliasopenai_api_versionbase_urlOPENAI_API_BASE)rF   rE   openai_api_baseOPENAI_API_TYPE)rE   openai_api_typeOPENAI_PROXYopenai_proxyi  intembedding_ctx_lengthapi_keyOPENAI_API_KEYzOptional[SecretStr]openai_api_keyorganizationOPENAI_ORG_IDOPENAI_ORGANIZATIONopenai_organizationz%Union[Literal['all'], set[str], None]allowed_specialz4Union[Literal['all'], set[str], Sequence[str], None]disallowed_speciali  
chunk_sizer!   max_retriestimeout)r:   rF   z0Optional[Union[float, tuple[float, float], Any]]request_timeoutheadersbooltiktoken_enabledtiktoken_model_nameFshow_progress_bardict[str, Any]model_kwargsr,   zUnion[Mapping[str, str], None]default_headersz!Union[Mapping[str, object], None]default_query   retry_min_seconds   retry_max_secondszUnion[Any, None]http_clienthttp_async_clientcheck_embedding_ctx_lengthforbidr   )extrapopulate_by_nameprotected_namespacesbefore)modec           
     `   t        |       }|j                  di       }t        |      D ]M  }||v rt        d| d      ||vst	        j
                  d| d| d| d       |j                  |      ||<   O |j                  |j                               }|rt        d| d	      ||d<   |S )
z>Build extra kwargs from additional params that were passed in.rd   zFound z supplied twice.z	WARNING! z/ is not default parameter.
                    zJ was transferred to model_kwargs.
                    Please confirm that z is what you intended.zParameters za should be specified explicitly. Instead they were passed in as part of `model_kwargs` parameter.)	r   getlist
ValueErrorwarningswarnpopintersectionkeys)clsvaluesall_required_field_namesro   
field_nameinvalid_model_kwargss         r   build_extrazOpenAIEmbeddings.build_extra  s     $<C#@ 

>2.v, 		;JU" 6*5E!FGG!99!* .L !))34JN
 %+JJz$:j!		;  8DDUZZ\R23 4S T 
 "'~    afterc                4   | j                   dv rt        d      | j                  r| j                  j                         nd| j                  | j
                  | j                  | j                  | j                  | j                  d}| j                  rP| j                  s| j                  r8| j                  }| j                  }| j                  }t        d|d|d|      | j                  sr| j                  r2| j                  s&	 ddl}|j!                  | j                  
      | _        d| j                  i}t#        j$                  di ||j&                  | _        | j(                  sr| j                  r2| j                  s&	 ddl}|j+                  | j                  
      | _        d| j                  i}t#        j,                  di ||j&                  | _        | S # t        $ r}t        d	      |d}~ww xY w# t        $ r}t        d	      |d}~ww xY w)z?Validate that api key and python package exists in environment.)azureazure_adazureadzEIf you are using Azure, please use the `AzureOpenAIEmbeddings` class.N)rQ   rT   rH   r\   r[   re   rf   zwCannot specify 'openai_proxy' if one of 'http_client'/'http_async_client' is already specified. Received:
openai_proxy=z
http_client=z
http_async_client=r   zRCould not import httpx python package. Please install it with `pip install httpx`.)proxyrk   r   )rL   rw   rS   get_secret_valuerW   rJ   r]   r[   re   rf   rN   rk   rl   r<   httpxImportErrorClientopenaiOpenAIr1   r=   AsyncClientAsyncOpenAI)	selfclient_paramsrN   rk   rl   r   esync_specificasync_specifics	            r   validate_environmentz%OpenAIEmbeddings.validate_environment  s-    #CC@  ;?:M:M##446SW 44,,++++#33!//

 $"2"2d6L6L,,L**K $ 6 6!/K>1F4E3GI 
 {{  )9)9  $)<<d6G6G<#H *D,<,<=M --I-I=ITTDK    )?)?  */):):ARAR):)S&+T-C-CDN & 2 2 !! ! j  / # %F  # %F s0   G  G=  	G:)G55G:=	HHHc                p    d| j                   i| j                  }| j                  | j                  |d<   |S )Nr?   rA   )r?   rd   rA   )r   paramss     r   _invocation_paramsz#OpenAIEmbeddings._invocation_paramsX  s8    At/@/@A??&#'??F< r   c                2   g }g }| j                   xs | j                  }| j                  s	 ddlm} |j                  |      }t        |      D ]  \  }}	|j                  |	d      }
t        dt        |
      | j                        D ]G  }|
||| j                  z    }|j                  |      }|j                  |       |j                  |       I  n	 t        j                   |      }| j&                  | j(                  dj+                         D ci c]
  \  }}||| }}}t        |      D ]  \  }}	| j                  j-                  d	      r|	j/                  d
d      }	|r |j                  |	fi |}n|j1                  |	      }t        dt        |      | j                        D ]4  }|j                  |||| j                  z           |j                  |       6  | j2                  r$	 ddlm}  |t        dt        |      |            }nt        dt        |      |      }|||fS # t
        $ r t        d      w xY w# t"        $ r t        j$                  d      }Y ~w xY wc c}}w # t
        $ r t        dt        |      |      }Y iw xY w)a  
        Take the input `texts` and `chunk_size` and return 3 iterables as a tuple:

        We have `batches`, where batches are sets of individual texts
        we want responses from the openai api. The length of a single batch is
        `chunk_size` texts.

        Each individual text is also split into multiple texts based on the
        `embedding_ctx_length` parameter (based on number of tokens).

        This function returns a 3-tuple of the following:

        _iter: An iterable of the starting index in `tokens` for each *batch*
        tokens: A list of tokenized texts, where each text has already been split
            into sub-texts based on the `embedding_ctx_length` parameter. In the
            case of tiktoken, this is a list of token arrays. In the case of
            HuggingFace transformers, this is a list of strings.
        indices: An iterable of the same length as `tokens` that maps each token-array
            to the index of the original text in `texts`.
        r   )AutoTokenizerzCould not import transformers python package. This is needed for OpenAIEmbeddings to work without `tiktoken`. Please install it with `pip install transformers`. )pretrained_model_name_or_pathF)add_special_tokenscl100k_base)rX   rY   001
 )tqdm)ra   r?   r`   transformersr   r   rw   from_pretrained	enumerateencoder#   r$   rP   decoder%   tiktokenencoding_for_modelKeyErrorget_encodingrX   rY   itemsendswithreplaceencode_ordinaryrb   	tqdm.autor   )r   textsrZ   r)   r+   
model_namer   	tokenizerr0   text	tokenizedjtoken_chunk
chunk_textencodingkvencoder_kwargstokenr   _iters                        r   	_tokenizezOpenAIEmbeddings._tokenize_  s   . /1--;
 $$6 &55.8 6 I %U+ &4'0'7'7QV'7'W	 q#i.$2K2KL &A-6A 9 99.K
 '0&6&6{&CJMM*-NN1%&&@#66zB (,';';*.*A*A %'.Aq = 1.N . %U+ &4::&&u-  <<c2D!+HOODCNCE$44T:E q#e*d.G.GH &AMM%A0I0I,I"JKNN1%&&" !!:*"&uQFZ'H"I !S[*5Efg%%   V 6  @#00?@.<  :aVj9:s5   H1 I	 I.3"I4 1I	I+*I+4JJ)rZ   c          	         |xs  j                   }i  j                  | j                  ||      \  }}}g }	|D ]a  }
  j                  j                  dd||
|
|z    i}t        |t              s|j                         }|	j                  d |d   D               c t        t        |      ||	| j                        }dd fd}|D cg c]  }||n |        c}S c c}w )al  
        Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        set embedding context length and chunk size. It supports both tiktoken
        and HuggingFace tokenizer based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        inputc              3  &   K   | ]	  }|d      ywr4   Nr   r   rs     r   r   z<OpenAIEmbeddings._get_len_safe_embeddings.<locals>.<genexpr>       %Oan%Or"   dataNc                     I j                   j                  dddi} t        | t              s| j	                         } | d   d   d   S Nr    r   r   r4   r   )r<   create
isinstancedict
model_dumpaverage_embedded_cached_empty_embeddingclient_kwargsr   s    r   empty_embeddingzBOpenAIEmbeddings._get_len_safe_embeddings.<locals>.empty_embedding  s]    &.#54;;#5#5#PB#P-#P !"2D9'7'B'B'D$*:6*B1*Ek*R'**r   r   returnlist[float])rZ   r   r   r<   r   r   r   r   extendr7   r$   r,   r   r   enginerZ   kwargs_chunk_sizer   r)   r+   r*   r0   responser1   r   r   r   r   s   `              @@r   _get_len_safe_embeddingsz)OpenAIEmbeddings._get_len_safe_embeddings  s   . !3DOO=422=f=!%{!Cvw02 	PA)t{{)) Q[15BH h-#..0%%%Ohv>N%OO	P 9J 2GT__

 :>	+ DNNaQ](99NNNs   C'c          	     0   K   |xs  j                   }i  j                  | j                  ||      \  }}}g }	t        dt	        |      |      D ]i  }
  j
                  j                  dd||
|
|z    i d{   }t        |t              s|j                         }|	j                  d |d   D               k t        t	        |      ||	| j                        }dd fd}|D cg c]  }||n |        d{    c}S 7 7 c c}w w)	a  
        Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and asynchronous embedding generation,
        respecting the set embedding context length and chunk size. It supports both
        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        r   r   Nc              3  &   K   | ]	  }|d      ywr   r   r   s     r   r   z=OpenAIEmbeddings._aget_len_safe_embeddings.<locals>.<genexpr>  r   r"   r   c                    K   Q j                   j                  dddi d {   } t        | t              s| j	                         } | d   d   d   S 7 1wr   )r=   r   r   r   r   r   s    r   r   zCOpenAIEmbeddings._aget_len_safe_embeddings.<locals>.empty_embedding   sw     &.)A):):)A)A ** -* $  ""2D9'7'B'B'D$*:6*B1*Ek*R'**$s   $AA2Ar   r   )rZ   r   r   r#   r$   r=   r   r   r   r   r   r7   r,   r   s   `              @@r   _aget_len_safe_embeddingsz*OpenAIEmbeddings._aget_len_safe_embeddings  s2    0 !3DOO=422=f=!%{!Cvw02q#f+{3 	PA5T..55 Q[15B H h-#..0%%%Ohv>N%OO	P 9J 2GT__

 :>		+ JTTAQ]o.?(??TT10 )@Ts7   A9D>D?A1D0DD
D
DDDc           	        |xs | j                   }i | j                  |}| j                  s~g }t        dt	        |      |      D ]a  } | j
                  j                  dd||||z    i|}t        |t              s|j                         }|j                  d |d   D               c |S t        t        | j                        }	 | j                  |f|	|d|S )a  Call out to OpenAI's embedding endpoint for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   c              3  &   K   | ]	  }|d      ywr   r   r   s     r   r   z3OpenAIEmbeddings.embed_documents.<locals>.<genexpr>E       !KQ!K.!Kr"   r   r   rZ   r   )rZ   r   rm   r#   r$   r<   r   r   r   r   r   r   r>   rB   r   
r   r   rZ   r   chunk_size_r   r1   r0   r   r   s
             r   embed_documentsz OpenAIEmbeddings.embed_documents-  s     !3DOO=422=f=..,.J1c%j+6 L-4;;-- AO48E "(D1'224H!!!K(6:J!KKL  c4??+,t,,
 Z
;A
 	
r   c           	       K   |xs | j                   }i | j                  |}| j                  sg }t        dt	        |      |      D ]i  } | j
                  j                  dd||||z    i| d{   }t        |t              s|j                         }|j                  d |d   D               k |S t        t        | j                        }	 | j                  |f|	|d| d{   S 7 {7 w)a  Call out to OpenAI's embedding endpoint async for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   Nc              3  &   K   | ]	  }|d      ywr   r   r   s     r   r   z4OpenAIEmbeddings.aembed_documents.<locals>.<genexpr>g  r   r"   r   r   r   )rZ   r   rm   r#   r$   r=   r   r   r   r   r   r   r>   rB   r   r   s
             r   aembed_documentsz!OpenAIEmbeddings.aembed_documentsO  s     !3DOO=422=f=..,.J1c%j+6 L!9!2!2!9!9 "AO4"8E"  "(D1'224H!!!K(6:J!KKL  c4??+3T33
 Z
;A
 
 	

s%   A/C11C-2A6C1(C/)C1/C1c                0     | j                   |gfi |d   S )a  Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        r   )r   )r   r   r   s      r   embed_queryzOpenAIEmbeddings.embed_queryq  s#     $t##TF5f5a88r   c                P   K    | j                   |gfi | d{   }|d   S 7 	w)a	  Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        Nr   )r   )r   r   r   r1   s       r   aembed_queryzOpenAIEmbeddings.aembed_query}  s4      1400$B6BB
!} Cs   &$
&)r~   rc   r   r   )r   r   )r   rc   )r   	list[str]rZ   rO   r   z<tuple[Iterable[int], list[Union[list[int], str]], list[int]])
r   r   r   r>   rZ   r@   r   r   r   list[list[float]]r   )r   r   rZ   r@   r   r   r   r   )r   r>   r   r   r   r   )6__name__
__module____qualname____doc__r   r<   __annotations__r=   r?   rA   rB   r   rG   rJ   rL   rN   rP   r   rS   rW   rX   rY   rZ   r[   r]   r^   r`   ra   rb   r   rd   r,   re   rf   rh   rj   rk   rl   rm   r   model_configr   classmethodr   r   propertyr   r   r   r   r   r   r   r   r   r   r   r9   r9   O   s   Ob d3FC3dD9L#9)E3) $J$
 !&J%(- !5tD)  T%*(3Dd*S&O]  &+ !2DA&O]  #( >#L-  !%#$8*/9ISW)X+N'  P). 34d
*  O=AO:AOSLSJ8K<HMIIOE GS!d!G)--J $t#8#(#>L.>VJ!6:O3:7;M4; s7s7$(K!( +/'.S'++- BL (#  $2 '"7 #7r  ^&^&,/^&	E^&N %)1O1O 	1O
 "1O 1O 
1Ot %)5U5U 	5U
 "5U 5U 
5Up =A 
 
,9 
LO 
	 
F =A 
 
,9 
LO 
	 
D
9r   r9   )r(   rO   r)   zlist[Union[list[int], str]]r*   r   r+   z	list[int]r,   r_   r   zlist[Optional[list[float]]])#
__future__r   loggingrx   collections.abcr   r   r   typingr   r   r	   r
   r   r   r   langchain_core.embeddingsr   langchain_core.utilsr   r   r   pydanticr   r   r   r   r   typing_extensionsr   	getLoggerr   loggerr7   r9   r   r   r   <module>r     s    "   7 7 6 6   0 T T M M "			8	$::': *: 	:
 : !:zyy* yr   