
    A'hI                     v   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ defdZ ej:                  e      Z ej@                  d      de!fd       Z"defdZ# G d de$e      Z% G d de      Z& G d de&e      Z' G d de&e      Z(y)z@A chain for comparing the output of two models using embeddings.    N)Enum)util)AnyOptional)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  J    	 dd l } | S # t        $ r}t        d      |d }~ww xY w)Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npes     s/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/langchain/evaluation/embedding_distance/base.py_import_numpyr      s2    	 N
	s    	""   )maxsizec                  l    t        t        j                  d            ryt        j	                  d       y)Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarning     r   _check_numpyr$   %   s,    DNN7#$
NN	 r#   c                      	 ddl m}   |        S # t        $ r) 	 ddlm}  n# t        $ r t        d      w xY wY  |        S w xY w)zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair'   r   %langchain_community.embeddings.openair&   s    r   _embedding_factoryr+   3   sd    5   		  	Q 	 	s!    	A A5A Ac                   $    e Zd ZdZdZdZdZdZdZy)EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    cosine	euclidean	manhattan	chebyshevhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr"   r#   r   r-   r-   J   s"     FIIIGr#   r-   c                      e Zd ZU dZ ee      Zeed<    ee	j                        Ze	ed<   edeeef   deeef   fd       Z ed	
      Zedee   fd       ZdedefdZde	defdZedededefd       Zedededefd       Zedededefd       Zedededefd       Zedededefd       ZdedefdZy)_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 J   |j                  d      }g }	 ddlm} |j                  |       	 ddlm} |j                  |       |st	        d      t        |t        |            r	 ddl}|S |S # t        $ r Y Nw xY w# t        $ r Y Ew xY w# t        $ r t	        d      w xY w)zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r?   r   r&   r(   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr)   r'   appendr   r*   
isinstancetupletiktoken)clsrB   r?   types_r'   rH   s         r   _validate_tiktoken_installedz9_EmbeddingDistanceChainMixin._validate_tiktoken_installedh   s     ZZ-
	9MM*+	 MM*+ Q 
 j%-0 v9  		  		  !I s.   A/ A> 'B /	A;:A;>	B
	B
B"T)arbitrary_types_allowedc                     dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer"   selfs    r   output_keysz(_EmbeddingDistanceChainMixin.output_keys   s     yr#   resultc                 D    d|d   i}t         |v r|t            |t         <   |S )NrN   r   )rP   rR   parseds      r   _prepare_outputz,_EmbeddingDistanceChainMixin._prepare_output   s*    6'?+f$WoF7Or#   metricc           
      8   t         j                  | j                  t         j                  | j                  t         j
                  | j                  t         j                  | j                  t         j                  | j                  i}||v r||   S t        d|       )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r-   r7   _cosine_distancer8   _euclidean_distancer9   _manhattan_distancer:   _chebyshev_distancer;   _hamming_distance
ValueError)rP   rV   metricss      r   _get_metricz(_EmbeddingDistanceChainMixin._get_metric   s     $$d&;&;'')A)A'')A)A'')A)A%%t'='=
 W6?"/x899r#   abc                 X    	 ddl m} d || |      z
  S # t        $ r t        d      w xY w)zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)langchain_community.utils.mathrc   r   )r`   ra   rc   s      r   rX   z-_EmbeddingDistanceChainMixin._cosine_distance   sB    	H &q!,,,  	6 	s    )c                     t               r"ddl}|j                  j                  | |z
        S t	        d t        | |      D              dz  S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   Nc              3   8   K   | ]  \  }}||z
  ||z
  z    y wNr"   .0xys      r   	<genexpr>zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>   s!     ;AAEa!e$;s   g      ?)r$   r   linalgnormsumzipr`   ra   r   s      r   rY   z0_EmbeddingDistanceChainMixin._euclidean_distance   s?     >99>>!a%((;Q;;sBBr#   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wrg   absrh   s      r   rl   zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>        4$!Q3q1u:4   )r$   r   ro   ru   rp   rq   s      r   rZ   z0_EmbeddingDistanceChainMixin._manhattan_distance   B     >B66"&&Q-((4#a)444r#   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wrg   rt   rh   s      r   rl   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>  rv   rw   )r$   r   maxru   rp   rq   s      r   r[   z0_EmbeddingDistanceChainMixin._chebyshev_distance   rx   r#   c                     t               rt               }|j                  | |k7        S t        d t	        | |      D              t        |       z  S )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        c              3   2   K   | ]  \  }}||k7  sd   yw)r   Nr"   rh   s      r   rl   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>  s     5Aa1f15s   )r$   r   meanro   rp   lenrq   s      r   r\   z._EmbeddingDistanceChainMixin._hamming_distance  sB     >B7716?"5Q55A>>r#   vectorsc                 L   | j                  | j                        }t               rft        |t	               j
                        rH ||d   j                  dd      |d   j                  dd            j                         }t        |      S  ||d   |d         }t        |      S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r   r   )	r_   rA   r$   rF   r   ndarrayreshapeitemfloat)rP   r   rV   rN   s       r   _compute_scorez+_EmbeddingDistanceChainMixin._compute_score  s     !!$"6"67>j-/2I2IJ71:--a4gaj6H6HB6OPUUWE U| 71:wqz2EU|r#   N) r3   r4   r5   r6   r   r+   r?   r
   __annotations__r-   r7   rA   r   dictstrr   rK   r   model_configpropertylistrQ   rU   r_   staticmethodrX   rY   rZ   r[   r\   r   r   r"   r#   r   r=   r=   \   s    #3EFJ
F).7H7O7O)PO&P+$sCx. +T#s(^ + +Z  $L T#Y  d t :"3 : :* -C -C -C - -( Cs Cs Cs C C" 5s 5s 5s 5 5  5s 5s 5s 5 5  ?S ?S ?S ? ? c e r#   r=   c                   l   e Zd ZdZedefd       Zedefd       Zede	e   fd       Z
	 ddeeef   dee   deeef   fd	Z	 ddeeef   dee   deeef   fd
Zdddddddedee   dedee	e      deeeef      dededefdZdddddddedee   dedee	e      deeeef      dededefdZy)EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                      y)zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr"   rO   s    r   requires_referencez-EmbeddingDistanceEvalChain.requires_reference4  s     r#   c                 6    d| j                   j                   dS )N
embedding_	_distancerA   valuerO   s    r   evaluation_namez*EmbeddingDistanceEvalChain.evaluation_name=  s    D00667yAAr#   c                 
    ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer"   rO   s    r   
input_keysz%EmbeddingDistanceEvalChain.input_keysA  s     k**r#   Ninputsrun_managerc                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rN   r?   embed_documentsr$   r   arrayr   rP   r   r   r   r   rN   s         r   _callz EmbeddingDistanceEvalChain._callJ  sa     //11L!6+#67
 >Bhhw'G##G,r#   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrN   r?   aembed_documentsr$   r   r   r   r   s         r   _acallz!EmbeddingDistanceEvalChain._acallb  sr      88|${#
 
 >Bhhw'G##G,
   'A*A(?A*F)r   	callbackstagsmetadatainclude_run_infor   r   r   r   r   r   kwargsc                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   r   r   r   rU   	rP   r   r   r   r   r   r   r   rR   s	            r   _evaluate_stringsz,EmbeddingDistanceEvalChain._evaluate_strings}  s5    2 ",9E-
 ##F++r#   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NacallrU   r   s	            r   _aevaluate_stringsz-EmbeddingDistanceEvalChain._aevaluate_strings  sL     2 zz",9E- " 
 
 ##F++
   757rg   )r3   r4   r5   r6   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r"   r#   r   r   r   )  s    D   B B B +DI + + =A S#X  89  
c3h	 6 BF S#X  =>  
c3h	 > $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,L $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,r#   r   c                   F   e Zd ZdZedee   fd       Zedefd       Z	 dde	ee
f   dee   de	ee
f   fdZ	 dde	ee
f   dee   de	ee
f   fd	Zdddd
ddedededeee      dee	ee
f      dede
de	fdZdddd
ddedededeee      dee	ee
f      dede
de	fdZy)"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 
    ddgS )r   r   prediction_br"   rO   s    r   r   z-PairwiseEmbeddingDistanceEvalChain.input_keys  s     n--r#   c                 6    d| j                   j                   dS )Npairwise_embedding_r   r   rO   s    r   r   z2PairwiseEmbeddingDistanceEvalChain.evaluation_name  s    $T%9%9%?%?$@	JJr#   Nr   r   c                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rN   r   r   s         r   r   z(PairwiseEmbeddingDistanceEvalChain._call  sd     //11|$~&
 >Bhhw'G##G,r#   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrN   r   r   s         r   r   z)PairwiseEmbeddingDistanceEvalChain._acall  sr      88|$~&
 
 >Bhhw'G##G,
r   F)r   r   r   r   r   r   r   r   r   r   r   c                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   	rP   r   r   r   r   r   r   r   rR   s	            r   _evaluate_string_pairsz9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairs  s5    4 ",lK-
 ##F++r#   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   s	            r   _aevaluate_string_pairsz:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairs4  sL     8 zz",lK- " 
 
 ##F++
r   rg   )r3   r4   r5   r6   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r"   r#   r   r   r     s    .DI . . K K K =A S#X  89  
c3h	 < BF S#X  =>  
c3h	 @  $$(-1!&!, !, 	!,
 !, tCy!!, 4S>*!, !, !, 
!,P  $$(-1!&#, #, 	#,
 #, tCy!#, 4S>*#, #, #, 
#,r#   r   ))r6   	functoolsloggingenumr   	importlibr   typingr   r    langchain_core.callbacks.managerr   r   r	   langchain_core.embeddingsr
   langchain_core.utilsr   pydanticr   r   langchain.chains.baser   langchain.evaluation.schemar   r   langchain.schemar   r   	getLoggerr3   r    	lru_cacher   r$   r+   r   r-   r=   r   r   r"   r#   r   <module>r      s    F       
 1 ) & ' P $s  
		8	$ Q
d 
  
J .T $J5 JZV,!= V,rU, "9U,r#   