
    A'hG                    B   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ  ej2                  e      Z G d dee
      Z G d de      Z G d d      Z G d dee      Z  G d dee      Z! G d dee      Z"y)z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Sequence)Enum)AnyOptionalUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                      e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZy)EvaluatorTypezThe types of the evaluators.qacot_qa
context_qapairwise_stringscore_stringlabeled_pairwise_stringlabeled_score_string
trajectorycriterialabeled_criteriastring_distanceexact_matchregex_matchpairwise_string_distanceembedding_distancepairwise_embedding_distancejson_validityjson_equalityjson_edit_distancejson_schema_validationN)__name__
__module____qualname____doc__QACOT_QA
CONTEXT_QAPAIRWISE_STRINGSCORE_STRINGLABELED_PAIRWISE_STRINGLABELED_SCORE_STRINGAGENT_TRAJECTORYCRITERIALABELED_CRITERIASTRING_DISTANCEEXACT_MATCHREGEX_MATCHPAIRWISE_STRING_DISTANCEEMBEDDING_DISTANCEPAIRWISE_EMBEDDING_DISTANCEJSON_VALIDITYJSON_EQUALITYJSON_EDIT_DISTANCEJSON_SCHEMA_VALIDATION     b/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/langchain/evaluation/schema.pyr   r      s    &	BF% JS'O!L7H1@#VH<)7'OPKIKN9=-M"?;#M.#M=-T5Ir?   r   c                  ,    e Zd ZdZeedd              Zy)LLMEvalChainz,A base class for evaluators that use an LLM.c                     y)z#Create a new evaluator from an LLM.Nr>   )clsllmkwargss      r@   from_llmzLLMEvalChain.from_llmN       r?   N)rE   r   rF   r   returnrB   )r&   r'   r(   r)   classmethodr   rG   r>   r?   r@   rB   rB   K   s    62  2r?   rB   c                  p    e Zd ZdZedd       Zedd       Zed	d       Zed	d       Z	 	 d
	 	 	 	 	 ddZ	y)_EvalArgsMixinz(Mixin for checking evaluation arguments.c                     yz2Whether this evaluator requires a reference label.Fr>   selfs    r@   requires_referencez!_EvalArgsMixin.requires_referenceW        r?   c                     y)0Whether this evaluator requires an input string.Fr>   rO   s    r@   requires_inputz_EvalArgsMixin.requires_input\   rR   r?   c                6    d| j                   j                   dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r&   rO   s    r@   _skip_input_warningz"_EvalArgsMixin._skip_input_warninga   s     $DNN$;$;#<<TUUr?   c                6    d| j                   j                   dS )z*Warning to show when reference is ignored.zIgnoring reference in rW   rX   rO   s    r@   _skip_reference_warningz&_EvalArgsMixin._skip_reference_warningf   s!     %T^^%<%<$==UV	
r?   Nc                T   | j                   r$|"t        | j                  j                   d      |!| j                   st	        | j
                         | j                  r$|"t        | j                  j                   d      |#| j                  st	        | j                         yyy)a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)rU   
ValueErrorrY   r&   r   rZ   rQ   r\   )rP   	referenceinputs      r@   _check_evaluation_argsz%_EvalArgsMixin._check_evaluation_argsm   s     5= 7 788RSTTt':':))*""y'8 7 788UVWW"4+B+B--. ,C"r?   rI   boolrI   str)NN)r_   Optional[str]r`   rf   rI   None)
r&   r'   r(   r)   propertyrQ   rU   rZ   r\   ra   r>   r?   r@   rL   rL   T   s~    2    V V 
 
 $(#/ / / 
	/r?   rL   c                      e Zd ZdZed
d       Zedd       Zeddd	 	 	 	 	 	 	 	 	 dd       Zddd	 	 	 	 	 	 	 	 	 ddZ	ddd	 	 	 	 	 	 	 	 	 ddZ
ddd	 	 	 	 	 	 	 	 	 dd	Zy)StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.c                .    | j                   j                  S )zThe name of the evaluation.rX   rO   s    r@   evaluation_namezStringEvaluator.evaluation_name   s     ~~&&&r?   c                     yrN   r>   rO   s    r@   rQ   z"StringEvaluator.requires_reference   rR   r?   Nr_   r`   c                    y)a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr>   rP   
predictionr_   r`   rF   s        r@   _evaluate_stringsz!StringEvaluator._evaluate_strings   rH   r?   c               T   K   t        d| j                  f|||d| d{   S 7 w)aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nrq   r_   r`   )r   rr   rp   s        r@   _aevaluate_stringsz"StringEvaluator._aevaluate_strings   sE     , %""
 "
 
 
 	
 
s   (&(c               T    | j                  ||        | j                  d|||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rn   rt   r>   )ra   rr   rp   s        r@   evaluate_stringsz StringEvaluator.evaluate_strings   s@    $ 	##iu#E%t%% 
!Ye
GM
 	
r?   c               p   K   | j                  ||        | j                  d|||d| d{   S 7 w)a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rn   rt   Nr>   )ra   ru   rp   s        r@   aevaluate_stringsz!StringEvaluator.aevaluate_strings   sN     $ 	##iu#E,T,, 
!Ye
GM
 
 	
 
s   -646rd   rb   )
rq   zUnion[str, Any]r_   Optional[Union[str, Any]]r`   rz   rF   r   rI   dict)
rq   re   r_   rf   r`   rf   rF   r   rI   r{   )r&   r'   r(   r)   rh   rl   rQ   r   rr   ru   rw   ry   r>   r?   r@   rj   rj      s;     ' '   
 04+/ $ -	
 )  
 6 04+/
 $
 -	

 )
 
 

F $(#
 
 !	

 
 
 

6 $(#
 
 !	

 
 
 

r?   rj   c                      e Zd ZdZeddd	 	 	 	 	 	 	 	 	 	 	 dd       Zddd	 	 	 	 	 	 	 	 	 	 	 ddZddd	 	 	 	 	 	 	 	 	 	 	 ddZddd	 	 	 	 	 	 	 	 	 	 	 ddZy)	PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nrn   c                    y)1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr>   rP   rq   prediction_br_   r`   rF   s         r@   _evaluate_string_pairsz.PairwiseStringEvaluator._evaluate_string_pairs   rH   r?   c               V   K   t        d| j                  f||||d| d{   S 7 w)@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nrq   r   r_   r`   )r   r   r   s         r@   _aevaluate_string_pairsz/PairwiseStringEvaluator._aevaluate_string_pairs  sH     ( %''
 "%
 
 
 	
 
    )')c               V    | j                  ||        | j                  d||||d|S )r   rn   r   r>   )ra   r   r   s         r@   evaluate_string_pairsz-PairwiseStringEvaluator.evaluate_string_pairs0  sG    ( 	##iu#E*t** 
!%	

 
 	
r?   c               r   K   | j                  ||        | j                  d||||d| d{   S 7 w)r   rn   r   Nr>   )ra   r   r   s         r@   aevaluate_string_pairsz.PairwiseStringEvaluator.aevaluate_string_pairsM  sU     ( 	##iu#E1T11 
!%	

 
 
 	
 
   .757)rq   re   r   re   r_   rf   r`   rf   rF   r   rI   r{   )	r&   r'   r(   r)   r   r   r   r   r   r>   r?   r@   r}   r}      s8   N $(#  	
 !   
 4 $(#
 
 	

 !
 
 
 

F $(#
 
 	

 !
 
 
 

D $(#
 
 	

 !
 
 
 

r?   r}   c                      e Zd ZdZed	d       Zedd	 	 	 	 	 	 	 	 	 	 	 d
d       Zdd	 	 	 	 	 	 	 	 	 	 	 d
dZdd	 	 	 	 	 	 	 	 	 	 	 d
dZ	dd	 	 	 	 	 	 	 	 	 	 	 d
dZ
y)AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.c                     y)rT   Tr>   rO   s    r@   rU   z'AgentTrajectoryEvaluator.requires_inputn  s     r?   N)r_   c                    y)  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr>   rP   rq   agent_trajectoryr`   r_   rF   s         r@   _evaluate_agent_trajectoryz3AgentTrajectoryEvaluator._evaluate_agent_trajectorys  rH   r?   c               V   K   t        d| j                  f||||d| d{   S 7 w)  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)rq   r   r_   r`   )r   r   r   s         r@   _aevaluate_agent_trajectoryz4AgentTrajectoryEvaluator._aevaluate_agent_trajectory  sH     * %++
 "-
 
 
 	
 
r   c               V    | j                  ||        | j                  d||||d|S )r   rn   rq   r`   r   r_   r>   )ra   r   r   s         r@   evaluate_agent_trajectoryz2AgentTrajectoryEvaluator.evaluate_agent_trajectory  sG    * 	##iu#E.t.. 
!-	

 
 	
r?   c               r   K   | j                  ||        | j                  d||||d| d{   S 7 w)r   rn   r   Nr>   )ra   r   r   s         r@   aevaluate_agent_trajectoryz3AgentTrajectoryEvaluator.aevaluate_agent_trajectory  sU     * 	##iu#E5T55 
!-	

 
 
 	
 
r   rb   )rq   re   r   z!Sequence[tuple[AgentAction, str]]r`   re   r_   rf   rF   r   rI   r{   )r&   r'   r(   r)   rh   rU   r   r   r   r   r   r>   r?   r@   r   r   k  s@   6   $(  <	
  !  
 8 $(
 
 <	

 
 !
 
 

J $(
 
 <	

 
 !
 
 

H $(
 
 <	

 
 !
 
 

r?   r   )#r)   
__future__r   loggingabcr   r   collections.abcr   enumr   typingr   r	   r
   warningsr   langchain_core.agentsr   langchain_core.language_modelsr   langchain_core.runnables.configr   langchain.chains.baser   	getLoggerr&   loggerre   r   rB   rL   rj   r}   r   r>   r?   r@   <module>r      s    9 "  # $  ' '  - < ; '			8	$3JC 3Jl25 2./ ./bq
nc q
ho
nc o
dx
~s x
r?   