Ë
    'GîgG  ã                  ó>  — d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ej2                  e«      Z G d„ dee«      Z G d„ de«      Z G d„ d«      Z G d„ dee«      Z  G d„ dee«      Z! G d„ dee«      Z"y)z3Interfaces to be implemented by general evaluators.é    )ÚannotationsN)ÚABCÚabstractmethod)ÚEnum)ÚAnyÚOptionalÚSequenceÚTupleÚUnion)Úwarn)ÚAgentAction)ÚBaseLanguageModel)Úrun_in_executor)ÚChainc                  ó†   — e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZy)ÚEvaluatorTypezThe types of the evaluators.ÚqaÚcot_qaÚ
context_qaÚpairwise_stringÚscore_stringÚlabeled_pairwise_stringÚlabeled_score_stringÚ
trajectoryÚcriteriaÚlabeled_criteriaÚstring_distanceÚexact_matchÚregex_matchÚpairwise_string_distanceÚembedding_distanceÚpairwise_embedding_distanceÚjson_validityÚjson_equalityÚjson_edit_distanceÚjson_schema_validationN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚQAÚCOT_QAÚ
CONTEXT_QAÚPAIRWISE_STRINGÚSCORE_STRINGÚLABELED_PAIRWISE_STRINGÚLABELED_SCORE_STRINGÚAGENT_TRAJECTORYÚCRITERIAÚLABELED_CRITERIAÚSTRING_DISTANCEÚEXACT_MATCHÚREGEX_MATCHÚPAIRWISE_STRING_DISTANCEÚEMBEDDING_DISTANCEÚPAIRWISE_EMBEDDING_DISTANCEÚJSON_VALIDITYÚJSON_EQUALITYÚJSON_EDIT_DISTANCEÚJSON_SCHEMA_VALIDATION© ó    úx/home/kushmeetdev/apache_webroot/langgraph_flaskproject/venv/lib/python3.12/site-packages/langchain/evaluation/schema.pyr   r      sÂ   „ Ù&à	€Bðà€Fð%ð €JØSØ'€Oðà!€Lðà7ÐðHà1Ðð@à#ÐØVØ€Hð<à)Ðð7à'€OØPØ€KØIØ€KØNØ9ÐØ=Ø-ÐØMØ"?ÐØ;Ø#€MØ.Ø#€MØ=Ø-ÐØTØ5ÐØIr@   r   c                  ó,   — e Zd ZdZeedd„«       «       Zy)ÚLLMEvalChainz,A base class for evaluators that use an LLM.c                 ó   — y)z#Create a new evaluator from an LLM.Nr?   )ÚclsÚllmÚkwargss      rA   Úfrom_llmzLLMEvalChain.from_llmM   ó    r@   N)rF   r   rG   r   ÚreturnrC   )r'   r(   r)   r*   Úclassmethodr   rH   r?   r@   rA   rC   rC   J   s   „ Ù6àØò2ó ó ñ2r@   rC   c                  óp   — e Zd ZdZedd„«       Zedd„«       Zed	d„«       Zed	d„«       Z	 	 d
	 	 	 	 	 dd„Z	y)Ú_EvalArgsMixinz(Mixin for checking evaluation arguments.c                 ó   — y©z2Whether this evaluator requires a reference label.Fr?   ©Úselfs    rA   Úrequires_referencez!_EvalArgsMixin.requires_referenceV   ó   € ð r@   c                 ó   — y)ú0Whether this evaluator requires an input string.Fr?   rP   s    rA   Úrequires_inputz_EvalArgsMixin.requires_input[   rS   r@   c                ó6   — d| j                   j                  › dS )z&Warning to show when input is ignored.zIgnoring input in ú, as it is not expected.©Ú	__class__r'   rP   s    rA   Ú_skip_input_warningz"_EvalArgsMixin._skip_input_warning`   s   € ð $ D§N¡N×$;Ñ$;Ð#<Ð<TÐUÐUr@   c                ó6   — d| j                   j                  › dS )z*Warning to show when reference is ignored.zIgnoring reference in rX   rY   rP   s    rA   Ú_skip_reference_warningz&_EvalArgsMixin._skip_reference_warninge   s!   € ð % T§^¡^×%<Ñ%<Ð$=Ð=UÐVð	
r@   Nc                óT  — | j                   r$|€"t        | j                  j                  › d«      ‚|!| j                   st	        | j
                  «       | j                  r$|€"t        | j                  j                  › d«      ‚|#| j                  st	        | j                  «       yyy)a†  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)rV   Ú
ValueErrorrZ   r'   r   r[   rR   r]   )rQ   Ú	referenceÚinputs      rA   Ú_check_evaluation_argsz%_EvalArgsMixin._check_evaluation_argsl   sœ   € ð ×Ò 5 =Ü §¡× 7Ñ 7Ð8Ð8RÐSÓTÐTØÐ t×':Ò':Ü×)Ñ)Ô*Ø×"Ò" yÐ'8Ü §¡× 7Ñ 7Ð8Ð8UÐVÓWÐWØÐ"¨4×+BÒ+BÜ×-Ñ-Õ.ð ,CÐ"r@   ©rJ   Úbool©rJ   Ústr)NN)r`   úOptional[str]ra   rg   rJ   ÚNone)
r'   r(   r)   r*   ÚpropertyrR   rV   r[   r]   rb   r?   r@   rA   rM   rM   S   s~   „ Ù2àòó ðð òó ðð òVó ðVð ò
ó ð
ð $(Ø#ð/à ð/ð ð/ð 
ô	/r@   rM   c                  óÆ   — e Zd ZdZed
d„«       Zedd„«       Zedddœ	 	 	 	 	 	 	 	 	 dd„«       Zdddœ	 	 	 	 	 	 	 	 	 dd„Z	dddœ	 	 	 	 	 	 	 	 	 dd„Z
dddœ	 	 	 	 	 	 	 	 	 dd	„Zy)ÚStringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.c                ó.   — | j                   j                  S )zThe name of the evaluation.rY   rP   s    rA   Úevaluation_namezStringEvaluator.evaluation_nameˆ   s   € ð ~‰~×&Ñ&Ð&r@   c                 ó   — yrO   r?   rP   s    rA   rR   z"StringEvaluator.requires_reference   rS   r@   N©r`   ra   c                ó   — y)a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr?   ©rQ   Ú
predictionr`   ra   rG   s        rA   Ú_evaluate_stringsz!StringEvaluator._evaluate_strings’   rI   r@   c             ‹  óT   K  — t        d| j                  f|||dœ|¤Žƒ d{  –—† S 7 Œ­w)aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        N©rr   r`   ra   )r   rs   rq   s        rA   Ú_aevaluate_stringsz"StringEvaluator._aevaluate_stringsª   sE   è ø€ ô, %ØØ×"Ñ"ð
ð "ØØñ
ð ñ
÷ 
ð 	
ð 
ús   ‚(¡&¢(c               óT   — | j                  ||¬«        | j                  d|||dœ|¤ŽS )aú  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        ro   ru   r?   )rb   rs   rq   s        rA   Úevaluate_stringsz StringEvaluator.evaluate_stringsÉ   s@   € ð$ 	×#Ñ#¨i¸uÐ#ÔEØ%ˆt×%Ñ%ð 
Ø!¨Y¸eñ
ØGMñ
ð 	
r@   c             ‹  óp   K  — | j                  ||¬«        | j                  d|||dœ|¤Žƒ d{  –—† S 7 Œ­w)a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        ro   ru   Nr?   )rb   rv   rq   s        rA   Úaevaluate_stringsz!StringEvaluator.aevaluate_stringsà   sN   è ø€ ð$ 	×#Ñ#¨i¸uÐ#ÔEØ,T×,Ñ,ð 
Ø!¨Y¸eñ
ØGMñ
÷ 
ð 	
ð 
ús   ‚-6¯4°6re   rc   )
rr   zUnion[str, Any]r`   úOptional[Union[str, Any]]ra   r{   rG   r   rJ   Údict)
rr   rf   r`   rg   ra   rg   rG   r   rJ   r|   )r'   r(   r)   r*   ri   rm   rR   r   rs   rv   rx   rz   r?   r@   rA   rk   rk   „   s;  „ ñ ð ò'ó ð'ð òó ðð ð
 04Ø+/ñð $ðð -ð	ð
 )ðð ðð 
òó ðð6 04Ø+/ñ
ð $ð
ð -ð	
ð
 )ð
ð ð
ð 
ó
ðF $(Ø#ñ
ð ð
ð !ð	
ð
 ð
ð ð
ð 
ó
ð6 $(Ø#ñ
ð ð
ð !ð	
ð
 ð
ð ð
ð 
ô
r@   rk   c                  ó²   — e Zd ZdZedddœ	 	 	 	 	 	 	 	 	 	 	 dd„«       Zdddœ	 	 	 	 	 	 	 	 	 	 	 dd„Zdddœ	 	 	 	 	 	 	 	 	 	 	 dd„Zdddœ	 	 	 	 	 	 	 	 	 	 	 dd„Zy)	ÚPairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nro   c                ó   — y)á1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr?   ©rQ   rr   Úprediction_br`   ra   rG   s         rA   Ú_evaluate_string_pairsz.PairwiseStringEvaluator._evaluate_string_pairsû   rI   r@   c             ‹  óV   K  — t        d| j                  f||||dœ|¤Žƒ d{  –—† S 7 Œ­w)á@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        N©rr   r‚   r`   ra   )r   rƒ   r   s         rA   Ú_aevaluate_string_pairsz/PairwiseStringEvaluator._aevaluate_string_pairs  sH   è ø€ ô( %ØØ×'Ñ'ð
ð "Ø%ØØñ
ð ñ
÷ 
ð 	
ð 
úó   ‚ )¢'£)c               óV   — | j                  ||¬«        | j                  d||||dœ|¤ŽS )r€   ro   r†   r?   )rb   rƒ   r   s         rA   Úevaluate_string_pairsz-PairwiseStringEvaluator.evaluate_string_pairs/  sG   € ð( 	×#Ñ#¨i¸uÐ#ÔEØ*ˆt×*Ñ*ð 
Ø!Ø%ØØñ	
ð
 ñ
ð 	
r@   c             ‹  ór   K  — | j                  ||¬«        | j                  d||||dœ|¤Žƒ d{  –—† S 7 Œ­w)r…   ro   r†   Nr?   )rb   r‡   r   s         rA   Úaevaluate_string_pairsz.PairwiseStringEvaluator.aevaluate_string_pairsL  sU   è ø€ ð( 	×#Ñ#¨i¸uÐ#ÔEØ1T×1Ñ1ð 
Ø!Ø%ØØñ	
ð
 ñ
÷ 
ð 	
ð 
úó   ‚.7°5±7)rr   rf   r‚   rf   r`   rg   ra   rg   rG   r   rJ   r|   )	r'   r(   r)   r*   r   rƒ   r‡   rŠ   rŒ   r?   r@   rA   r~   r~   ø   s8  „ ÙNàð $(Ø#ñð ðð ð	ð
 !ðð ðð ðð 
òó ðð4 $(Ø#ñ
ð ð
ð ð	
ð
 !ð
ð ð
ð ð
ð 
ó
ðF $(Ø#ñ
ð ð
ð ð	
ð
 !ð
ð ð
ð ð
ð 
ó
ðD $(Ø#ñ
ð ð
ð ð	
ð
 !ð
ð ð
ð ð
ð 
ô
r@   r~   c                  ó¼   — e Zd ZdZed	d„«       Zeddœ	 	 	 	 	 	 	 	 	 	 	 d
d„«       Zddœ	 	 	 	 	 	 	 	 	 	 	 d
d„Zddœ	 	 	 	 	 	 	 	 	 	 	 d
d„Z	ddœ	 	 	 	 	 	 	 	 	 	 	 d
d„Z
y)ÚAgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.c                 ó   — y)rU   Tr?   rP   s    rA   rV   z'AgentTrajectoryEvaluator.requires_inputm  s   € ð r@   N)r`   c                ó   — y)á–  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr?   ©rQ   rr   Úagent_trajectoryra   r`   rG   s         rA   Ú_evaluate_agent_trajectoryz3AgentTrajectoryEvaluator._evaluate_agent_trajectoryr  rI   r@   c             ‹  óV   K  — t        d| j                  f||||dœ|¤Žƒ d{  –—† S 7 Œ­w)á¥  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)rr   r”   r`   ra   )r   r•   r“   s         rA   Ú_aevaluate_agent_trajectoryz4AgentTrajectoryEvaluator._aevaluate_agent_trajectory‰  sH   è ø€ ô* %ØØ×+Ñ+ð
ð "Ø-ØØñ
ð ñ
÷ 
ð 	
ð 
úrˆ   c               óV   — | j                  ||¬«        | j                  d||||dœ|¤ŽS )r’   ro   ©rr   ra   r”   r`   r?   )rb   r•   r“   s         rA   Úevaluate_agent_trajectoryz2AgentTrajectoryEvaluator.evaluate_agent_trajectory¨  sG   € ð* 	×#Ñ#¨i¸uÐ#ÔEØ.ˆt×.Ñ.ð 
Ø!ØØ-Øñ	
ð
 ñ
ð 	
r@   c             ‹  ór   K  — | j                  ||¬«        | j                  d||||dœ|¤Žƒ d{  –—† S 7 Œ­w)r—   ro   rš   Nr?   )rb   r˜   r“   s         rA   Úaevaluate_agent_trajectoryz3AgentTrajectoryEvaluator.aevaluate_agent_trajectoryÆ  sU   è ø€ ð* 	×#Ñ#¨i¸uÐ#ÔEØ5T×5Ñ5ð 
Ø!ØØ-Øñ	
ð
 ñ
÷ 
ð 	
ð 
úr   rc   )rr   rf   r”   z!Sequence[Tuple[AgentAction, str]]ra   rf   r`   rg   rG   r   rJ   r|   )r'   r(   r)   r*   ri   rV   r   r•   r˜   r›   r   r?   r@   rA   r   r   j  s@  „ Ù6àòó ðð ð $(ñð ðð <ð	ð
 ðð !ðð ðð 
òó ðð8 $(ñ
ð ð
ð <ð	
ð
 ð
ð !ð
ð ð
ð 
ó
ðJ $(ñ
ð ð
ð <ð	
ð
 ð
ð !ð
ð ð
ð 
ó
ðH $(ñ
ð ð
ð <ð	
ð
 ð
ð !ð
ð ð
ð 
ô
r@   r   )#r*   Ú
__future__r   ÚloggingÚabcr   r   Úenumr   Útypingr   r   r	   r
   r   Úwarningsr   Úlangchain_core.agentsr   Úlangchain_core.language_modelsr   Úlangchain_core.runnables.configr   Úlangchain.chains.baser   Ú	getLoggerr'   Úloggerrf   r   rC   rM   rk   r~   r   r?   r@   rA   ú<module>rª      s•   ðÙ 9å "ã ß #Ý ß 8Õ 8Ý å -Ý <Ý ;å 'à	ˆ×	Ñ	˜8Ó	$€ô3JC˜ô 3Jôl25ô 2÷./ñ ./ôbq
n cô q
ôho
˜n¨cô o
ôdx
˜~¨sõ x
r@   