
"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from collections.abc import Sequence
from typing import Any, Optional

from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY
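
# The grading prompts imported above (PROMPT, COT_PROMPT, CONTEXT_PROMPT) ask the
# grading model to end its completion with a verdict line such as "GRADE: CORRECT"
# or "GRADE: INCORRECT"; the helpers below extract that verdict from the raw text.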
Nzgrade:\s*(correct|incorrect)   CORRECT)r   r   	INCORRECT)r   r   r    )researchstrip
IGNORECASEgroupuppersplit	translatestr	maketransstringpunctuation
IndexError)textmatch
first_word	last_words       i/home/kushmeetdev/Regenta/Chatbot/venv/lib/python3.12/site-packages/langchain/evaluation/qa/eval_chain.py
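

# Illustrative behaviour of the verdict parser above (a sketch only; the example
# strings are hypothetical grader outputs, not produced by this module):
#
#   _get_score("GRADE: CORRECT")              -> ("CORRECT", 1)
#   _get_score("Incorrect. The answer is 5.") -> ("INCORRECT", 0)
#   _get_score("The student did well.")       -> None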


def _parse_string_eval_output(text: str) -> dict:
    """Parse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        dict: The parsed output.
    """
    reasoning = text.strip()
    parsed_scores = _get_score(reasoning)
    if parsed_scores is None:
        value, score = None, None
    else:
        value, score = parsed_scores
    return {
        "reasoning": reasoning,
        "value": value,
        "score": score,
    }


class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating question answering."""

    output_key: str = "results"  #: :meta private:

    model_config = ConfigDict(extra="ignore")

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "correctness"

    @property
    def requires_reference(self) -> bool:
        return True

    @property
    def requires_input(self) -> bool:
        return True

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> QAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables
            'query', 'answer' and 'result' that will be used as the prompt
            for evaluation. Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or PROMPT
        expected_input_vars = {"query", "answer", "result"}
        if expected_input_vars != set(prompt.input_variables):
            raise ValueError(
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        answer_key: str = "answer",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "answer": example[answer_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]

        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation.
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.

        Returns:
            dict: The evaluation results containing the score or value.
        """
        result = self(
            {"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)


class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating QA w/o GT based on context."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def requires_reference(self) -> bool:
        """Whether the chain requires a reference string."""
        return True

    @property
    def requires_input(self) -> bool:
        """Whether the chain requires an input string."""
        return True

    model_config = ConfigDict(extra="ignore")

    @classmethod
    def _validate_input_vars(cls, prompt: PromptTemplate) -> None:
        expected_input_vars = {"query", "context", "result"}
        if expected_input_vars != set(prompt.input_variables):
            raise ValueError(
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )

    @property
    def evaluation_name(self) -> str:
        return "Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> ContextQAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation. Defaults to CONTEXT_PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or CONTEXT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        context_key: str = "context",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "context": example[context_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]

        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = self(
            {"query": input, "context": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "context": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)


class CotQAEvalChain(ContextQAEvalChain):
    """LLM Chain for evaluating QA using chain of thought reasoning."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "COT Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> CotQAEvalChain:
        """Load QA Eval Chain from LLM."""
        prompt = prompt or COT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)
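

# Usage sketch (illustrative only, kept as comments so nothing runs at import time;
# the ChatOpenAI import is an assumption -- any BaseLanguageModel works):
#
#   from langchain_openai import ChatOpenAI
#
#   eval_chain = QAEvalChain.from_llm(llm=ChatOpenAI(temperature=0))
#
#   # Batch grading: pairs each example with its prediction and runs the grading prompt.
#   graded = eval_chain.evaluate(
#       examples=[{"query": "What is 2 + 2?", "answer": "4"}],
#       predictions=[{"result": "2 + 2 equals 4"}],
#   )
#   # -> [{"results": "GRADE: CORRECT"}]  (exact wording depends on the model)
#
#   # Single-example grading via the StringEvaluator interface; the raw verdict is
#   # parsed by _parse_string_eval_output into reasoning / value / score.
#   verdict = eval_chain.evaluate_strings(
#       prediction="2 + 2 equals 4", reference="4", input="What is 2 + 2?"
#   )
#   # -> {"reasoning": "GRADE: CORRECT", "value": "CORRECT", "score": 1}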