o
    ’òçhQ.  ã                   @   sÀ   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	 dde
dedefdd„Zd	e
dee
 fd
d„ZG dd„ dƒZd	e
dee
 fdd„Zde
de
de
de	e
eee
e
f  f fdd„ZdS )é    N)ÚListÚDictÚAnyÚOptionalÚTupleé   ÚurlÚtimeoutÚreturnc              
   C   s\   zt j| |dd}|jdk W S  ty- } ztd| › dt|ƒ› ƒ W Y d}~dS d}~ww )zû
    Validate if a URL is accessible and returns a 200 OK status.

    Args:
        url (str): The URL to validate
        timeout (int): Request timeout in seconds

    Returns:
        bool: True if URL is valid and accessible, False otherwise
    T)r	   Úallow_redirectsi  zURL validation failed for z: NF)ÚrequestsÚheadÚstatus_codeÚ	ExceptionÚprintÚstr)r   r	   ÚresponseÚe© r   ú%/var/www/lawbot/citation_generator.pyÚvalidate_url   s   €þr   Úcitation_textc                 C   sú  |   ¡ }d}t ||¡}|r | d¡}| d¡}d|› d|› S d}t ||¡}|r<| d¡}| d¡}d|› d|› S d|v r`d	|v r`d
|v r`d|v r`t d|¡}|r^| d¡}	d|	› S dS d|v r€d	|v r€d|v r€t d|¡}|r~| d¡}	d|	› S dS d|v r¤d	|v r¤d|v r¤d|v r¤t d|¡}|r¢| d¡}	d|	› S dS d|v s¬d|v rÔt d|¡}
|
rÀ|
 d¡ ¡ }d|› S t d|¡}|rÒ| d¡}d|› S dS d|v sÜd|v rût d| ¡}|rù| d¡}| d¡}d|› d|› d |› S d!S d"S )#zÙ
    Map known legal citation patterns to verified URLs.

    Args:
        citation_text (str): The citation text to check

    Returns:
        Optional[str]: A verified URL if a pattern matches, None otherwise
    u"   (\d+)\s+u\.?s\.?c\.?\s+Â§?\s*(\d+)é   é   z(https://www.law.cornell.edu/uscode/text/ú/u"   (\d+)\s+c\.?f\.?r\.?\s+Â§?\s*(\d+)z%https://www.law.cornell.edu/cfr/text/ÚfedÚrÚcivÚpzrule\s+(\d+)z,https://www.law.cornell.edu/rules/frcp/rule_z&https://www.law.cornell.edu/rules/frcpÚevidz+https://www.law.cornell.edu/rules/fre/rule_z%https://www.law.cornell.edu/rules/freÚcrimz.https://www.law.cornell.edu/rules/frcrmp/rule_z(https://www.law.cornell.edu/rules/frcrmpz
u.s. constÚconstitutionzart(?:icle)?\s+([ivx]+)z0https://www.law.cornell.edu/constitution/articlezamend(?:ment)?\s+([ivx]+|\d+)z2https://www.law.cornell.edu/constitution/amendmentz(https://www.law.cornell.edu/constitutionzu.c.c.Úuccu   Â§?\s*(\d+)-(\d+)z https://www.law.cornell.edu/ucc/ú-zhttps://www.law.cornell.edu/uccN)ÚlowerÚreÚsearchÚgroupÚupper)r   Úcitation_lowerÚusc_patternÚ	usc_matchÚtitleÚsectionÚcfr_patternÚ	cfr_matchÚ
rule_matchÚruleÚarticle_matchÚarticleÚamend_matchÚ	amendmentÚsection_matchr   r   r   Úget_known_citation_url   sb   




 



 






r7   c                
   @   sN   e Zd Zdd„ Zddededeeeef  fdd„Zdededefd	d
„ZdS )ÚCitationGeneratorc                 C   s
   || _ dS )z:Initialize the citation generator with the OpenAI API key.N)Úapi_key)Úselfr9   r   r   r   Ú__init__m   s   
zCitationGenerator.__init__ÚneutralÚtextÚperspectiver
   c              
   C   sÂ  |   ||¡}z½tj| jdd}|jjjddddœd|dœgdd	d
did}|jd jj	}t
 |¡}| dg ¡}g }	|D ]…}
d|
vrEd|
d< t|
d ƒ}|rƒt|ƒri||
d< td|
d dd… › dƒ |	 |
¡ q;td|
d dd… › dƒ |
 dd¡ |	 |
¡ q;d|
v r»t|
d ƒr¡td|
d dd… › dƒ |	 |
¡ q;td|
d dd… › dƒ |
 dd¡ |	 |
¡ q;|	 |
¡ q;|	W S  tyà } ztdt|ƒ› ƒ g W  Y d}~S d}~ww )a}  
        Generate citations for legal analysis text.
        
        Args:
            text (str): The legal analysis text to add citations to
            perspective (str): The perspective of the analysis (prosecutor, defense, neutral)
            
        Returns:
            List[Dict[str, str]]: A list of citation objects, each with 'text' and optionally 'url' keys
        g     €f@)r9   r	   zgpt-4oÚsystemzYYou are a legal research assistant that identifies relevant legal citations for analysis.)ÚroleÚcontentÚusergš™™™™™¹?iÜ  ÚtypeÚjson_object)ÚmodelÚmessagesÚtemperatureÚ
max_tokensÚresponse_formatr   Ú	citationsr=   zCitation details unavailabler   u,   âœ“ Using validated known URL for citation: Né2   z...u.   âœ— Known URL validation failed for citation: u#   âœ“ Validated AI URL for citation: u&   âœ— Invalid URL removed for citation: zError generating citations: )Ú_build_citation_promptÚopenaiÚOpenAIr9   ÚchatÚcompletionsÚcreateÚchoicesÚmessagerA   ÚjsonÚloadsÚgetr7   r   r   ÚappendÚpopr   r   )r:   r=   r>   ÚpromptÚclientr   rA   ÚresultrJ   Úprocessed_citationsÚcitationÚ	known_urlr   r   r   r   Úgenerate_citationsq   sT   þø
€þz$CitationGenerator.generate_citationsc                 C   s   d|› d|› dS )z)Build the prompt for citation generation.z8
        Please analyze the following legal text from a u  's perspective and identify relevant case law, 
        statutes, legal principles, or scholarly articles that should be cited to support the claims made.

        For each citation, provide:
        1. The full citation text in proper legal citation format
        2. If applicable, a URL to an online resource where this citation can be verified

        Return your response in JSON format with a 'citations' array containing objects with 'text' and optionally 'url' fields.

        Example of expected output format:
        {
            "citations": [
                {
                    "text": "Smith v. Jones, 123 F.3d 456 (9th Cir. 2005) (holding that...)",
                    "url": "https://caselaw.findlaw.com/us-9th-circuit/1234567.html"
                },
                {
                    "text": "18 U.S.C. Â§ 1343 (Wire Fraud)",
                    "url": "https://www.law.cornell.edu/uscode/text/18/1343"
                }
            ]
        }

        Here is the legal text to analyze:
        
        z	
        r   )r:   r=   r>   r   r   r   rL   »   s
   ÿæz(CitationGenerator._build_citation_promptN)r<   )	Ú__name__Ú
__module__Ú__qualname__r;   r   r   r   r_   rL   r   r   r   r   r8   l   s    $Jr8   c           	      C   sâ   g }d| v r)|   d¡}t|ƒdkr)|d   ¡ d › d|d   ¡ d › }| |¡ d| v rJ|   d¡}t|ƒdkrJd|d   d¡d › d}| |¡ |   ¡ }tt|ƒd ƒD ]}d |||d	 … ¡}t|ƒd
krn| |¡ qV|S )zKExtract key terms from citation text for matching in the original document.z v. r   r   éÿÿÿÿr   zU.S.C.ú)ú é   é
   )ÚsplitÚlenrW   ÚrangeÚjoin)	r   ÚtermsÚ
case_partsÚ	case_nameÚ	usc_partsÚusc_refÚwordsÚiÚphraser   r   r   Ú_extract_key_termsÚ   s$   
&



€rt   Úanalysis_textr>   r9   c              
   C   sà   zPt |ƒ}| | |¡}|s| g fW S | }t|dƒD ]2\}}|d }t|ƒ}	|	D ]#}
|
rJt|
ƒdkrJ|
|v rJd|› d|vrJ| |
|
› d|› dd¡}q'q||fW S  tyo } ztdt|ƒ› ƒ | g fW  Y d}~S d}~ww )	aT  
    Process the analysis text and add citations using the OpenAI API.
    
    Args:
        analysis_text (str): The original analysis text
        perspective (str): The perspective of the analysis
        api_key (str): OpenAI API key
        
    Returns:
        Tuple[str, List[Dict[str, str]]]: (processed_text, citations_list)
    r   r=   r   ú[ú]z [zError processing citations: N)	r8   r_   Ú	enumeratert   ri   Úreplacer   r   r   )ru   r>   r9   Úcitation_generatorrJ   Úprocessed_textrr   r]   r   Ú	key_termsÚtermr   r   r   r   Úprocess_analysis_with_citationsö   s*   
€û
€þr~   )r   )rM   rT   r%   r   Útypingr   r   r   r   r   r   ÚintÚboolr   r7   r8   rt   r~   r   r   r   r   Ú<module>   s    Rn2