import openai
import json
import re
import requests
from typing import List, Dict, Any, Optional, Tuple

def validate_url(url: str, timeout: int = 5) -> bool:
    """
    Check whether a URL is reachable over HTTP(S).

    A HEAD request is sent first, following redirects. If the server rejects
    the HEAD method itself (405 Method Not Allowed / 501 Not Implemented),
    the check is retried with a streamed GET so that an existing page is not
    reported as a dead link merely because HEAD is unsupported.

    Args:
        url (str): The URL to validate
        timeout (int): Request timeout in seconds

    Returns:
        bool: True if the final response status is below 400. False for a
            4xx/5xx status, for any request error, and for input that is not
            an http(s) URL string.
    """
    # URLs may come straight from model output, so reject anything that is
    # not an http(s) URL string before touching the network.
    if not isinstance(url, str) or not url.strip().lower().startswith(("http://", "https://")):
        print(f"URL validation failed for {url}: not an http(s) URL")
        return False

    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        status = response.status_code
        response.close()

        if status in (405, 501):
            # HEAD is not supported by this server; stream=True avoids
            # downloading the body just to read the status line.
            response = requests.get(url, timeout=timeout, allow_redirects=True, stream=True)
            status = response.status_code
            response.close()

        # Redirects were followed, so this is the status of the final hop.
        return status < 400
    except Exception as e:
        # Best-effort check: any failure simply means "not validated".
        print(f"URL validation failed for {url}: {str(e)}")
        return False

def get_known_citation_url(citation_text: str) -> Optional[str]:
    """
    Map known legal citation patterns to Cornell LII URLs.

    Patterns are tried in this order: U.S. Code, Code of Federal Regulations,
    Federal Rules of Civil Procedure / Evidence / Criminal Procedure,
    U.S. Constitution, Uniform Commercial Code. The first match wins.

    The returned URL is built from the citation text only; it is not checked
    for reachability here.

    Args:
        citation_text (str): The citation text to check

    Returns:
        Optional[str]: A URL if a pattern matches, None otherwise (including
            for empty or non-string input)
    """
    if not isinstance(citation_text, str) or not citation_text:
        return None

    citation_lower = citation_text.lower()
    base_url = "https://www.law.cornell.edu"

    # U.S. Code (USC) - Cornell LII
    # The section keeps its letter suffix and hyphenated sub-number
    # (78j, 552a, 2000e-2): those are distinct sections, not variants of the
    # bare number. A hyphen is only consumed after a letter so that a range
    # such as "1981-1988" resolves to its first section.
    usc_match = re.search(
        r'(\d+)\s+u\.?s\.?c\.?\s*§{0,2}\s*(\d+(?:[a-z]+(?:-\d+[a-z]*)?)?)',
        citation_lower,
    )
    if usc_match:
        title, section = usc_match.groups()
        return f"{base_url}/uscode/text/{title}/{section}"

    # Code of Federal Regulations (CFR) - Cornell LII
    # Keep the full "part.section" form (e.g. 240.10b-5). The lookahead stops
    # a range like "1630.1-1630.16" from being swallowed as one section.
    cfr_match = re.search(
        r'(\d+)\s+c\.?f\.?r\.?\s*§{0,2}\s*'
        r'(\d+(?:\.\d+[a-z]*(?:-\d+[a-z]*(?!\d|\.\d))?)?)',
        citation_lower,
    )
    if cfr_match:
        title, section = cfr_match.groups()
        return f"{base_url}/cfr/text/{title}/{section}"

    # Restatements - No reliable free online source with consistent URLs
    # Removed to prevent 404 errors

    # Federal Rules (Civil Procedure, Evidence, Criminal Procedure).
    # The designation must appear as a phrase ("Fed. R. Civ. P.",
    # "Federal Rules of Evidence", ...). Testing for loose substrings would
    # also match unrelated case citations that merely contain "Federal" and
    # "evidence".
    rules_prefix = r'\bfed(?:eral)?\.?\s*r(?:ules?)?\.?\s*(?:of\s+)?'
    rule_sets = (
        (rules_prefix + r'civ(?:il)?\.?\s*p(?:roc(?:edure)?)?\b\.?', "frcp"),
        (rules_prefix + r'evid(?:ence)?\b\.?', "fre"),
        (rules_prefix + r'crim(?:inal)?\.?\s*p(?:roc(?:edure)?)?\b\.?', "frcrmp"),
    )
    for designation, slug in rule_sets:
        designation_match = re.search(designation, citation_lower)
        if not designation_match:
            continue
        # Prefer an explicit "Rule N"; otherwise take the number that directly
        # follows the designation, as in "Fed. R. Civ. P. 12(b)(6)".
        rule_match = (
            re.search(r'rule\s+(\d+(?:\.\d+)?)', citation_lower)
            or re.match(r'\s*(\d+(?:\.\d+)?)', citation_lower[designation_match.end():])
        )
        if rule_match:
            return f"{base_url}/rules/{slug}/rule_{rule_match.group(1)}"
        return f"{base_url}/rules/{slug}"

    # U.S. Constitution
    if 'u.s. const' in citation_lower or 'constitution' in citation_lower:
        # Accept both the spelled-out and the abbreviated-with-period forms
        # ("Article III", "art. III"). The word boundaries keep "part iv" or
        # "art is" from being read as an article number.
        article_match = re.search(r'\bart(?:icle\s+|\.\s*|\s+)([ivx]+)\b', citation_lower)
        if article_match:
            article = article_match.group(1).upper()
            return f"{base_url}/constitution/article{article}"

        amend_match = re.search(r'\bamend(?:ment\s+|\.\s*|\s+)([ivx]+|\d+)\b', citation_lower)
        if amend_match:
            amendment = amend_match.group(1)
            return f"{base_url}/constitution/amendment{amendment}"

        return f"{base_url}/constitution"

    # Uniform Commercial Code (UCC)
    # Format: UCC 2-302 -> https://www.law.cornell.edu/ucc/2/2-302
    # Matched as a whole word so that "Successor" or "success" do not trigger.
    if re.search(r'\bu\.?c\.?c\b', citation_lower):
        # Articles may carry a letter suffix (2A, 4A).
        section_match = re.search(r'(\d+[A-Za-z]?)-(\d+)', citation_text)
        if section_match:
            article = section_match.group(1).upper()
            section = section_match.group(2)
            return f"{base_url}/ucc/{article}/{article}-{section}"
        return f"{base_url}/ucc"

    return None

class CitationGenerator:
    """Ask the OpenAI chat API for citations and attach validated URLs to them."""

    def __init__(self, api_key):
        """Initialize the citation generator with the OpenAI API key."""
        self.api_key = api_key

    def generate_citations(self, text: str, perspective: str = "neutral") -> List[Dict[str, str]]:
        """
        Generate citations for legal analysis text.

        Args:
            text (str): The legal analysis text to add citations to
            perspective (str): The perspective of the analysis (prosecutor, defense, neutral)

        Returns:
            List[Dict[str, str]]: A list of citation objects, each with 'text' and optionally 'url' keys.
                An empty list is returned if the API call or response parsing fails.
        """
        # Prepare prompt for the OpenAI API
        prompt = self._build_citation_prompt(text, perspective)

        # Call the OpenAI API
        try:
            client = openai.OpenAI(api_key=self.api_key, timeout=180.0)
            response = client.chat.completions.create(
                model="gpt-4o",  # Use the model you're already using for analysis
                messages=[
                    {"role": "system", "content": "You are a legal research assistant that identifies relevant legal citations for analysis."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,  # Low temperature for more consistent results
                max_tokens=1500,
                response_format={"type": "json_object"}
            )

            # Parse the response
            content = response.choices[0].message.content
            result = json.loads(content)

            # The payload is model output: tolerate a missing or mis-typed
            # 'citations' field instead of failing the whole request.
            raw_citations = result.get("citations", []) if isinstance(result, dict) else []
            if not isinstance(raw_citations, list):
                raw_citations = []

            processed_citations = []
            for raw in raw_citations:
                citation = self._normalize_citation(raw)
                if citation is None:
                    # A single malformed entry must not discard the valid ones.
                    continue
                self._attach_validated_url(citation)
                processed_citations.append(citation)

            return processed_citations

        except Exception as e:
            # Best-effort: callers treat an empty list as "no citations".
            print(f"Error generating citations: {str(e)}")
            return []

    @staticmethod
    def _normalize_citation(raw: Any) -> Optional[Dict[str, str]]:
        """
        Coerce one raw entry from the model output into a citation dict.

        Returns None for entries that are not dicts. Otherwise returns a
        shallow copy in which 'text' is always a string (a placeholder when
        missing or not a string) and 'url' is present only if it is a
        non-blank string. The input dict is not modified.
        """
        if not isinstance(raw, dict):
            return None

        citation = dict(raw)
        if not isinstance(citation.get("text"), str):
            citation["text"] = "Citation details unavailable"

        url = citation.get("url")
        if not isinstance(url, str) or not url.strip():
            citation.pop("url", None)

        return citation

    @staticmethod
    def _attach_validated_url(citation: Dict[str, str]) -> None:
        """
        Set citation['url'] to the first candidate URL that validates.

        The pattern-derived URL is tried first, then the URL supplied by the
        model. If neither validates, the citation is left without a 'url'.
        The citation dict is modified in place.
        """
        label = citation["text"][:50]
        ai_url = citation.pop("url", None)

        # Try to get a known pattern URL first
        known_url = get_known_citation_url(citation["text"])

        if known_url:
            # Validate even known URLs to ensure they work
            if validate_url(known_url):
                citation["url"] = known_url
                print(f"✓ Using validated known URL for citation: {label}...")
                return
            print(f"✗ Known URL validation failed for citation: {label}...")

        # Fall back to the AI-generated URL, unless it is the same URL that
        # has just failed validation.
        if ai_url and ai_url != known_url:
            if validate_url(ai_url):
                citation["url"] = ai_url
                print(f"✓ Validated AI URL for citation: {label}...")
            else:
                # Drop the invalid URL but keep the citation text
                print(f"✗ Invalid URL removed for citation: {label}...")

    def _build_citation_prompt(self, text: str, perspective: str) -> str:
        """Build the prompt for citation generation."""
        return f"""
        Please analyze the following legal text from a {perspective}'s perspective and identify relevant case law, 
        statutes, legal principles, or scholarly articles that should be cited to support the claims made.

        For each citation, provide:
        1. The full citation text in proper legal citation format
        2. If applicable, a URL to an online resource where this citation can be verified

        Return your response in JSON format with a 'citations' array containing objects with 'text' and optionally 'url' fields.

        Example of expected output format:
        {{
            "citations": [
                {{
                    "text": "Smith v. Jones, 123 F.3d 456 (9th Cir. 2005) (holding that...)",
                    "url": "https://caselaw.findlaw.com/us-9th-circuit/1234567.html"
                }},
                {{
                    "text": "18 U.S.C. § 1343 (Wire Fraud)",
                    "url": "https://www.law.cornell.edu/uscode/text/18/1343"
                }}
            ]
        }}

        Here is the legal text to analyze:
        
        {text}
        """

def _extract_key_terms(citation_text: str) -> List[str]:
    """Extract key terms from citation text for matching in the original document."""
    # Simple implementation - In a production environment, this would be more sophisticated
    terms = []
    
    # Try to extract case names (v. pattern)
    if " v. " in citation_text:
        case_parts = citation_text.split(" v. ")
        if len(case_parts) >= 2:
            case_name = f"{case_parts[0].split()[-1]} v. {case_parts[1].split()[0]}"
            terms.append(case_name)
    
    # Try to extract statute references
    if "U.S.C." in citation_text:
        usc_parts = citation_text.split("U.S.C.")
        if len(usc_parts) >= 2:
            usc_ref = f"U.S.C.{usc_parts[1].split(')')[0]})"
            terms.append(usc_ref)
    
    # Extract other potential key phrases
    words = citation_text.split()
    for i in range(len(words) - 2):
        phrase = " ".join(words[i:i+3])
        if len(phrase) > 10:  # Only consider substantial phrases
            terms.append(phrase)
            
    return terms

def process_analysis_with_citations(analysis_text: str, perspective: str, api_key: str) -> Tuple[str, List[Dict[str, str]]]:
    """
    Process the analysis text and add citations using the OpenAI API.

    Each citation is numbered from 1 in the order returned. For each one, a
    "[n]" marker is inserted after the first occurrence in the text of the
    first key term that appears there. Citations whose terms do not appear in
    the text are still returned, just without a marker.

    Args:
        analysis_text (str): The original analysis text
        perspective (str): The perspective of the analysis
        api_key (str): OpenAI API key

    Returns:
        Tuple[str, List[Dict[str, str]]]: (processed_text, citations_list).
            On blank input, no citations, or any error, the original text is
            returned unchanged with an empty list.
    """
    # Nothing to cite: avoid an API round-trip on blank input.
    if not isinstance(analysis_text, str) or not analysis_text.strip():
        return analysis_text, []

    try:
        citation_generator = CitationGenerator(api_key)
        citations = citation_generator.generate_citations(analysis_text, perspective)

        if not citations:
            return analysis_text, []

        # Add citation references to the text
        processed_text = analysis_text
        for i, citation in enumerate(citations, 1):
            citation_text = citation.get("text") if isinstance(citation, dict) else None
            if not isinstance(citation_text, str):
                # No usable text to derive search terms from; keep the
                # citation in the list but place no marker.
                continue

            # Extract key terms (case names, statutes, etc.)
            for term in _extract_key_terms(citation_text):
                if term and len(term) > 5 and term in processed_text:
                    # Add citation marker after the first occurrence of the term
                    processed_text = processed_text.replace(term, f"{term} [{i}]", 1)
                    # One marker per citation. Stopping here, rather than
                    # searching the text for "[i]", keeps bracketed numbers
                    # already in the analysis from suppressing the marker.
                    break

        return processed_text, citations
    except Exception as e:
        print(f"Error processing citations: {str(e)}")
        return analysis_text, []
