#!/usr/bin/env python3
"""
Backfill content_text for existing documents
"""
from app import app
from models import db, CaseDocument
from law_library_routes import extract_text_from_file
import os

with app.app_context():
    # Look up the one document this script targets.
    # NOTE: '_' is a single-character wildcard in SQL LIKE, so this pattern
    # also matches filenames that use another separator (e.g. spaces).
    doc = CaseDocument.query.filter(
        CaseDocument.original_filename.like('%Enhancing_Network_Security%')
    ).first()

    if not doc:
        print("Document not found")
    else:
        print(f"Found document: {doc.original_filename} (ID: {doc.id})")
        print(f"  File path: {doc.file_path}")
        print(f"  Current content_text: {'SET' if doc.content_text else 'NULL'}")

        # Guard against a NULL/empty path: os.path.exists(None) raises
        # TypeError instead of returning False.
        if not doc.file_path or not os.path.exists(doc.file_path):
            print(f"  ❌ File not found at: {doc.file_path}")
        else:
            # Extension is the text after the last '.', lower-cased; empty
            # when the filename contains no dot.
            _, dot, suffix = doc.original_filename.rpartition('.')
            file_extension = suffix.lower() if dot else ''

            print(f"  File extension: {file_extension}")
            print("  Extracting text...")

            # Keep the try body limited to the steps that can fail for
            # extraction/persistence reasons, so that a problem in the
            # reporting code below is not misreported as an extraction error.
            try:
                content_text = extract_text_from_file(doc.file_path, file_extension)
                if content_text is None:
                    # Do not commit NULL over the column and then crash on
                    # len(None); treat "no text" as a failed extraction.
                    raise ValueError("extractor returned no text")
                doc.content_text = content_text
                db.session.commit()
            except Exception as e:
                # Discard the pending change so the session is left clean.
                db.session.rollback()
                print(f"  ❌ Error extracting text: {e}")
            else:
                print(f"  ✅ Extracted {len(content_text)} characters")
                print("  Searching for 'vulnerabilities'...")

                # Lower-case once and reuse for both the count and the search.
                lowered = content_text.lower()
                count = lowered.count('vulnerabilities')
                print(f"  Found {count} mentions of 'vulnerabilities'")

                if count > 0:
                    # Show up to 100 characters of context on either side of
                    # the first mention.
                    # NOTE: str.lower() can change the length of a few
                    # non-ASCII characters, so for such text the offset found
                    # in `lowered` may be slightly off in `content_text`.
                    pos = lowered.find('vulnerabilities')
                    snippet = content_text[max(0, pos - 100):pos + 100]
                    print(f"  First snippet: ...{snippet}...")