import unittest
import re
from langchain_core.messages import HumanMessage
from cuga.backend.llm.models import LLMManager
from cuga.config import settings


class TestLongOutput(unittest.IsolatedAsyncioTestCase):
    """
    Test class for verifying that LLM can generate long outputs (at least 1600 tokens).
    This tests that max_tokens is properly set and working using LLMManager directly.
    """

    async def test_long_llm_output(self):
        """Test that LLM can generate outputs of at least 1600 tokens."""
        # Get model configuration
        model_config = settings.agent.code.model.copy()

        # Verify max_tokens is set correctly (should be 16000 for Groq)
        max_tokens_config = getattr(model_config, 'max_tokens', None)
        self.assertIsNotNone(max_tokens_config, "max_tokens not found in model configuration")
        self.assertGreater(
            max_tokens_config, 1000, f"max_tokens too low: {max_tokens_config}, should be > 1000"
        )
        self.assertNotEqual(
            max_tokens_config,
            1000,
            "max_tokens is still set to default 1000 - this indicates the fix didn't work",
        )

        print(f"\n=== Testing Long Output with max_tokens={max_tokens_config} ===")

        # Initialize LLM manager and get model
        llm_manager = LLMManager()
        model = llm_manager.get_model(model_config)

        # Verify model has correct max_tokens set
        model_max_tokens = getattr(model, 'max_tokens', None)
        if model_max_tokens:
            print(f"Model max_tokens attribute: {model_max_tokens}")
            # Note: Some models may store this in model_kwargs instead

        # Create a prompt that should generate a very long response
        prompt = (
            "Write a comprehensive, detailed analysis of artificial intelligence, "
            "covering its history from the 1950s to present day, major breakthroughs, "
            "current state-of-the-art techniques, ethical considerations, future implications, "
            "and potential societal impacts. Include specific examples, technical details, "
            "and references to key researchers and organizations. Make this analysis "
            "as thorough and detailed as possible, aiming for at least 2000 words. "
            "Be very detailed and comprehensive in your response."
        )

        print("Sending prompt to LLM...")

        try:
            # Call the LLM directly
            messages = [HumanMessage(content=prompt)]
            response = await model.ainvoke(messages)

            # Extract the response text
            if hasattr(response, 'content'):
                answer_text = response.content
            else:
                answer_text = str(response)

            self.assertIsNotNone(answer_text, "Response is None")
            self.assertNotEqual(answer_text.strip(), "", "Response is empty")

            print(f"Response length: {len(answer_text)} characters")

            # Count approximate tokens
            # More accurate: count words (rough approximation)
            words = re.findall(r'\b\w+\b', answer_text)
            approx_tokens = len(words)

            # Also estimate based on characters (1 token ≈ 4 chars for English)
            char_based_estimate = len(answer_text) // 4

            print(f"Approximate token count (word-based): {approx_tokens}")
            print(f"Approximate token count (char-based): {char_based_estimate}")
            print(f"Acccurate token count: {response.response_metadata}")
            # Use the higher estimate to be conservative
            final_estimate = max(approx_tokens, char_based_estimate)

            # Assert that we have at least 1600 tokens worth of content
            self.assertGreaterEqual(
                final_estimate,
                1600,
                f"Response too short: {final_estimate} tokens (estimated), expected at least 1600. "
                f"This suggests max_tokens may not be set correctly. "
                f"Config max_tokens={max_tokens_config}, Model max_tokens={model_max_tokens}",
            )

            print(f"✅ Response meets minimum length requirement: {final_estimate} tokens (estimated)")

            # Check if response appears truncated
            truncated_indicators = [
                "...",
                "truncated",
                "cut off",
                "incomplete",
                "continues",
                "to be continued",
            ]

            lower_answer = answer_text.lower()
            has_truncation_indicator = any(
                indicator in lower_answer[-200:] for indicator in truncated_indicators
            )

            if has_truncation_indicator and final_estimate < 2000:
                print("⚠️  Response may be truncated (found truncation indicators)")
            else:
                print("✅ Response appears complete")

            # Print a sample of the response
            print("\n--- Response Sample (first 500 chars) ---")
            print(answer_text[:500] + "..." if len(answer_text) > 500 else answer_text)

        except Exception as e:
            self.fail(f"Test failed with exception: {e}")

    def test_max_tokens_from_config(self):
        """Test that max_tokens is correctly read from configuration."""
        # Get the current model configuration
        model_config = settings.agent.code.model

        # Verify max_tokens is set and is a reasonable value
        max_tokens = getattr(model_config, 'max_tokens', None)
        self.assertIsNotNone(max_tokens, "max_tokens not found in model configuration")
        self.assertGreater(max_tokens, 1000, f"max_tokens too low: {max_tokens}, should be > 1000")

        print(f"✅ Model configuration has max_tokens = {max_tokens}")

        # Verify it's not the default 1000 that was causing the issue
        self.assertNotEqual(
            max_tokens, 1000, "max_tokens is still set to default 1000 - this indicates the fix didn't work"
        )

        # Verify LLMManager extracts it correctly
        llm_manager = LLMManager()
        model_config_copy = model_config.copy()

        # This should not raise an assertion error
        try:
            llm_manager.get_model(model_config_copy)
            print(f"✅ LLMManager.get_model() successfully used max_tokens={max_tokens} from config")
        except AssertionError as e:
            if "max_tokens must be specified" in str(e):
                self.fail(f"LLMManager failed to extract max_tokens from config: {e}")
            raise


if __name__ == "__main__":
    unittest.main()