#!/usr/bin/env python3
"""
End-to-End tests for Runtime Tools functionality
Tests StructuredTool loading from langchain, API utilities, and code execution
"""

import unittest
from unittest.mock import patch

from cuga.backend.activity_tracker.tracker import ActivityTracker
from cuga.backend.tools_env.registry.utils.api_utils import get_apis, get_apps
from cuga.backend.tools_env.code_sandbox.sandbox import run_code
from cuga.backend.cuga_graph.state.agent_state import AgentState
from cuga.backend.cuga_graph.utils.controller import AgentRunner as CugaAgent
from system_tests.e2e.calculator_tool import (
    tools as calculator_tools,
    evaluate_expression,
    get_pi,
    calculate_factorial,
)
from langchain_core.tools import StructuredTool


# Module-level tracker shared by every test in this file. Per the original
# author's note, ActivityTracker is a singleton, so a single global handle is
# used instead of constructing one per test. Tests register tools on it via
# set_tools(), and tearDown clears its `_tools` mapping afterwards.
tracker = ActivityTracker()


class TestRuntimeTools(unittest.IsolatedAsyncioTestCase):
    """End-to-end checks for runtime tools.

    Covers three areas:
      * building ``StructuredTool`` objects from the calculator functions,
      * the ``get_apis`` / ``get_apps`` helpers when tools are registered on the
        shared tracker at runtime,
      * executing sandboxed code through ``run_code`` that calls those tools
        via ``call_api``.
    """

    # Server name attached to every calculator tool's metadata before it is
    # registered on the tracker.
    SERVER_NAME = "calculator"

    # Names of the tools the calculator module is expected to expose.
    EXPECTED_TOOL_NAMES = ("evaluate_expression", "get_pi", "calculate_factorial")

    def setUp(self):
        """Set up test environment"""
        self.cuga_agent = None

    async def asyncSetUp(self):
        """Async setup hook; nothing needs to be prepared asynchronously."""

    def tearDown(self):
        """Clean up after tests"""
        # Drop any tools a test registered so they do not leak into the next one.
        # The attribute is private, hence the defensive hasattr check.
        if hasattr(tracker, '_tools'):
            tracker._tools.clear()

    # ------------------------------------------------------------------
    # Private helpers (not collected as tests: names do not start with "test")
    # ------------------------------------------------------------------

    def _register_calculator_tools(self):
        """Tag every calculator tool with the server name and register them all.

        This is the setup previously repeated verbatim in each test that needs
        the calculator tools available on the shared tracker.
        """
        for tool in calculator_tools:
            tool.metadata = {'server_name': self.SERVER_NAME}
        tracker.set_tools(calculator_tools)

    def _assert_calculator_tool_names(self, names):
        """Assert that each expected calculator tool name is present in ``names``."""
        for expected in self.EXPECTED_TOOL_NAMES:
            self.assertIn(expected, names)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_structured_tool_loading_from_langchain(self):
        """Test that StructuredTool.from_function works correctly with langchain tools"""
        # Test evaluate_expression tool
        evaluate_tool = StructuredTool.from_function(evaluate_expression)
        self.assertIsInstance(evaluate_tool, StructuredTool)
        self.assertEqual(evaluate_tool.name, "evaluate_expression")
        self.assertIn("expression", evaluate_tool.args_schema.model_fields)

        # Test get_pi tool
        pi_tool = StructuredTool.from_function(get_pi)
        self.assertIsInstance(pi_tool, StructuredTool)
        self.assertEqual(pi_tool.name, "get_pi")

        # Test factorial tool
        factorial_tool = StructuredTool.from_function(calculate_factorial)
        self.assertIsInstance(factorial_tool, StructuredTool)
        self.assertEqual(factorial_tool.name, "calculate_factorial")
        self.assertIn("n", factorial_tool.args_schema.model_fields)

        # Test that all calculator tools are properly created
        self.assertEqual(len(calculator_tools), 3)
        self._assert_calculator_tool_names([tool.name for tool in calculator_tools])

    def test_calculator_tool_functionality(self):
        """Test the calculator tool functions work correctly"""
        # Test evaluate_expression
        result = evaluate_expression("2 + 3 * 4")
        self.assertTrue(result.success)
        self.assertEqual(result.result, 14.0)
        self.assertEqual(result.expression, "2 + 3 * 4")

        # Test with math functions
        result = evaluate_expression("sin(pi/2)")
        self.assertTrue(result.success)
        self.assertAlmostEqual(result.result, 1.0, places=5)

        # Test error handling
        result = evaluate_expression("invalid syntax +++")
        self.assertFalse(result.success)
        self.assertIn("error_message", result.model_dump())

        # Test get_pi
        result = get_pi()
        self.assertAlmostEqual(result.pi_value, 3.141592653589793, places=10)

        # Test factorial
        result = calculate_factorial(5)
        self.assertTrue(result.success)
        self.assertEqual(result.result, 120)
        self.assertEqual(result.n, 5)

        # Test factorial error
        result = calculate_factorial(-1)
        self.assertFalse(result.success)
        self.assertIn("error_message", result.model_dump())

    async def test_get_apis_with_runtime_tools(self):
        """Test get_apis function with runtime tools setup"""
        self._register_calculator_tools()

        # NOTE(review): unlike test_get_apps_with_runtime_tools, this test does
        # not patch 'cuga.config.settings.advanced_features.registry' to False;
        # get_apis runs under whatever the active settings are. Confirm whether
        # the registry should also be disabled here.
        result = await get_apis(self.SERVER_NAME)
        self.assertIsInstance(result, dict)
        self.assertEqual(len(result), 3)
        self._assert_calculator_tool_names(result.keys())

    async def test_get_apps_with_runtime_tools(self):
        """Test get_apps function with runtime tools setup"""
        self._register_calculator_tools()

        # Test get_apps when registry is disabled (using external apps)
        with patch('cuga.config.settings.advanced_features.registry', False):
            result = await get_apps()
            self.assertIsInstance(result, list)
            app_names = [app.name for app in result]
            # Should include apps from tools
            self.assertIn(self.SERVER_NAME, app_names)

    async def test_run_code_with_api_calls(self):
        """Test run_code functionality with code that makes API calls"""
        self._register_calculator_tools()

        # Create code that uses call_api to invoke calculator functions
        code = '''
# Test evaluate expression via API
result1 = await call_api("calculator", "evaluate_expression", {"expression": "10 + 5"})
print(f"10 + 5 = {result1['result']}")

# Test get pi via API
pi_result = await call_api("calculator", "get_pi", {})
print(f"Pi = {pi_result}")

# Test factorial via API
fact_result = await call_api("calculator", "calculate_factorial", {"n": 4})
print(f"4! = {fact_result['result']}")

# Test API-style calls with different expressions
api_result1 = await call_api("calculator", "evaluate_expression", {"expression": "2 * 3 + 4"})
print(f"API call result: {api_result1['result']}")

api_result2 = await call_api("calculator", "calculate_factorial", {"n": 3})
print(f"API factorial result: {api_result2['result']}")
'''

        # Run the code in sandbox; the second returned value is not needed here.
        state = AgentState(input="test", url="")
        output, _ = await run_code(code, state)
        print(output)
        # Verify output contains expected results
        self.assertIn("10 + 5 = 15.0", output)
        self.assertIn("Pi =", output)
        self.assertIn("4! = 24", output)
        self.assertIn("API call result: 10.0", output)
        self.assertIn("API factorial result: 6", output)

    async def test_full_runtime_tools_workflow(self):
        """Test the full workflow similar to main.py example"""
        # Initialize CugaAgent similar to main.py
        cuga_agent = CugaAgent(browser_enabled=False)
        await cuga_agent.initialize_appworld_env()

        # Register the tools only after the agent environment is initialized,
        # mirroring the order used in the main.py example.
        self._register_calculator_tools()

        # Verify tools are registered
        registered_tools = tracker.get_tools_by_server(self.SERVER_NAME)
        self.assertEqual(len(registered_tools), 3)
        self._assert_calculator_tool_names(list(registered_tools.keys()))

        # Verify apps are created
        self.assertGreater(len(tracker.apps), 0)
        app_names = [app.name for app in tracker.apps]
        self.assertIn(self.SERVER_NAME, app_names)

    def test_langchain_tool_metadata(self):
        """Test that langchain tools have proper metadata and descriptions"""
        for tool in calculator_tools:
            # Check tool has name
            self.assertIsNotNone(tool.name)
            self.assertIsInstance(tool.name, str)

            # Check tool has description
            self.assertIsNotNone(tool.description)
            self.assertIsInstance(tool.description, str)

            # Check tool has args_schema
            self.assertIsNotNone(tool.args_schema)

            # Verify metadata can be set (as done in main.py)
            tool.metadata = {'server_name': self.SERVER_NAME}
            self.assertEqual(tool.metadata['server_name'], "calculator")

    async def test_asyncio_run_from_running_loop_fixed(self):
        """Test that run_code works correctly even when called from an async context

        This test itself runs inside an event loop (it is an ``async def`` test).
        Per the original author's note, run_code is expected to cope with an
        already-running loop instead of failing with
        'asyncio.run() cannot be called from a running event loop'.
        Two consecutive calls sharing one state are made to check the behavior
        holds on repeated use.
        """
        self._register_calculator_tools()

        # First code execution
        code1 = '''
result = await call_api("calculator", "evaluate_expression", {"expression": "5 + 3"})
print(f"Result 1: {result['result']}")
'''

        state = AgentState(input="test", url="")
        output1, _ = await run_code(code1, state)
        print(f"First Output: {output1}")

        # Verify first execution worked correctly
        self.assertNotIn("asyncio.run() cannot be called from a running event loop", output1)
        self.assertIn("Result 1: 8.0", output1)

        # Second consecutive code execution, reusing the same state object
        code2 = '''
factorial_result = await call_api("calculator", "calculate_factorial", {"n": 5})
print(f"Result 2: {factorial_result['result']}")
'''

        output2, _ = await run_code(code2, state)
        print(f"Second Output: {output2}")

        # Verify second execution also worked correctly
        self.assertNotIn("asyncio.run() cannot be called from a running event loop", output2)
        self.assertIn("Result 2: 120", output2)


if __name__ == "__main__":
    # Allow running this file directly as a script: unittest.main() loads and
    # runs the test cases defined in this module.
    unittest.main()