David committed on
Commit
1aa6f63
·
1 Parent(s): 22dea1c

new version working

Browse files
Files changed (31) hide show
  1. .gitignore +30 -0
  2. README.md +33 -2
  3. aphra/__init__.py +4 -7
  4. aphra/core/__init__.py +35 -0
  5. aphra/core/config.py +106 -0
  6. aphra/core/context.py +64 -0
  7. aphra/{llm_client.py → core/llm_client.py} +34 -13
  8. aphra/core/parsers.py +125 -0
  9. aphra/core/prompts.py +82 -0
  10. aphra/core/registry.py +167 -0
  11. aphra/core/workflow.py +97 -0
  12. aphra/parsers.py +0 -59
  13. aphra/prompts.py +0 -22
  14. aphra/translate.py +23 -115
  15. aphra/workflows/__init__.py +152 -0
  16. aphra/workflows/short_article/__init__.py +10 -0
  17. aphra/workflows/short_article/aux/__init__.py +10 -0
  18. aphra/workflows/short_article/aux/parsers.py +75 -0
  19. aphra/workflows/short_article/config/default.toml +7 -0
  20. aphra/{prompts → workflows/short_article/prompts}/step1_system.txt +0 -0
  21. aphra/{prompts → workflows/short_article/prompts}/step1_user.txt +0 -0
  22. aphra/{prompts → workflows/short_article/prompts}/step2_system.txt +0 -0
  23. aphra/{prompts → workflows/short_article/prompts}/step2_user.txt +0 -0
  24. aphra/{prompts → workflows/short_article/prompts}/step3_system.txt +0 -0
  25. aphra/{prompts → workflows/short_article/prompts}/step3_user.txt +0 -0
  26. aphra/{prompts → workflows/short_article/prompts}/step4_system.txt +0 -0
  27. aphra/{prompts → workflows/short_article/prompts}/step4_user.txt +0 -0
  28. aphra/{prompts → workflows/short_article/prompts}/step5_system.txt +0 -0
  29. aphra/{prompts → workflows/short_article/prompts}/step5_user.txt +0 -0
  30. aphra/workflows/short_article/short_article_workflow.py +325 -0
  31. app.py +105 -16
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .env
22
+ .venv
23
+ env/
24
+ venv/
25
+ ENV/
26
+ env.bak/
27
+ venv.bak/
28
+ *.log
29
+ .DS_Store
30
+ config.toml
README.md CHANGED
@@ -10,6 +10,37 @@ pinned: true
10
  license: mit
11
  ---
12
 
13
- πŸŒπŸ’¬ Aphra is an open-source translation agent designed to enhance the quality of text translations by leveraging large language models (LLMs). Unlike traditional translation tools that rely solely on direct translations, Aphra introduces a multi-stage, context-aware process that includes glossary creation, contextual search, critique, and refinement. This approach aims to produce translations that not only retain the original meaning but also incorporate translator notes, contextual adjustments, and stylistic improvements. Whether you're translating blog posts, articles, or complex documents, Aphra ensures a more nuanced and accurate translation that respects the original content's integrity.
14
 
15
- > **Important Note:** πŸŒπŸ’¬ Aphra is not intended to replace the work of a professional translator. Instead, it aims to facilitate multilingual support in small projects where hiring a professional translator may not be feasible. Aphra offers a practical solution for achieving quality translations in contexts where a fully professional translation service is out of scope, ensuring that language barriers do not hinder the global reach of your content.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # πŸŒπŸ’¬ Aphra - Open-Source Translation Agent
14
 
15
+ πŸŒπŸ’¬ Aphra is an open-source translation agent with a workflow architecture designed to enhance the quality of text translations by leveraging large language models (LLMs). Unlike traditional translation tools that rely solely on direct translations, Aphra introduces a multi-stage, context-aware process that includes glossary creation, contextual search, critique, and refinement. This approach aims to produce translations that not only retain the original meaning but also incorporate translator notes, contextual adjustments, and stylistic improvements.
16
+
17
+ ## Features
18
+
19
+ - **Multi-stage Translation Process**: Analysis → Search → Translation → Critique → Refinement
20
+ - **Context-Aware**: Uses web search to gather contextual information about key terms
21
+ - **Flexible Model Selection**: Choose different LLMs for writing, searching, and critique
22
+ - **Workflow Architecture**: Modular design allows for easy customization and extension
23
+ - **OpenRouter Integration**: Access to multiple state-of-the-art language models
24
+
25
+ ## How to Use
26
+
27
+ 1. **Enter your OpenRouter API Key**: Get one at [OpenRouter](https://openrouter.ai/)
28
+ 2. **Select Models**: Choose appropriate models for each role (Writer, Searcher, Critic)
29
+ 3. **Set Languages**: Select source and target languages
30
+ 4. **Input Text**: Upload a file (.txt or .md) or paste your text directly
31
+ 5. **Translate**: Click the translate button and wait for the result
32
+
33
+ ## Model Recommendations
34
+
35
+ - **Writer**: `anthropic/claude-sonnet-4` - Excellent for translation and refinement
36
+ - **Searcher**: `perplexity/sonar` - Specialized in web search and information gathering
37
+ - **Critic**: `anthropic/claude-sonnet-4` - Good at analyzing and providing feedback
38
+
39
+ ## Links
40
+
41
+ - [Project Page](https://davidlms.github.io/aphra/)
42
+ - [GitHub Repository](https://github.com/DavidLMS/aphra)
43
+
44
+ ## Important Note
45
+
46
+ πŸŒπŸ’¬ Aphra is not intended to replace the work of a professional translator. Instead, it aims to facilitate multilingual support in small projects where hiring a professional translator may not be feasible. Aphra offers a practical solution for achieving quality translations in contexts where a fully professional translation service is out of scope, ensuring that language barriers do not hinder the global reach of your content.
aphra/__init__.py CHANGED
@@ -1,12 +1,9 @@
1
  """
2
  Aphra package initializer.
3
-
4
- This module exposes the translate function from the translate module.
5
  """
6
-
7
  from .translate import translate
8
- from . import llm_client
9
- from . import prompts
10
- from . import parsers
11
 
12
- __all__ = ['translate', 'llm_client', 'prompts', 'parsers']
 
1
  """
2
  Aphra package initializer.
3
+ This module exposes the main API components and modules.
 
4
  """
 
5
  from .translate import translate
6
+ from . import workflows
7
+ from . import core
 
8
 
9
+ __all__ = ['translate', 'workflows', 'core']
aphra/core/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core components for the Aphra translation system.
3
+
4
+ This module contains the fundamental building blocks used across
5
+ all workflows.
6
+ """
7
+
8
+ from .llm_client import LLMModelClient
9
+ from .parsers import parse_xml_tag, parse_multiple_xml_tags, parse_xml_tag_with_attributes
10
+ from .prompts import get_prompt, list_workflow_prompts
11
+ from .context import TranslationContext
12
+ from .workflow import AbstractWorkflow
13
+ from .registry import (
14
+ WorkflowRegistry,
15
+ get_registry,
16
+ register_workflow,
17
+ get_workflow,
18
+ get_suitable_workflow
19
+ )
20
+
21
+ __all__ = [
22
+ 'LLMModelClient',
23
+ 'parse_xml_tag',
24
+ 'parse_multiple_xml_tags',
25
+ 'parse_xml_tag_with_attributes',
26
+ 'get_prompt',
27
+ 'list_workflow_prompts',
28
+ 'TranslationContext',
29
+ 'AbstractWorkflow',
30
+ 'WorkflowRegistry',
31
+ 'get_registry',
32
+ 'register_workflow',
33
+ 'get_workflow',
34
+ 'get_suitable_workflow'
35
+ ]
aphra/core/config.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generic configuration management for workflows.
3
+
4
+ This module provides functions to load and merge workflow-specific configuration
5
+ with user overrides for any workflow in the system.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Any, Optional
10
+ import logging
11
+ import toml
12
+
13
+ def load_workflow_config(workflow_name: str,
14
+ global_config_path: Optional[str] = None) -> Dict[str, Any]:
15
+ """
16
+ Load workflow configuration with user overrides.
17
+
18
+ This generic function works for any workflow by:
19
+ 1. Loading default config from workflows/{workflow_name}/config/default.toml
20
+ 2. Applying user overrides from config.toml section [{workflow_name}]
21
+ 3. Returning the merged configuration
22
+
23
+ Args:
24
+ workflow_name: Name of the workflow (e.g., 'short_article', 'subtitles')
25
+ global_config_path: Path to global config.toml file. If None, looks for
26
+ config.toml in the current working directory.
27
+
28
+ Returns:
29
+ Dict containing merged configuration values
30
+
31
+ Example:
32
+ config = load_workflow_config('short_article')
33
+ writer_model = config.get('writer', 'default-model')
34
+ """
35
+ # Build path to workflow's default config
36
+ # Assuming we're in aphra/core/ and want to reach aphra/workflows/
37
+ core_dir = os.path.dirname(__file__)
38
+ aphra_dir = os.path.dirname(core_dir)
39
+ workflow_config_path = os.path.join(
40
+ aphra_dir, 'workflows', workflow_name, 'config', 'default.toml'
41
+ )
42
+
43
+ # Load default workflow config
44
+ config = {}
45
+ try:
46
+ with open(workflow_config_path, 'r', encoding='utf-8') as config_file:
47
+ config = toml.load(config_file)
48
+ logging.debug("Loaded default config for workflow '%s'", workflow_name)
49
+ except FileNotFoundError:
50
+ logging.warning("Default config not found for workflow '%s' at %s",
51
+ workflow_name, workflow_config_path)
52
+ config = {}
53
+ except Exception as exc:
54
+ logging.error("Error loading default config for workflow '%s': %s",
55
+ workflow_name, exc)
56
+ config = {}
57
+
58
+ # Load user overrides from global config
59
+ if global_config_path is None:
60
+ global_config_path = 'config.toml'
61
+
62
+ try:
63
+ with open(global_config_path, 'r', encoding='utf-8') as config_file:
64
+ global_config = toml.load(config_file)
65
+
66
+ # Apply overrides from workflow-specific section
67
+ if workflow_name in global_config:
68
+ config.update(global_config[workflow_name])
69
+ logging.debug("Applied user overrides for workflow '%s'", workflow_name)
70
+
71
+ except FileNotFoundError:
72
+ logging.debug("Global config file not found: %s", global_config_path)
73
+ # No global config file, use defaults
74
+ except Exception as exc:
75
+ logging.warning("Error reading global config file %s: %s",
76
+ global_config_path, exc)
77
+ # Error reading config, use defaults
78
+
79
+ return config
80
+
81
+ def get_workflow_config_path(workflow_name: str) -> str:
82
+ """
83
+ Get the path to a workflow's default configuration file.
84
+
85
+ Args:
86
+ workflow_name: Name of the workflow
87
+
88
+ Returns:
89
+ str: Path to the workflow's default.toml file
90
+ """
91
+ core_dir = os.path.dirname(__file__)
92
+ aphra_dir = os.path.dirname(core_dir)
93
+ return os.path.join(aphra_dir, 'workflows', workflow_name, 'config', 'default.toml')
94
+
95
+ def workflow_has_config(workflow_name: str) -> bool:
96
+ """
97
+ Check if a workflow has a configuration file.
98
+
99
+ Args:
100
+ workflow_name: Name of the workflow
101
+
102
+ Returns:
103
+ bool: True if the workflow has a default.toml file
104
+ """
105
+ config_path = get_workflow_config_path(workflow_name)
106
+ return os.path.isfile(config_path)
aphra/core/context.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Context management for translation workflows.
3
+
4
+ This module provides the TranslationContext class that encapsulates
5
+ all the state and configuration needed during translation execution.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Dict, Any, Optional
10
+ from .llm_client import LLMModelClient
11
+
12
+ @dataclass
13
+ class TranslationContext:
14
+ """
15
+ Context for translation containing parameters and settings.
16
+
17
+ This class encapsulates the parameters and settings needed for performing a translation,
18
+ including the model client, source and target languages, and logging preferences.
19
+ """
20
+ model_client: LLMModelClient
21
+ source_language: str
22
+ target_language: str
23
+ log_calls: bool
24
+
25
+ # Additional fields for workflow state
26
+ metadata: Dict[str, Any] = None
27
+ intermediate_results: Dict[str, Any] = None
28
+ workflow_config: Optional[Dict[str, Any]] = None
29
+
30
+ def __post_init__(self):
31
+ """Initialize optional fields if not provided."""
32
+ if self.metadata is None:
33
+ self.metadata = {}
34
+ if self.intermediate_results is None:
35
+ self.intermediate_results = {}
36
+ if self.workflow_config is None:
37
+ self.workflow_config = {}
38
+
39
+ def get_workflow_config(self, key: str = None, default: Any = None) -> Any:
40
+ """
41
+ Get workflow-specific configuration value.
42
+
43
+ Args:
44
+ key: Configuration key to retrieve. If None, returns full config dict.
45
+ default: Default value if key is not found.
46
+
47
+ Returns:
48
+ Configuration value or default if not found.
49
+ """
50
+ if key is None:
51
+ return self.workflow_config
52
+ return self.workflow_config.get(key, default)
53
+
54
+ def set_workflow_config(self, config: Dict[str, Any]) -> None:
55
+ """Set workflow-specific configuration."""
56
+ self.workflow_config = config
57
+
58
+ def store_result(self, step_name: str, result: Any) -> None:
59
+ """Store intermediate result from a workflow step."""
60
+ self.intermediate_results[step_name] = result
61
+
62
+ def get_result(self, step_name: str) -> Any:
63
+ """Retrieve intermediate result from a workflow step."""
64
+ return self.intermediate_results.get(step_name)
aphra/{llm_client.py → core/llm_client.py} RENAMED
@@ -35,18 +35,19 @@ class LLMModelClient:
35
  with open(config_file_path, 'r', encoding='utf-8') as file:
36
  config = toml.load(file)
37
  self.api_key_openrouter = config['openrouter']['api_key']
38
- self.llms = config['llms']
39
  except FileNotFoundError:
40
  logging.error('File not found: %s', config_file_path)
41
  raise
42
  except toml.TomlDecodeError:
43
  logging.error('Error decoding TOML file: %s', config_file_path)
44
  raise
45
- except KeyError as e:
46
- logging.error('Missing key in config file: %s', e)
47
  raise
48
 
49
- def call_model(self, system_prompt, user_prompt, model_name, log_call=False):
 
 
50
  """
51
  Calls the model with the provided prompts.
52
 
@@ -54,28 +55,48 @@ class LLMModelClient:
54
  :param user_prompt: The user prompt to send to the model.
55
  :param model_name: The name of the model to use.
56
  :param log_call: Boolean indicating whether to log the call details.
 
 
57
  :return: The model's response.
58
  """
 
59
  try:
60
- response = self.client.chat.completions.create(
61
- model=model_name,
62
- messages=[
 
63
  {"role": "system", "content": system_prompt},
64
  {"role": "user", "content": user_prompt}
65
  ]
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  response_content = response.choices[0].message.content
68
 
69
  if log_call:
70
  self.log_model_call(user_prompt, response_content)
71
 
72
  return response_content
73
- except requests.exceptions.RequestException as e:
74
- logging.error('Request error: %s', e)
75
  raise
76
- except (ValueError, KeyError, TypeError) as e:
77
- logging.error('Error parsing response: %s', e)
78
- logging.error('Response content: %s', response.text if response else 'No response')
 
 
 
79
  raise
80
 
81
  def log_model_call(self, user_prompt, response):
 
35
  with open(config_file_path, 'r', encoding='utf-8') as file:
36
  config = toml.load(file)
37
  self.api_key_openrouter = config['openrouter']['api_key']
 
38
  except FileNotFoundError:
39
  logging.error('File not found: %s', config_file_path)
40
  raise
41
  except toml.TomlDecodeError:
42
  logging.error('Error decoding TOML file: %s', config_file_path)
43
  raise
44
+ except KeyError as exc:
45
+ logging.error('Missing key in config file: %s', exc)
46
  raise
47
 
48
+ def call_model(self, system_prompt, user_prompt, model_name, *,
49
+ log_call=False, enable_web_search=False,
50
+ web_search_context="high"):
51
  """
52
  Calls the model with the provided prompts.
53
 
 
55
  :param user_prompt: The user prompt to send to the model.
56
  :param model_name: The name of the model to use.
57
  :param log_call: Boolean indicating whether to log the call details.
58
+ :param enable_web_search: Boolean indicating whether to enable web search via OpenRouter.
59
+ :param web_search_context: Context size for web search ('low', 'medium', 'high').
60
  :return: The model's response.
61
  """
62
+ response = None
63
  try:
64
+ # Prepare the request parameters
65
+ request_params = {
66
+ "model": model_name,
67
+ "messages": [
68
  {"role": "system", "content": system_prompt},
69
  {"role": "user", "content": user_prompt}
70
  ]
71
+ }
72
+
73
+ # Add web search capabilities if enabled (OpenRouter format)
74
+ if enable_web_search:
75
+ # Append :online to model name for web search
76
+ if not model_name.endswith(":online"):
77
+ request_params["model"] = f"{model_name}:online"
78
+
79
+ # Add web search options
80
+ request_params["web_search_options"] = {
81
+ "search_context_size": web_search_context
82
+ }
83
+
84
+ response = self.client.chat.completions.create(**request_params)
85
  response_content = response.choices[0].message.content
86
 
87
  if log_call:
88
  self.log_model_call(user_prompt, response_content)
89
 
90
  return response_content
91
+ except requests.exceptions.RequestException as exc:
92
+ logging.error('Request error: %s', exc)
93
  raise
94
+ except (ValueError, KeyError, TypeError) as exc:
95
+ logging.error('Error parsing response: %s', exc)
96
+ if response and hasattr(response, 'text'):
97
+ logging.error('Response content: %s', response.text)
98
+ else:
99
+ logging.error('No response available')
100
  raise
101
 
102
  def log_model_call(self, user_prompt, response):
aphra/core/parsers.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generic parsing utilities for XML-like content extraction.
3
+
4
+ This module provides generic parsers that can be used across different
5
+ workflows for extracting content from XML-like tags in LLM responses.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from typing import Optional
11
+
12
+ def parse_xml_tag(content: str, tag_name: str) -> Optional[str]:
13
+ """
14
+ Extract content from within XML-like tags in a string.
15
+
16
+ This is a generic parser that can extract content from any XML-like tag
17
+ in LLM responses, making it reusable across different workflows.
18
+
19
+ Args:
20
+ content: The string content containing XML-like tags
21
+ tag_name: The name of the tag to extract (without < >)
22
+
23
+ Returns:
24
+ str: The content within the tags, or None if not found
25
+
26
+ Example:
27
+ >>> content = "Some text <result>Hello World</result> more text"
28
+ >>> parse_xml_tag(content, "result")
29
+ "Hello World"
30
+ """
31
+ try:
32
+ start_tag = f"<{tag_name}>"
33
+ end_tag = f"</{tag_name}>"
34
+
35
+ start_index = content.find(start_tag)
36
+ if start_index == -1:
37
+ logging.warning("Start tag '<%s>' not found in content", tag_name)
38
+ return None
39
+
40
+ start_index += len(start_tag)
41
+ end_index = content.find(end_tag, start_index)
42
+
43
+ if end_index == -1:
44
+ logging.warning("End tag '</%s>' not found in content", tag_name)
45
+ return None
46
+
47
+ extracted_content = content[start_index:end_index].strip()
48
+ return extracted_content
49
+
50
+ except Exception as exc:
51
+ logging.error("Error parsing XML tag '%s': %s", tag_name, exc)
52
+ return None
53
+
54
+ def parse_multiple_xml_tags(content: str, tag_name: str) -> list[str]:
55
+ """
56
+ Extract content from multiple XML-like tags of the same type.
57
+
58
+ Args:
59
+ content: The string content containing XML-like tags
60
+ tag_name: The name of the tag to extract (without < >)
61
+
62
+ Returns:
63
+ list[str]: List of content within all matching tags
64
+
65
+ Example:
66
+ >>> content = "Text <item>First</item> more <item>Second</item> end"
67
+ >>> parse_multiple_xml_tags(content, "item")
68
+ ["First", "Second"]
69
+ """
70
+ try:
71
+ # Use regex to find all occurrences
72
+ pattern = f"<{re.escape(tag_name)}>(.*?)</{re.escape(tag_name)}>"
73
+ matches = re.findall(pattern, content, re.DOTALL)
74
+
75
+ # Strip whitespace from each match
76
+ results = [match.strip() for match in matches]
77
+ return results
78
+
79
+ except Exception as exc:
80
+ logging.error("Error parsing multiple XML tags '%s': %s", tag_name, exc)
81
+ return []
82
+
83
+ def parse_xml_tag_with_attributes(content: str, tag_name: str) -> Optional[dict]:
84
+ """
85
+ Extract content and attributes from XML-like tags.
86
+
87
+ Args:
88
+ content: The string content containing XML-like tags
89
+ tag_name: The name of the tag to extract (without < >)
90
+
91
+ Returns:
92
+ dict: Dictionary with 'content' and 'attributes' keys, or None if not found
93
+
94
+ Example:
95
+ >>> content = 'Text <result type="success">Hello World</result>'
96
+ >>> parse_xml_tag_with_attributes(content, "result")
97
+ {"content": "Hello World", "attributes": {"type": "success"}}
98
+ """
99
+ try:
100
+ # Pattern to match tag with optional attributes
101
+ pattern = f"<{re.escape(tag_name)}([^>]*)>(.*?)</{re.escape(tag_name)}>"
102
+ match = re.search(pattern, content, re.DOTALL)
103
+
104
+ if not match:
105
+ logging.warning("Tag '<%s>' not found in content", tag_name)
106
+ return None
107
+
108
+ attributes_str = match.group(1).strip()
109
+ tag_content = match.group(2).strip()
110
+
111
+ # Parse attributes if any
112
+ attributes = {}
113
+ if attributes_str:
114
+ # Simple attribute parsing (handles key="value" format)
115
+ attr_pattern = r'(\w+)="([^"]*)"'
116
+ attributes = dict(re.findall(attr_pattern, attributes_str))
117
+
118
+ return {
119
+ 'content': tag_content,
120
+ 'attributes': attributes
121
+ }
122
+
123
+ except Exception as exc:
124
+ logging.error("Error parsing XML tag with attributes '%s': %s", tag_name, exc)
125
+ return None
aphra/core/prompts.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core prompt template loading utilities.
3
+
4
+ This module provides generic prompt template loading functionality
5
+ for all workflows in the Aphra translation system.
6
+ """
7
+
8
+ import os
9
+ from importlib import resources
10
+
11
+ def get_prompt(workflow_name: str, file_name: str, **kwargs) -> str:
12
+ """
13
+ Reads a prompt template from a workflow's prompts directory and formats it.
14
+
15
+ Args:
16
+ workflow_name: Name of the workflow (e.g., 'short_article', 'subtitles')
17
+ file_name: Name of the prompt file (e.g., 'step1_system.txt')
18
+ **kwargs: Optional keyword arguments to format the prompt template
19
+
20
+ Returns:
21
+ str: The formatted prompt content
22
+
23
+ Raises:
24
+ FileNotFoundError: If the prompt file doesn't exist
25
+ KeyError: If required format parameters are missing
26
+ """
27
+ try:
28
+ # Try using importlib.resources first (works in packaged installations)
29
+ ref = resources.files('aphra.workflows') / workflow_name / 'prompts' / file_name
30
+ with ref.open('r', encoding="utf-8") as file:
31
+ content = file.read()
32
+ except (AttributeError, FileNotFoundError) as exc:
33
+ # Fallback to direct file access (works in development)
34
+ workflows_path = os.path.dirname(os.path.dirname(__file__)) # Go up to aphra/
35
+ file_path = os.path.join(workflows_path, 'workflows', workflow_name, 'prompts', file_name)
36
+
37
+ if not os.path.exists(file_path):
38
+ raise FileNotFoundError(f"Prompt file not found: {file_path}") from exc
39
+
40
+ with open(file_path, 'r', encoding="utf-8") as file:
41
+ content = file.read()
42
+
43
+ # Format the content with provided kwargs if any
44
+ if kwargs:
45
+ try:
46
+ formatted_prompt = content.format(**kwargs)
47
+ except KeyError as exc:
48
+ msg = f"Missing format parameter {exc} for prompt {workflow_name}/{file_name}"
49
+ raise KeyError(msg) from exc
50
+ else:
51
+ formatted_prompt = content
52
+
53
+ return formatted_prompt
54
+
55
+ def list_workflow_prompts(workflow_name: str) -> list[str]:
56
+ """
57
+ List all available prompt files for a workflow.
58
+
59
+ Args:
60
+ workflow_name: Name of the workflow
61
+
62
+ Returns:
63
+ list[str]: List of prompt filenames available for the workflow
64
+
65
+ Raises:
66
+ FileNotFoundError: If the workflow prompts directory doesn't exist
67
+ """
68
+ try:
69
+ # Try using importlib.resources first
70
+ prompts_ref = resources.files('aphra.workflows') / workflow_name / 'prompts'
71
+ return [f.name for f in prompts_ref.iterdir() if f.is_file()]
72
+ except (AttributeError, FileNotFoundError) as exc:
73
+ # Fallback to direct directory access
74
+ workflows_path = os.path.dirname(os.path.dirname(__file__))
75
+ prompts_path = os.path.join(workflows_path, 'workflows', workflow_name, 'prompts')
76
+
77
+ if not os.path.exists(prompts_path):
78
+ msg = f"Workflow prompts directory not found: {prompts_path}"
79
+ raise FileNotFoundError(msg) from exc
80
+
81
+ return [f for f in os.listdir(prompts_path)
82
+ if os.path.isfile(os.path.join(prompts_path, f))]
aphra/core/registry.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow registry for managing available translation workflows.
3
+
4
+ This module provides a centralized registry for discovering and
5
+ managing translation workflows.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, List, Optional, Type
10
+ from .workflow import AbstractWorkflow
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class WorkflowRegistry:
15
+ """
16
+ Registry for managing translation workflows.
17
+
18
+ This class maintains a registry of available workflows and provides
19
+ methods for workflow discovery and selection. Workflows are automatically
20
+ discovered and registered from the workflows package.
21
+ """
22
+
23
+ def __init__(self):
24
+ """Initialize the workflow registry with auto-discovered workflows."""
25
+ self._workflows: Dict[str, Type[AbstractWorkflow]] = {}
26
+ self._register_discovered_workflows()
27
+
28
+ def _register_discovered_workflows(self):
29
+ """Register all auto-discovered workflows from the workflows package."""
30
+ try:
31
+ # Import workflows after the module is fully initialized
32
+ from .. import workflows
33
+
34
+ # Get all workflow classes that were auto-discovered
35
+ for class_name in workflows.__all__:
36
+ workflow_class = getattr(workflows, class_name, None)
37
+ if workflow_class is not None:
38
+ self.register_workflow(workflow_class)
39
+ logger.debug("Auto-registered workflow: %s", class_name)
40
+ else:
41
+ logger.warning("Failed to get workflow class: %s", class_name)
42
+
43
+ except ImportError as exc:
44
+ logger.error("Failed to import workflows for auto-registration: %s", exc)
45
+ except Exception as exc:
46
+ logger.error("Unexpected error during workflow auto-registration: %s", exc)
47
+
48
+ def register_workflow(self, workflow_class: Type[AbstractWorkflow]):
49
+ """
50
+ Register a new workflow type.
51
+
52
+ Args:
53
+ workflow_class: The workflow class to register
54
+ """
55
+ # Create temporary instance to get the workflow name
56
+ temp_workflow = workflow_class()
57
+ workflow_name = temp_workflow.get_workflow_name()
58
+ self._workflows[workflow_name] = workflow_class
59
+
60
+ def get_workflow(self, workflow_name: str) -> Optional[AbstractWorkflow]:
61
+ """
62
+ Get a workflow instance by name.
63
+
64
+ Args:
65
+ workflow_name: The name of the workflow to retrieve
66
+
67
+ Returns:
68
+ AbstractWorkflow: An instance of the requested workflow, or None if not found
69
+ """
70
+ workflow_class = self._workflows.get(workflow_name)
71
+ if workflow_class:
72
+ return workflow_class()
73
+ return None
74
+
75
+ def get_suitable_workflow(self, text: str, **kwargs) -> Optional[AbstractWorkflow]:
76
+ """
77
+ Find the most suitable workflow for the given content.
78
+
79
+ Args:
80
+ text: The text content to analyze
81
+ **kwargs: Additional parameters for workflow evaluation
82
+
83
+ Returns:
84
+ AbstractWorkflow: The most suitable workflow instance, or None if none found
85
+ """
86
+ # For now, we check workflows in registration order
87
+ # In the future, we could implement more sophisticated selection logic
88
+ for workflow_class in self._workflows.values():
89
+ workflow = workflow_class()
90
+ if workflow.is_suitable_for(text, **kwargs):
91
+ return workflow
92
+
93
+ return None
94
+
95
+ def list_workflows(self) -> List[str]:
96
+ """
97
+ Get a list of all registered workflow names.
98
+
99
+ Returns:
100
+ List[str]: Names of all registered workflows
101
+ """
102
+ return list(self._workflows.keys())
103
+
104
+ def get_workflow_info(self, workflow_name: str) -> Optional[Dict[str, str]]:
105
+ """
106
+ Get information about a specific workflow.
107
+
108
+ Args:
109
+ workflow_name: The name of the workflow
110
+
111
+ Returns:
112
+ Dict[str, str]: Information about the workflow, or None if not found
113
+ """
114
+ workflow = self.get_workflow(workflow_name)
115
+ if workflow:
116
+ return {
117
+ 'name': workflow.get_workflow_name(),
118
+ 'class': workflow.__class__.__name__,
119
+ 'module': workflow.__class__.__module__
120
+ }
121
+ return None
122
+
123
+ # Global registry instance
124
+ _registry = WorkflowRegistry()
125
+
126
+ def get_registry() -> WorkflowRegistry:
127
+ """
128
+ Get the global workflow registry instance.
129
+
130
+ Returns:
131
+ WorkflowRegistry: The global registry instance
132
+ """
133
+ return _registry
134
+
135
+ def register_workflow(workflow_class: Type[AbstractWorkflow]):
136
+ """
137
+ Convenient function to register a workflow with the global registry.
138
+
139
+ Args:
140
+ workflow_class: The workflow class to register
141
+ """
142
+ _registry.register_workflow(workflow_class)
143
+
144
+ def get_workflow(workflow_name: str) -> Optional[AbstractWorkflow]:
145
+ """
146
+ Convenient function to get a workflow from the global registry.
147
+
148
+ Args:
149
+ workflow_name: The name of the workflow to retrieve
150
+
151
+ Returns:
152
+ AbstractWorkflow: An instance of the requested workflow, or None if not found
153
+ """
154
+ return _registry.get_workflow(workflow_name)
155
+
156
+ def get_suitable_workflow(text: str, **kwargs) -> Optional[AbstractWorkflow]:
157
+ """
158
+ Convenient function to find a suitable workflow from the global registry.
159
+
160
+ Args:
161
+ text: The text content to analyze
162
+ **kwargs: Additional parameters for workflow evaluation
163
+
164
+ Returns:
165
+ AbstractWorkflow: The most suitable workflow instance, or None if none found
166
+ """
167
+ return _registry.get_suitable_workflow(text, **kwargs)
aphra/core/workflow.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow base classes.
3
+
4
+ This module defines the contract for translation workflows.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Dict, Any
9
+ from .context import TranslationContext
10
+ from .config import load_workflow_config
11
+
12
class AbstractWorkflow(ABC):
    """
    Base class for translation workflows.

    A workflow orchestrates a translation process for a specific type of
    content; subclasses override the abstract hooks below to customize
    behavior.
    """

    @abstractmethod
    def get_workflow_name(self) -> str:
        """
        Return the unique identifier of this workflow.

        Returns:
            str: The workflow name identifier.
        """
        raise NotImplementedError("Subclasses must implement get_workflow_name")

    @abstractmethod
    def is_suitable_for(self, text: str, **kwargs) -> bool:
        """
        Decide whether this workflow can handle the given content.

        Args:
            text: The text content to evaluate.
            **kwargs: Additional evaluation parameters.

        Returns:
            bool: True when this workflow is suitable for the content.
        """
        raise NotImplementedError("Subclasses must implement is_suitable_for")

    def load_config(self, global_config_path: str = None) -> Dict[str, Any]:
        """
        Load this workflow's configuration.

        The workflow's default configuration is loaded first, then user
        overrides from the global config file are applied on top.

        Args:
            global_config_path: Path to the global config file. If None,
                'config.toml' is used.

        Returns:
            Dict[str, Any]: The merged configuration values.
        """
        return load_workflow_config(self.get_workflow_name(), global_config_path)

    def run(self, context: TranslationContext, text: str = None) -> str:
        """
        Execute the workflow end to end, handling configuration setup.

        Loads the workflow-specific configuration, stores it in the
        translation context, resolves the input text, and delegates to
        execute().

        Args:
            context: The translation context.
            text: The text to translate (optional if already in context).

        Returns:
            str: The final translation result.
        """
        context.set_workflow_config(self.load_config())

        if text is None:
            # Fall back to any text carried on the context itself.
            text = getattr(context, 'text', '')

        return self.execute(context, text)

    @abstractmethod
    def execute(self, context: TranslationContext, text: str) -> str:
        """
        Carry out the workflow steps for the given context and text.

        Args:
            context: The translation context.
            text: The text to translate.

        Returns:
            str: The final translation result.
        """
        raise NotImplementedError("Subclasses must implement execute")
aphra/parsers.py DELETED
@@ -1,59 +0,0 @@
1
- """
2
- Module for parsing analysis and translation strings.
3
- """
4
-
5
- import xml.etree.ElementTree as ET
6
- import logging
7
-
8
- def parse_analysis(analysis_str):
9
- """
10
- Parses the analysis part of the provided string and returns
11
- a list of items with their names and keywords.
12
-
13
- :param analysis_str: String containing the analysis in the specified format.
14
- :return: A list of dictionaries, each containing 'name' and 'keywords' from the analysis.
15
- """
16
- try:
17
- # Extract the <analysis> part
18
- analysis_start = analysis_str.index("<analysis>") + len("<analysis>")
19
- analysis_end = analysis_str.index("</analysis>")
20
- analysis_content = analysis_str[analysis_start:analysis_end].strip()
21
-
22
- # Parse the analysis content using XML parser
23
- root = ET.fromstring(f"<root>{analysis_content}</root>")
24
- items = []
25
-
26
- for item in root.findall('item'):
27
- name = item.find('name').text
28
- keywords = item.find('keywords').text
29
- items.append({'name': name, 'keywords': keywords.split(', ')})
30
-
31
- return items
32
- except ValueError as e:
33
- logging.error('Error parsing analysis string: %s', e)
34
- return []
35
- except ET.ParseError as e:
36
- logging.error('Error parsing XML content: %s', e)
37
- return []
38
-
39
- def parse_translation(translation_str):
40
- """
41
- Parses the provided string and returns the content within
42
- <improved_translation> and <translators_notes> tags.
43
-
44
- :param translation_str: String containing the translation and notes in the specified format.
45
- :return: String containing the <improved_translation>.
46
- """
47
- try:
48
- improved_translation_start = (
49
- translation_str.index("<improved_translation>") + len("<improved_translation>")
50
- )
51
- improved_translation_end = translation_str.index("</improved_translation>")
52
- improved_translation_content = translation_str[
53
- improved_translation_start:improved_translation_end
54
- ].strip()
55
-
56
- return improved_translation_content
57
- except ValueError as e:
58
- logging.error('Error parsing translation string: %s', e)
59
- return "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aphra/prompts.py DELETED
@@ -1,22 +0,0 @@
1
- """
2
- Module for reading and formatting prompt templates.
3
- """
4
-
5
- from pkg_resources import resource_filename
6
-
7
- def get_prompt(file_name, **kwargs):
8
- """
9
- Reads a prompt template from a file and formats it with the given arguments.
10
-
11
- :param file_name: Path to the file containing the prompt template.
12
- :param kwargs: Optional keyword arguments to format the prompt template.
13
- :return: The formatted prompt.
14
- """
15
- file_path = resource_filename(__name__, f'prompts/{file_name}')
16
- with open(file_path, 'r', encoding="utf-8") as file:
17
- content = file.read()
18
- if kwargs:
19
- formatted_prompt = content.format(**kwargs)
20
- else:
21
- formatted_prompt = content
22
- return formatted_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aphra/translate.py CHANGED
@@ -1,24 +1,13 @@
1
  """
2
  Module for translating text using multiple steps and language models.
3
- """
4
-
5
- from dataclasses import dataclass
6
- from .llm_client import LLMModelClient
7
- from .prompts import get_prompt
8
- from .parsers import parse_analysis, parse_translation
9
 
10
- @dataclass
11
- class TranslationContext:
12
- """
13
- Context for translation containing parameters and settings.
14
 
15
- This class encapsulates the parameters and settings needed for performing a translation,
16
- including the model client, source and target languages, and logging preferences.
17
- """
18
- model_client: LLMModelClient
19
- source_language: str
20
- target_language: str
21
- log_calls: bool
22
 
23
  def load_model_client(config_file):
24
  """
@@ -29,57 +18,12 @@ def load_model_client(config_file):
29
  """
30
  return LLMModelClient(config_file)
31
 
32
- def execute_model_call(context, system_file, user_file, model_name, **kwargs):
33
- """
34
- Executes a model call using the provided system and user prompts.
35
-
36
- :param context: An instance of TranslationContext containing translation parameters.
37
- :param system_file: Path to the file containing the system prompt.
38
- :param user_file: Path to the file containing the user prompt.
39
- :param model_name: The name of the model to use.
40
- :param kwargs: Optional keyword arguments to format the prompt templates.
41
- :return: The model's response content.
42
- """
43
- system_prompt = get_prompt(system_file, **kwargs)
44
- user_prompt = get_prompt(user_file, **kwargs)
45
- return context.model_client.call_model(
46
- system_prompt,
47
- user_prompt,
48
- model_name,
49
- log_call=context.log_calls
50
- )
51
-
52
- def generate_glossary(context, parsed_items, model_searcher):
53
- """
54
- Generates a glossary of terms based on the parsed analysis items.
55
-
56
- :param context: An instance of TranslationContext containing translation parameters.
57
- :param parsed_items: A list of dictionaries containing 'name' and 'keywords' for each item.
58
- :param model_searcher: The name of the model to use for searching term explanations.
59
- :return: A formatted string containing the glossary entries.
60
- """
61
- glossary = []
62
- for item in parsed_items:
63
- term_explanation = execute_model_call(
64
- context,
65
- 'step2_system.txt',
66
- 'step2_user.txt',
67
- model_searcher,
68
- term=item['name'],
69
- keywords=", ".join(item['keywords']),
70
- source_language=context.source_language,
71
- target_language=context.target_language
72
- )
73
- glossary_entry = (
74
- f"### {item['name']}\n\n**Keywords:** {', '.join(item['keywords'])}\n\n"
75
- f"**Explanation:**\n{term_explanation}\n"
76
- )
77
- glossary.append(glossary_entry)
78
- return "\n".join(glossary)
79
-
80
  def translate(source_language, target_language, text, config_file="config.toml", log_calls=False):
81
  """
82
- Translates the provided text from the source language to the target language in multiple steps.
 
 
 
83
 
84
  :param source_language: The source language of the text.
85
  :param target_language: The target language of the text.
@@ -88,60 +32,24 @@ def translate(source_language, target_language, text, config_file="config.toml",
88
  :param log_calls: Boolean indicating whether to log the call details.
89
  :return: The improved translation of the text.
90
  """
 
91
  model_client = load_model_client(config_file)
92
- models = model_client.llms
93
- context = TranslationContext(model_client, source_language, target_language, log_calls)
94
-
95
- analysis_content = execute_model_call(
96
- context,
97
- 'step1_system.txt',
98
- 'step1_user.txt',
99
- models['writer'],
100
- post_content=text,
101
- source_language=source_language,
102
- target_language=target_language
103
- )
104
 
105
- parsed_items = parse_analysis(analysis_content)
106
- glossary_content = generate_glossary(
107
- context, parsed_items, models['searcher']
108
- )
109
-
110
- translated_content = execute_model_call(
111
- context,
112
- 'step3_system.txt',
113
- 'step3_user.txt',
114
- models['writer'],
115
- text=text,
116
  source_language=source_language,
117
- target_language=target_language
 
118
  )
119
 
120
- critique = execute_model_call(
121
- context,
122
- 'step4_system.txt',
123
- 'step4_user.txt',
124
- models['critiquer'],
125
- text=text,
126
- translation=translated_content,
127
- glossary=glossary_content,
128
- source_language=source_language,
129
- target_language=target_language
130
- )
131
 
132
- final_translation_content = execute_model_call(
133
- context,
134
- 'step5_system.txt',
135
- 'step5_user.txt',
136
- models['writer'],
137
- text=text,
138
- translation=translated_content,
139
- glossary=glossary_content,
140
- critique=critique,
141
- source_language=source_language,
142
- target_language=target_language
143
- )
144
 
145
- improved_translation = parse_translation(final_translation_content)
 
146
 
147
- return improved_translation
 
1
  """
2
  Module for translating text using multiple steps and language models.
 
 
 
 
 
 
3
 
4
+ This module provides the main translation functionality using Aphra's
5
+ workflow-based translation system.
6
+ """
 
7
 
8
+ from .core.llm_client import LLMModelClient
9
+ from .core.context import TranslationContext
10
+ from .core.registry import get_suitable_workflow
 
 
 
 
11
 
12
def load_model_client(config_file):
    """
    Build an LLM model client from the given configuration file.

    :param config_file: Path to the configuration file.
    :return: A configured LLMModelClient instance.
    """
    return LLMModelClient(config_file)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
def translate(source_language, target_language, text, config_file="config.toml", log_calls=False):
    """
    Translates the provided text from the source language to the target language using workflows.

    A suitable workflow is selected automatically for the content and run
    inside a freshly built translation context.

    :param source_language: The source language of the text.
    :param target_language: The target language of the text.
    :param text: The text to translate.
    :param config_file: Path to the configuration file.
    :param log_calls: Boolean indicating whether to log the call details.
    :return: The improved translation of the text.
    :raises ValueError: If no registered workflow accepts the provided text.
    """
    # Build the context that carries client, languages, and logging settings.
    context = TranslationContext(
        model_client=load_model_client(config_file),
        source_language=source_language,
        target_language=target_language,
        log_calls=log_calls
    )

    # Let the registry pick the most appropriate workflow for this content.
    workflow = get_suitable_workflow(text)
    if workflow is None:
        raise ValueError("No suitable workflow found for the provided text")

    return workflow.run(context, text)
aphra/workflows/__init__.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow implementations with automatic discovery.
3
+
4
+ This module automatically discovers and imports all workflow classes
5
+ from subdirectories, making it easy to add new workflows without
6
+ modifying this file.
7
+ """
8
+
9
+ import os
10
+ import importlib
11
+ import logging
12
+ from typing import List, Type, Dict
13
+
14
+ # Import the base class for type checking
15
+ try:
16
+ from ..core.workflow import AbstractWorkflow
17
+ except ImportError:
18
+ # Fallback for cases where core is not yet available
19
+ AbstractWorkflow = None
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Initialize __all__ as empty list - will be populated by auto-discovery
24
+ __all__ = []
25
+
26
def _discover_workflows() -> Dict[str, Type]:
    """
    Auto-discover workflow classes from subdirectories.

    Scans all subdirectories of the workflows package and looks for
    classes that inherit from AbstractWorkflow. Directories without an
    ``__init__.py`` and dunder directories are ignored.

    Returns:
        Dict[str, Type]: Mapping of class name to workflow class
    """
    workflows = {}
    current_dir = os.path.dirname(__file__)

    if not current_dir:
        logger.warning("Could not determine workflows directory")
        return workflows

    try:
        # Scan all items in the workflows directory
        for item in os.listdir(current_dir):
            item_path = os.path.join(current_dir, item)

            # Skip files and special directories (e.g. __pycache__)
            if not os.path.isdir(item_path) or item.startswith('__'):
                continue

            # Skip if no __init__.py (not a proper Python package)
            init_file = os.path.join(item_path, '__init__.py')
            if not os.path.exists(init_file):
                logger.debug("Skipping %s: no __init__.py found", item)
                continue

            try:
                # Import the workflow package relative to this package
                module = importlib.import_module(f'.{item}', package=__name__)
                logger.debug("Successfully imported workflow package: %s", item)

                # Look for workflow classes in the module
                workflow_classes_found = 0
                for attr_name in dir(module):
                    attr = getattr(module, attr_name, None)

                    # Check if it's a class that inherits from AbstractWorkflow.
                    # AbstractWorkflow may be None if the core package failed
                    # to import (see the fallback at module top); in that case
                    # nothing is discovered.
                    if (isinstance(attr, type) and
                        AbstractWorkflow is not None and
                        issubclass(attr, AbstractWorkflow) and
                        attr != AbstractWorkflow):

                        workflows[attr_name] = attr
                        workflow_classes_found += 1
                        logger.debug("Discovered workflow: %s from %s", attr_name, item)

                if workflow_classes_found == 0:
                    logger.warning("No workflow classes found in %s", item)

            except ImportError as exc:
                # A broken workflow package must not break discovery of others.
                logger.warning("Failed to import workflow package %s: %s", item, exc)
                continue
            except Exception as exc:
                logger.error("Unexpected error while discovering workflow %s: %s", item, exc)
                continue

    except OSError as exc:
        logger.error("Failed to scan workflows directory: %s", exc)

    logger.debug("Workflow discovery completed. Found %d workflows: %s",
                 len(workflows), list(workflows.keys()))
    return workflows
94
+
95
def _setup_module_exports(workflows: Dict[str, Type]) -> List[str]:
    """
    Publish discovered workflow classes as module-level attributes.

    Args:
        workflows: Dictionary of workflow name to class mappings

    Returns:
        List[str]: Sorted list of exported workflow class names
    """
    # Make every discovered class importable directly from this module.
    globals().update(workflows)
    # Sorted for a stable, reproducible export list.
    return sorted(workflows)
115
+
116
# Perform auto-discovery at import time so workflows become available simply
# by living in a subpackage of aphra/workflows.
logger.debug("Starting workflow auto-discovery...")
_discovered_workflows = _discover_workflows()

# Expose discovered classes as module attributes and publish them via __all__.
__all__ = _setup_module_exports(_discovered_workflows)

# Log final state for debugging discovery issues.
logger.debug("Workflows module initialized with: %s", __all__)
125
+
126
+ # For backward compatibility and explicit access
127
def get_available_workflows() -> List[str]:
    """
    List the class names of every discovered workflow.

    Returns:
        List[str]: A copy of the exported workflow class names.
    """
    return list(__all__)
135
+
136
def get_workflow_class(name: str) -> Type:
    """
    Look up a discovered workflow class by name.

    Args:
        name: The name of the workflow class

    Returns:
        Type: The workflow class

    Raises:
        AttributeError: If the workflow class is not found
    """
    try:
        return globals()[name]
    except KeyError:
        raise AttributeError(
            f"Workflow class '{name}' not found. Available: {__all__}"
        ) from None
aphra/workflows/short_article/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Short Article workflow for translating articles and blog posts.
3
+
4
+ This module contains the short article translation workflow which implements
5
+ a 5-step process for contextual translation of articles and blog posts.
6
+ """
7
+
8
+ from .short_article_workflow import ShortArticleWorkflow
9
+
10
+ __all__ = ['ShortArticleWorkflow']
aphra/workflows/short_article/aux/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Auxiliary utilities for the Short Article workflow.
3
+
4
+ This module contains parsers and utilities specific to the short article
5
+ translation workflow.
6
+ """
7
+
8
+ from .parsers import parse_analysis, parse_translation
9
+
10
+ __all__ = ['parse_analysis', 'parse_translation']
aphra/workflows/short_article/aux/parsers.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Parsers specific to the Short Article workflow.
3
+
4
+ This module contains parsers for extracting content from LLM responses
5
+ that are specific to the short article translation workflow.
6
+
7
+ These parsers use the generic XML parsing functions from the core module
8
+ to avoid code duplication while maintaining a clear API.
9
+ """
10
+
11
+ import logging
12
+ from typing import List, Dict, Any
13
+ from ....core.parsers import parse_xml_tag, parse_multiple_xml_tags
14
+
15
def parse_analysis(analysis_str: str) -> List[Dict[str, Any]]:
    """
    Extract term entries from the <analysis> section of an LLM response.

    Uses the generic XML parsers from the core module to pull the
    <analysis> tag and its nested <item> elements, returning one entry
    per complete item.

    Args:
        analysis_str: String containing the analysis in the specified format.

    Returns:
        List[Dict]: A list of dictionaries, each containing 'name' and 'keywords' from the analysis.
    """
    # Locate the <analysis> section first; without it there is nothing to parse.
    body = parse_xml_tag(analysis_str, "analysis")
    if not body:
        logging.error('Could not find <analysis> tag in content')
        return []

    # Collect every <item> element inside the analysis.
    raw_items = parse_multiple_xml_tags(body, "item")
    if not raw_items:
        logging.warning('No <item> tags found within <analysis>')
        return []

    parsed: List[Dict[str, Any]] = []
    for raw in raw_items:
        term_name = parse_xml_tag(raw, "name")
        keyword_text = parse_xml_tag(raw, "keywords")

        # Only keep items that provide both a name and keywords.
        if not (term_name and keyword_text):
            logging.warning('Incomplete item found - name: %s, keywords: %s',
                            term_name, keyword_text)
            continue

        parsed.append({
            'name': term_name,
            'keywords': keyword_text.split(', ')
        })

    return parsed
56
+
57
def parse_translation(translation_str: str) -> str:
    """
    Extract the final translation from an LLM response.

    Uses the generic XML parser from the core module to pull the content
    of the <improved_translation> tag.

    Args:
        translation_str: String containing the translation in the specified format.

    Returns:
        str: String containing the <improved_translation> content, or an
        empty string when the tag is missing.
    """
    extracted = parse_xml_tag(translation_str, "improved_translation")
    if extracted is not None:
        return extracted

    logging.error('Could not find <improved_translation> tag in content')
    return ""
aphra/workflows/short_article/config/default.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Default configuration for the Short Article workflow
2
+ # These values can be overridden in config.toml under the [short_article] section
3
+
4
+ # LLM models used by this workflow
5
+ writer = "anthropic/claude-sonnet-4"
6
+ searcher = "perplexity/sonar"
7
+ critiquer = "anthropic/claude-sonnet-4"
aphra/{prompts β†’ workflows/short_article/prompts}/step1_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step1_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step2_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step2_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step3_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step3_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step4_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step4_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step5_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step5_user.txt RENAMED
File without changes
aphra/workflows/short_article/short_article_workflow.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Short Article workflow implementation.
3
+
4
+ This workflow implements the 5-step translation process for articles
5
+ and similar content types.
6
+ """
7
+
8
+ from typing import List, Dict, Any
9
+ from ...core.context import TranslationContext
10
+ from ...core.prompts import get_prompt
11
+ from ...core.workflow import AbstractWorkflow
12
+ from .aux.parsers import parse_analysis, parse_translation
13
+
14
class ShortArticleWorkflow(AbstractWorkflow):
    """
    Workflow for translating articles and similar content.

    This workflow implements the proven 5-step process using direct methods:
    1. analyze() - Identify key terms and concepts
    2. search() - Generate contextual explanations with web search
    3. translate() - Create initial translation
    4. critique() - Evaluate translation quality
    5. refine() - Produce final improved translation

    To customize: simply inherit from this class and override any method.
    """

    def get_workflow_name(self) -> str:
        """Get the unique name of this workflow."""
        return "short_article"

    def is_suitable_for(self, text: str, **_kwargs) -> bool:
        """
        Determine if this workflow is suitable for the given content.

        This workflow is suitable for:
        - Articles and blog posts
        - General text content
        - Serves as the default workflow when no other workflow matches

        Args:
            text: The text content to evaluate
            **_kwargs: Additional evaluation parameters (unused)

        Returns:
            bool: True if this workflow is suitable
        """
        # This workflow accepts any non-empty text
        return len(text.strip()) > 0

    def _call_step(self, context: TranslationContext, step: str, model: str,
                   call_kwargs: Dict[str, Any] = None, **prompt_kwargs) -> str:
        """
        Render the system/user prompt pair for a step and invoke the model.

        Every step of this workflow follows the same pattern: load
        '<step>_system.txt' and '<step>_user.txt', format both with the
        same keyword arguments, then call the LLM. This helper removes
        that duplication.

        Args:
            context: The translation context
            step: Prompt file prefix for the step (e.g. 'step1')
            model: The model identifier to call
            call_kwargs: Optional extra keyword arguments forwarded to
                call_model (e.g. web-search flags)
            **prompt_kwargs: Values used to format both prompt templates

        Returns:
            str: The raw model response
        """
        system_prompt = get_prompt('short_article', f'{step}_system.txt', **prompt_kwargs)
        user_prompt = get_prompt('short_article', f'{step}_user.txt', **prompt_kwargs)
        return context.model_client.call_model(
            system_prompt,
            user_prompt,
            model,
            log_call=context.log_calls,
            **(call_kwargs or {})
        )

    def analyze(self, context: TranslationContext, text: str) -> List[Dict[str, Any]]:
        """
        Analyze the source text to identify key terms and concepts.

        Args:
            context: The translation context
            text: The text to analyze

        Returns:
            List[Dict]: Parsed analysis results with term names and keywords
        """
        writer_model = context.get_workflow_config('writer')

        analysis_content = self._call_step(
            context, 'step1', writer_model,
            post_content=text,
            source_language=context.source_language,
            target_language=context.target_language
        )

        return parse_analysis(analysis_content)

    def search(self, context: TranslationContext, parsed_items: List[Dict[str, Any]]) -> str:
        """
        Generate contextual explanations for analyzed terms using web search.

        Args:
            context: The translation context
            parsed_items: List of terms from analysis step

        Returns:
            str: Formatted glossary content (empty string when there is
            nothing to explain)
        """
        if not parsed_items:
            return ""

        searcher_model = context.get_workflow_config('searcher')
        glossary = []

        for item in parsed_items:
            # Generate explanation for each term using web search
            term_explanation = self._generate_term_explanation(context, item, searcher_model)

            # Format glossary entry
            glossary_entry = (
                f"### {item['name']}\n\n**Keywords:** {', '.join(item['keywords'])}\n\n"
                f"**Explanation:**\n{term_explanation}\n"
            )
            glossary.append(glossary_entry)

        return "\n".join(glossary)

    def translate(self, context: TranslationContext, text: str) -> str:
        """
        Create the initial translation of the source text.

        Args:
            context: The translation context
            text: The text to translate

        Returns:
            str: The initial translation
        """
        writer_model = context.get_workflow_config('writer')

        return self._call_step(
            context, 'step3', writer_model,
            text=text,
            source_language=context.source_language,
            target_language=context.target_language
        )

    def critique(self, context: TranslationContext, text: str,
                 translation: str, glossary: str) -> str:
        """
        Evaluate the translation quality and provide feedback.

        Args:
            context: The translation context
            text: The original text
            translation: The initial translation
            glossary: The glossary from search step

        Returns:
            str: Critique and feedback
        """
        critiquer_model = context.get_workflow_config('critiquer')

        return self._call_step(
            context, 'step4', critiquer_model,
            text=text,
            translation=translation,
            glossary=glossary,
            source_language=context.source_language,
            target_language=context.target_language
        )

    def refine(self, context: TranslationContext, text: str, *,
               translation: str, glossary: str, critique: str) -> str:
        """
        Produce the final refined translation based on critique feedback.

        Args:
            context: The translation context
            text: The original text
            translation: The initial translation
            glossary: The glossary from search step
            critique: The critique feedback

        Returns:
            str: The final refined translation
        """
        writer_model = context.get_workflow_config('writer')

        final_translation_content = self._call_step(
            context, 'step5', writer_model,
            text=text,
            translation=translation,
            glossary=glossary,
            critique=critique,
            source_language=context.source_language,
            target_language=context.target_language
        )

        # Parse and return final translation
        return parse_translation(final_translation_content)

    def execute(self, context: TranslationContext, text: str) -> str:
        """
        Execute the complete short article workflow.

        This method orchestrates the 5-step process in sequence.

        Args:
            context: The translation context
            text: The text to translate

        Returns:
            str: The final refined translation
        """
        # Step 1: Analyze the text to identify key terms
        analysis = self.analyze(context, text)

        # Step 2: Search for contextual information about the terms
        glossary = self.search(context, analysis)

        # Step 3: Create initial translation
        translation = self.translate(context, text)

        # Step 4: Critique the translation
        critique = self.critique(context, text, translation, glossary)

        # Step 5: Refine the translation based on critique
        return self.refine(context, text, translation=translation,
                           glossary=glossary, critique=critique)

    def _generate_term_explanation(self, context: TranslationContext,
                                   item: Dict[str, Any], model: str) -> str:
        """
        Generate explanation for a single term using web search.

        Args:
            context: The translation context
            item: Dictionary with 'name' and 'keywords' keys
            model: The model to use for generation

        Returns:
            str: The generated explanation with web search results
        """
        return self._call_step(
            context, 'step2', model,
            call_kwargs={'enable_web_search': True, 'web_search_context': "high"},
            term=item['name'],
            keywords=", ".join(item['keywords']),
            source_language=context.source_language,
            target_language=context.target_language
        )
app.py CHANGED
@@ -1,19 +1,83 @@
 
 
 
 
 
 
 
1
  import os
2
  import tempfile
3
  import gradio as gr
4
  import toml
 
 
 
5
  from aphra import translate
6
 
 
 
7
  theme = gr.themes.Soft(
8
  primary_hue="rose",
9
  secondary_hue="pink",
10
  spacing_size="lg",
11
  )
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def create_config_file(api_key, writer_model, searcher_model, critic_model):
 
 
 
 
 
 
 
 
 
 
 
 
14
  config = {
15
  "openrouter": {"api_key": api_key},
16
- "llms": {
17
  "writer": writer_model,
18
  "searcher": searcher_model,
19
  "critiquer": critic_model
@@ -24,6 +88,22 @@ def create_config_file(api_key, writer_model, searcher_model, critic_model):
24
  return tmp.name
25
 
26
  def process_input(file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  if file is not None:
28
  with open(file, 'r', encoding='utf-8') as file:
29
  text = file.read()
@@ -40,10 +120,19 @@ def process_input(file, text_input, api_key, writer_model, searcher_model, criti
40
  )
41
  finally:
42
  os.unlink(config_file)
43
-
44
  return translation
45
 
46
  def create_interface():
 
 
 
 
 
 
 
 
 
47
  with gr.Blocks(theme=theme) as demo:
48
  gr.Markdown("<font size=6.5><center>πŸŒπŸ’¬ Aphra</center></font>")
49
  gr.Markdown(
@@ -51,27 +140,27 @@ def create_interface():
51
  [<a href="https://davidlms.github.io/aphra/">Project Page</a>] | [<a href="https://github.com/DavidLMS/aphra">Github</a>]</div>
52
  """
53
  )
54
- gr.Markdown("πŸŒπŸ’¬ Aphra is an open-source translation agent designed to enhance the quality of text translations by leveraging large language models (LLMs).")
55
-
56
  with gr.Row():
57
  api_key = gr.Textbox(label="OpenRouter API Key", type="password")
58
-
59
  writer_model = gr.Dropdown(
60
- ["anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest", "google/gemini-pro-1.5"],
61
  label="Writer Model",
62
- value="anthropic/claude-3.7-sonnet",
63
  allow_custom_value=True
64
  )
65
  searcher_model = gr.Dropdown(
66
- ["perplexity/llama-3.1-sonar-small-128k-online", "perplexity/llama-3.1-sonar-large-128k-online"],
67
  label="Searcher Model",
68
- value="perplexity/llama-3.1-sonar-large-128k-online",
69
  allow_custom_value=True
70
  )
71
  critic_model = gr.Dropdown(
72
- ["anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest", "google/gemini-pro-1.5"],
73
  label="Critic Model",
74
- value="anthropic/claude-3.7-sonnet",
75
  allow_custom_value=True
76
  )
77
 
@@ -89,20 +178,20 @@ def create_interface():
89
  allow_custom_value=True
90
  )
91
 
92
- with gr.Row():
93
  file = gr.File(label="Upload .txt or .md file", file_types=[".txt", ".md"])
94
  text_input = gr.Textbox(label="Or paste your text here", lines=5)
95
-
96
  translate_btn = gr.Button("Translate with πŸŒπŸ’¬ Aphra")
97
-
98
  output = gr.Textbox(label="Translation by πŸŒπŸ’¬ Aphra")
99
-
100
  translate_btn.click(
101
  process_input,
102
  inputs=[file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang],
103
  outputs=[output]
104
  )
105
-
106
  return demo
107
 
108
  if __name__ == "__main__":
 
1
+ """
2
+ Gradio web interface demo for Aphra translation system.
3
+
4
+ This module provides a user-friendly web interface for the Aphra translation
5
+ system using Gradio, allowing users to configure models and translate text
6
+ through a browser interface.
7
+ """
8
  import os
9
  import tempfile
10
  import gradio as gr
11
  import toml
12
+ import requests
13
+ import logging
14
+ # Import the translate function
15
  from aphra import translate
16
 
17
+ OPENROUTER_MODELS_URL="https://openrouter.ai/api/v1/models"
18
+
19
  theme = gr.themes.Soft(
20
  primary_hue="rose",
21
  secondary_hue="pink",
22
  spacing_size="lg",
23
  )
24
 
25
def fetch_openrouter_models():
    """
    Fetch available models from OpenRouter API.

    Returns:
        list: Sorted model IDs (strings). Falls back to a small hard-coded
        list when the API cannot be reached or returns invalid data, so the
        UI still renders with usable defaults.
    """
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Each entry in 'data' describes one model; skip malformed entries
        # that lack an 'id' instead of raising KeyError.
        models = [model['id'] for model in data.get('data', []) if 'id' in model]
        return sorted(models)
    except (requests.RequestException, ValueError) as e:
        # ValueError covers invalid JSON from response.json(); treat it the
        # same as a network failure. Use lazy %-formatting for logging.
        logging.warning("Failed to fetch models from OpenRouter: %s", e)
        # Fallback to default models if API fails
        return [
            "anthropic/claude-sonnet-4",
            "perplexity/sonar"
        ]
45
+
46
def get_default_models():
    """
    Get default model selections for different roles.

    Returns:
        tuple: (models, writer_default, searcher_default, critic_default)
        where each default is the preferred model when available, otherwise
        the first model in the fetched list.
    """
    models = fetch_openrouter_models()

    def pick(preferred):
        # Keep the preferred model when offered; otherwise fall back to the
        # first available model. With an empty list, keep the preferred name
        # so the dropdown still shows a sensible custom value.
        if preferred not in models and models:
            return models[0]
        return preferred

    # Default selections based on common good models
    writer_default = pick("anthropic/claude-sonnet-4")
    searcher_default = pick("perplexity/sonar")
    critic_default = pick("anthropic/claude-sonnet-4")

    return models, writer_default, searcher_default, critic_default
64
+
65
  def create_config_file(api_key, writer_model, searcher_model, critic_model):
66
+ """
67
+ Create a temporary TOML configuration file for Aphra.
68
+
69
+ Args:
70
+ api_key: OpenRouter API key
71
+ writer_model: Model to use for writing/translation
72
+ searcher_model: Model to use for searching/research
73
+ critic_model: Model to use for criticism/review
74
+
75
+ Returns:
76
+ str: Path to the temporary configuration file
77
+ """
78
  config = {
79
  "openrouter": {"api_key": api_key},
80
+ "short_article": {
81
  "writer": writer_model,
82
  "searcher": searcher_model,
83
  "critiquer": critic_model
 
88
  return tmp.name
89
 
90
  def process_input(file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang):
91
+ """
92
+ Process translation input from either file or text input.
93
+
94
+ Args:
95
+ file: Uploaded file object (if any)
96
+ text_input: Direct text input string
97
+ api_key: OpenRouter API key
98
+ writer_model: Model for writing/translation
99
+ searcher_model: Model for searching/research
100
+ critic_model: Model for criticism/review
101
+ source_lang: Source language for translation
102
+ target_lang: Target language for translation
103
+
104
+ Returns:
105
+ str: Translated text
106
+ """
107
  if file is not None:
108
  with open(file, 'r', encoding='utf-8') as file:
109
  text = file.read()
 
120
  )
121
  finally:
122
  os.unlink(config_file)
123
+
124
  return translation
125
 
126
  def create_interface():
127
+ """
128
+ Create and configure the Gradio web interface.
129
+
130
+ Returns:
131
+ gr.Blocks: Configured Gradio interface
132
+ """
133
+ # Get dynamic model list and defaults
134
+ models, writer_default, searcher_default, critic_default = get_default_models()
135
+
136
  with gr.Blocks(theme=theme) as demo:
137
  gr.Markdown("<font size=6.5><center>πŸŒπŸ’¬ Aphra</center></font>")
138
  gr.Markdown(
 
140
  [<a href="https://davidlms.github.io/aphra/">Project Page</a>] | [<a href="https://github.com/DavidLMS/aphra">Github</a>]</div>
141
  """
142
  )
143
+ gr.Markdown("πŸŒπŸ’¬ Aphra is an open-source translation agent with a workflow architecture designed to enhance the quality of text translations by leveraging large language models (LLMs).")
144
+
145
  with gr.Row():
146
  api_key = gr.Textbox(label="OpenRouter API Key", type="password")
147
+
148
  writer_model = gr.Dropdown(
149
+ models,
150
  label="Writer Model",
151
+ value=writer_default,
152
  allow_custom_value=True
153
  )
154
  searcher_model = gr.Dropdown(
155
+ models,
156
  label="Searcher Model",
157
+ value=searcher_default,
158
  allow_custom_value=True
159
  )
160
  critic_model = gr.Dropdown(
161
+ models,
162
  label="Critic Model",
163
+ value=critic_default,
164
  allow_custom_value=True
165
  )
166
 
 
178
  allow_custom_value=True
179
  )
180
 
181
+ with gr.Row():
182
  file = gr.File(label="Upload .txt or .md file", file_types=[".txt", ".md"])
183
  text_input = gr.Textbox(label="Or paste your text here", lines=5)
184
+
185
  translate_btn = gr.Button("Translate with πŸŒπŸ’¬ Aphra")
186
+
187
  output = gr.Textbox(label="Translation by πŸŒπŸ’¬ Aphra")
188
+
189
  translate_btn.click(
190
  process_input,
191
  inputs=[file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang],
192
  outputs=[output]
193
  )
194
+
195
  return demo
196
 
197
  if __name__ == "__main__":