David committed on
Commit
1aa6f63
·
1 Parent(s): 22dea1c

new version working

Browse files
Files changed (31) hide show
  1. .gitignore +30 -0
  2. README.md +33 -2
  3. aphra/__init__.py +4 -7
  4. aphra/core/__init__.py +35 -0
  5. aphra/core/config.py +106 -0
  6. aphra/core/context.py +64 -0
  7. aphra/{llm_client.py → core/llm_client.py} +34 -13
  8. aphra/core/parsers.py +125 -0
  9. aphra/core/prompts.py +82 -0
  10. aphra/core/registry.py +167 -0
  11. aphra/core/workflow.py +97 -0
  12. aphra/parsers.py +0 -59
  13. aphra/prompts.py +0 -22
  14. aphra/translate.py +23 -115
  15. aphra/workflows/__init__.py +152 -0
  16. aphra/workflows/short_article/__init__.py +10 -0
  17. aphra/workflows/short_article/aux/__init__.py +10 -0
  18. aphra/workflows/short_article/aux/parsers.py +75 -0
  19. aphra/workflows/short_article/config/default.toml +7 -0
  20. aphra/{prompts → workflows/short_article/prompts}/step1_system.txt +0 -0
  21. aphra/{prompts → workflows/short_article/prompts}/step1_user.txt +0 -0
  22. aphra/{prompts → workflows/short_article/prompts}/step2_system.txt +0 -0
  23. aphra/{prompts → workflows/short_article/prompts}/step2_user.txt +0 -0
  24. aphra/{prompts → workflows/short_article/prompts}/step3_system.txt +0 -0
  25. aphra/{prompts → workflows/short_article/prompts}/step3_user.txt +0 -0
  26. aphra/{prompts → workflows/short_article/prompts}/step4_system.txt +0 -0
  27. aphra/{prompts → workflows/short_article/prompts}/step4_user.txt +0 -0
  28. aphra/{prompts → workflows/short_article/prompts}/step5_system.txt +0 -0
  29. aphra/{prompts → workflows/short_article/prompts}/step5_user.txt +0 -0
  30. aphra/workflows/short_article/short_article_workflow.py +325 -0
  31. app.py +105 -16
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .env
22
+ .venv
23
+ env/
24
+ venv/
25
+ ENV/
26
+ env.bak/
27
+ venv.bak/
28
+ *.log
29
+ .DS_Store
30
+ config.toml
README.md CHANGED
@@ -10,6 +10,37 @@ pinned: true
10
  license: mit
11
  ---
12
 
13
- πŸŒπŸ’¬ Aphra is an open-source translation agent designed to enhance the quality of text translations by leveraging large language models (LLMs). Unlike traditional translation tools that rely solely on direct translations, Aphra introduces a multi-stage, context-aware process that includes glossary creation, contextual search, critique, and refinement. This approach aims to produce translations that not only retain the original meaning but also incorporate translator notes, contextual adjustments, and stylistic improvements. Whether you're translating blog posts, articles, or complex documents, Aphra ensures a more nuanced and accurate translation that respects the original content's integrity.
14
 
15
- > **Important Note:** πŸŒπŸ’¬ Aphra is not intended to replace the work of a professional translator. Instead, it aims to facilitate multilingual support in small projects where hiring a professional translator may not be feasible. Aphra offers a practical solution for achieving quality translations in contexts where a fully professional translation service is out of scope, ensuring that language barriers do not hinder the global reach of your content.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # πŸŒπŸ’¬ Aphra - Open-Source Translation Agent
14
 
15
+ πŸŒπŸ’¬ Aphra is an open-source translation agent with a workflow architecture designed to enhance the quality of text translations by leveraging large language models (LLMs). Unlike traditional translation tools that rely solely on direct translations, Aphra introduces a multi-stage, context-aware process that includes glossary creation, contextual search, critique, and refinement. This approach aims to produce translations that not only retain the original meaning but also incorporate translator notes, contextual adjustments, and stylistic improvements.
16
+
17
+ ## Features
18
+
19
+ - **Multi-stage Translation Process**: Analysis → Search → Translation → Critique → Refinement
20
+ - **Context-Aware**: Uses web search to gather contextual information about key terms
21
+ - **Flexible Model Selection**: Choose different LLMs for writing, searching, and critique
22
+ - **Workflow Architecture**: Modular design allows for easy customization and extension
23
+ - **OpenRouter Integration**: Access to multiple state-of-the-art language models
24
+
25
+ ## How to Use
26
+
27
+ 1. **Enter your OpenRouter API Key**: Get one at [OpenRouter](https://openrouter.ai/)
28
+ 2. **Select Models**: Choose appropriate models for each role (Writer, Searcher, Critic)
29
+ 3. **Set Languages**: Select source and target languages
30
+ 4. **Input Text**: Upload a file (.txt or .md) or paste your text directly
31
+ 5. **Translate**: Click the translate button and wait for the result
32
+
33
+ ## Model Recommendations
34
+
35
+ - **Writer**: `anthropic/claude-sonnet-4` - Excellent for translation and refinement
36
+ - **Searcher**: `perplexity/sonar` - Specialized in web search and information gathering
37
+ - **Critic**: `anthropic/claude-sonnet-4` - Good at analyzing and providing feedback
38
+
39
+ ## Links
40
+
41
+ - [Project Page](https://davidlms.github.io/aphra/)
42
+ - [GitHub Repository](https://github.com/DavidLMS/aphra)
43
+
44
+ ## Important Note
45
+
46
+ πŸŒπŸ’¬ Aphra is not intended to replace the work of a professional translator. Instead, it aims to facilitate multilingual support in small projects where hiring a professional translator may not be feasible. Aphra offers a practical solution for achieving quality translations in contexts where a fully professional translation service is out of scope, ensuring that language barriers do not hinder the global reach of your content.
aphra/__init__.py CHANGED
@@ -1,12 +1,9 @@
1
  """
2
  Aphra package initializer.
3
-
4
- This module exposes the translate function from the translate module.
5
  """
6
-
7
  from .translate import translate
8
- from . import llm_client
9
- from . import prompts
10
- from . import parsers
11
 
12
- __all__ = ['translate', 'llm_client', 'prompts', 'parsers']
 
1
  """
2
  Aphra package initializer.
3
+ This module exposes the main API components and modules.
 
4
  """
 
5
  from .translate import translate
6
+ from . import workflows
7
+ from . import core
 
8
 
9
+ __all__ = ['translate', 'workflows', 'core']
aphra/core/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core components for the Aphra translation system.
3
+
4
+ This module contains the fundamental building blocks used across
5
+ all workflows.
6
+ """
7
+
8
+ from .llm_client import LLMModelClient
9
+ from .parsers import parse_xml_tag, parse_multiple_xml_tags, parse_xml_tag_with_attributes
10
+ from .prompts import get_prompt, list_workflow_prompts
11
+ from .context import TranslationContext
12
+ from .workflow import AbstractWorkflow
13
+ from .registry import (
14
+ WorkflowRegistry,
15
+ get_registry,
16
+ register_workflow,
17
+ get_workflow,
18
+ get_suitable_workflow
19
+ )
20
+
21
+ __all__ = [
22
+ 'LLMModelClient',
23
+ 'parse_xml_tag',
24
+ 'parse_multiple_xml_tags',
25
+ 'parse_xml_tag_with_attributes',
26
+ 'get_prompt',
27
+ 'list_workflow_prompts',
28
+ 'TranslationContext',
29
+ 'AbstractWorkflow',
30
+ 'WorkflowRegistry',
31
+ 'get_registry',
32
+ 'register_workflow',
33
+ 'get_workflow',
34
+ 'get_suitable_workflow'
35
+ ]
aphra/core/config.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generic configuration management for workflows.
3
+
4
+ This module provides functions to load and merge workflow-specific configuration
5
+ with user overrides for any workflow in the system.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Any, Optional
10
+ import logging
11
+ import toml
12
+
13
+ def load_workflow_config(workflow_name: str,
14
+ global_config_path: Optional[str] = None) -> Dict[str, Any]:
15
+ """
16
+ Load workflow configuration with user overrides.
17
+
18
+ This generic function works for any workflow by:
19
+ 1. Loading default config from workflows/{workflow_name}/config/default.toml
20
+ 2. Applying user overrides from config.toml section [{workflow_name}]
21
+ 3. Returning the merged configuration
22
+
23
+ Args:
24
+ workflow_name: Name of the workflow (e.g., 'short_article', 'subtitles')
25
+ global_config_path: Path to global config.toml file. If None, looks for
26
+ config.toml in the current working directory.
27
+
28
+ Returns:
29
+ Dict containing merged configuration values
30
+
31
+ Example:
32
+ config = load_workflow_config('short_article')
33
+ writer_model = config.get('writer', 'default-model')
34
+ """
35
+ # Build path to workflow's default config
36
+ # Assuming we're in aphra/core/ and want to reach aphra/workflows/
37
+ core_dir = os.path.dirname(__file__)
38
+ aphra_dir = os.path.dirname(core_dir)
39
+ workflow_config_path = os.path.join(
40
+ aphra_dir, 'workflows', workflow_name, 'config', 'default.toml'
41
+ )
42
+
43
+ # Load default workflow config
44
+ config = {}
45
+ try:
46
+ with open(workflow_config_path, 'r', encoding='utf-8') as config_file:
47
+ config = toml.load(config_file)
48
+ logging.debug("Loaded default config for workflow '%s'", workflow_name)
49
+ except FileNotFoundError:
50
+ logging.warning("Default config not found for workflow '%s' at %s",
51
+ workflow_name, workflow_config_path)
52
+ config = {}
53
+ except Exception as exc:
54
+ logging.error("Error loading default config for workflow '%s': %s",
55
+ workflow_name, exc)
56
+ config = {}
57
+
58
+ # Load user overrides from global config
59
+ if global_config_path is None:
60
+ global_config_path = 'config.toml'
61
+
62
+ try:
63
+ with open(global_config_path, 'r', encoding='utf-8') as config_file:
64
+ global_config = toml.load(config_file)
65
+
66
+ # Apply overrides from workflow-specific section
67
+ if workflow_name in global_config:
68
+ config.update(global_config[workflow_name])
69
+ logging.debug("Applied user overrides for workflow '%s'", workflow_name)
70
+
71
+ except FileNotFoundError:
72
+ logging.debug("Global config file not found: %s", global_config_path)
73
+ # No global config file, use defaults
74
+ except Exception as exc:
75
+ logging.warning("Error reading global config file %s: %s",
76
+ global_config_path, exc)
77
+ # Error reading config, use defaults
78
+
79
+ return config
80
+
81
+ def get_workflow_config_path(workflow_name: str) -> str:
82
+ """
83
+ Get the path to a workflow's default configuration file.
84
+
85
+ Args:
86
+ workflow_name: Name of the workflow
87
+
88
+ Returns:
89
+ str: Path to the workflow's default.toml file
90
+ """
91
+ core_dir = os.path.dirname(__file__)
92
+ aphra_dir = os.path.dirname(core_dir)
93
+ return os.path.join(aphra_dir, 'workflows', workflow_name, 'config', 'default.toml')
94
+
95
+ def workflow_has_config(workflow_name: str) -> bool:
96
+ """
97
+ Check if a workflow has a configuration file.
98
+
99
+ Args:
100
+ workflow_name: Name of the workflow
101
+
102
+ Returns:
103
+ bool: True if the workflow has a default.toml file
104
+ """
105
+ config_path = get_workflow_config_path(workflow_name)
106
+ return os.path.isfile(config_path)
aphra/core/context.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Context management for translation workflows.
3
+
4
+ This module provides the TranslationContext class that encapsulates
5
+ all the state and configuration needed during translation execution.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Dict, Any, Optional
10
+ from .llm_client import LLMModelClient
11
+
12
+ @dataclass
13
+ class TranslationContext:
14
+ """
15
+ Context for translation containing parameters and settings.
16
+
17
+ This class encapsulates the parameters and settings needed for performing a translation,
18
+ including the model client, source and target languages, and logging preferences.
19
+ """
20
+ model_client: LLMModelClient
21
+ source_language: str
22
+ target_language: str
23
+ log_calls: bool
24
+
25
+ # Additional fields for workflow state
26
+ metadata: Dict[str, Any] = None
27
+ intermediate_results: Dict[str, Any] = None
28
+ workflow_config: Optional[Dict[str, Any]] = None
29
+
30
+ def __post_init__(self):
31
+ """Initialize optional fields if not provided."""
32
+ if self.metadata is None:
33
+ self.metadata = {}
34
+ if self.intermediate_results is None:
35
+ self.intermediate_results = {}
36
+ if self.workflow_config is None:
37
+ self.workflow_config = {}
38
+
39
+ def get_workflow_config(self, key: str = None, default: Any = None) -> Any:
40
+ """
41
+ Get workflow-specific configuration value.
42
+
43
+ Args:
44
+ key: Configuration key to retrieve. If None, returns full config dict.
45
+ default: Default value if key is not found.
46
+
47
+ Returns:
48
+ Configuration value or default if not found.
49
+ """
50
+ if key is None:
51
+ return self.workflow_config
52
+ return self.workflow_config.get(key, default)
53
+
54
+ def set_workflow_config(self, config: Dict[str, Any]) -> None:
55
+ """Set workflow-specific configuration."""
56
+ self.workflow_config = config
57
+
58
+ def store_result(self, step_name: str, result: Any) -> None:
59
+ """Store intermediate result from a workflow step."""
60
+ self.intermediate_results[step_name] = result
61
+
62
+ def get_result(self, step_name: str) -> Any:
63
+ """Retrieve intermediate result from a workflow step."""
64
+ return self.intermediate_results.get(step_name)
aphra/{llm_client.py → core/llm_client.py} RENAMED
@@ -35,18 +35,19 @@ class LLMModelClient:
35
  with open(config_file_path, 'r', encoding='utf-8') as file:
36
  config = toml.load(file)
37
  self.api_key_openrouter = config['openrouter']['api_key']
38
- self.llms = config['llms']
39
  except FileNotFoundError:
40
  logging.error('File not found: %s', config_file_path)
41
  raise
42
  except toml.TomlDecodeError:
43
  logging.error('Error decoding TOML file: %s', config_file_path)
44
  raise
45
- except KeyError as e:
46
- logging.error('Missing key in config file: %s', e)
47
  raise
48
 
49
- def call_model(self, system_prompt, user_prompt, model_name, log_call=False):
 
 
50
  """
51
  Calls the model with the provided prompts.
52
 
@@ -54,28 +55,48 @@ class LLMModelClient:
54
  :param user_prompt: The user prompt to send to the model.
55
  :param model_name: The name of the model to use.
56
  :param log_call: Boolean indicating whether to log the call details.
 
 
57
  :return: The model's response.
58
  """
 
59
  try:
60
- response = self.client.chat.completions.create(
61
- model=model_name,
62
- messages=[
 
63
  {"role": "system", "content": system_prompt},
64
  {"role": "user", "content": user_prompt}
65
  ]
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  response_content = response.choices[0].message.content
68
 
69
  if log_call:
70
  self.log_model_call(user_prompt, response_content)
71
 
72
  return response_content
73
- except requests.exceptions.RequestException as e:
74
- logging.error('Request error: %s', e)
75
  raise
76
- except (ValueError, KeyError, TypeError) as e:
77
- logging.error('Error parsing response: %s', e)
78
- logging.error('Response content: %s', response.text if response else 'No response')
 
 
 
79
  raise
80
 
81
  def log_model_call(self, user_prompt, response):
 
35
  with open(config_file_path, 'r', encoding='utf-8') as file:
36
  config = toml.load(file)
37
  self.api_key_openrouter = config['openrouter']['api_key']
 
38
  except FileNotFoundError:
39
  logging.error('File not found: %s', config_file_path)
40
  raise
41
  except toml.TomlDecodeError:
42
  logging.error('Error decoding TOML file: %s', config_file_path)
43
  raise
44
+ except KeyError as exc:
45
+ logging.error('Missing key in config file: %s', exc)
46
  raise
47
 
48
+ def call_model(self, system_prompt, user_prompt, model_name, *,
49
+ log_call=False, enable_web_search=False,
50
+ web_search_context="high"):
51
  """
52
  Calls the model with the provided prompts.
53
 
 
55
  :param user_prompt: The user prompt to send to the model.
56
  :param model_name: The name of the model to use.
57
  :param log_call: Boolean indicating whether to log the call details.
58
+ :param enable_web_search: Boolean indicating whether to enable web search via OpenRouter.
59
+ :param web_search_context: Context size for web search ('low', 'medium', 'high').
60
  :return: The model's response.
61
  """
62
+ response = None
63
  try:
64
+ # Prepare the request parameters
65
+ request_params = {
66
+ "model": model_name,
67
+ "messages": [
68
  {"role": "system", "content": system_prompt},
69
  {"role": "user", "content": user_prompt}
70
  ]
71
+ }
72
+
73
+ # Add web search capabilities if enabled (OpenRouter format)
74
+ if enable_web_search:
75
+ # Append :online to model name for web search
76
+ if not model_name.endswith(":online"):
77
+ request_params["model"] = f"{model_name}:online"
78
+
79
+ # Add web search options
80
+ request_params["web_search_options"] = {
81
+ "search_context_size": web_search_context
82
+ }
83
+
84
+ response = self.client.chat.completions.create(**request_params)
85
  response_content = response.choices[0].message.content
86
 
87
  if log_call:
88
  self.log_model_call(user_prompt, response_content)
89
 
90
  return response_content
91
+ except requests.exceptions.RequestException as exc:
92
+ logging.error('Request error: %s', exc)
93
  raise
94
+ except (ValueError, KeyError, TypeError) as exc:
95
+ logging.error('Error parsing response: %s', exc)
96
+ if response and hasattr(response, 'text'):
97
+ logging.error('Response content: %s', response.text)
98
+ else:
99
+ logging.error('No response available')
100
  raise
101
 
102
  def log_model_call(self, user_prompt, response):
aphra/core/parsers.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generic parsing utilities for XML-like content extraction.
3
+
4
+ This module provides generic parsers that can be used across different
5
+ workflows for extracting content from XML-like tags in LLM responses.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from typing import Optional
11
+
12
+ def parse_xml_tag(content: str, tag_name: str) -> Optional[str]:
13
+ """
14
+ Extract content from within XML-like tags in a string.
15
+
16
+ This is a generic parser that can extract content from any XML-like tag
17
+ in LLM responses, making it reusable across different workflows.
18
+
19
+ Args:
20
+ content: The string content containing XML-like tags
21
+ tag_name: The name of the tag to extract (without < >)
22
+
23
+ Returns:
24
+ str: The content within the tags, or None if not found
25
+
26
+ Example:
27
+ >>> content = "Some text <result>Hello World</result> more text"
28
+ >>> parse_xml_tag(content, "result")
29
+ "Hello World"
30
+ """
31
+ try:
32
+ start_tag = f"<{tag_name}>"
33
+ end_tag = f"</{tag_name}>"
34
+
35
+ start_index = content.find(start_tag)
36
+ if start_index == -1:
37
+ logging.warning("Start tag '<%s>' not found in content", tag_name)
38
+ return None
39
+
40
+ start_index += len(start_tag)
41
+ end_index = content.find(end_tag, start_index)
42
+
43
+ if end_index == -1:
44
+ logging.warning("End tag '</%s>' not found in content", tag_name)
45
+ return None
46
+
47
+ extracted_content = content[start_index:end_index].strip()
48
+ return extracted_content
49
+
50
+ except Exception as exc:
51
+ logging.error("Error parsing XML tag '%s': %s", tag_name, exc)
52
+ return None
53
+
54
+ def parse_multiple_xml_tags(content: str, tag_name: str) -> list[str]:
55
+ """
56
+ Extract content from multiple XML-like tags of the same type.
57
+
58
+ Args:
59
+ content: The string content containing XML-like tags
60
+ tag_name: The name of the tag to extract (without < >)
61
+
62
+ Returns:
63
+ list[str]: List of content within all matching tags
64
+
65
+ Example:
66
+ >>> content = "Text <item>First</item> more <item>Second</item> end"
67
+ >>> parse_multiple_xml_tags(content, "item")
68
+ ["First", "Second"]
69
+ """
70
+ try:
71
+ # Use regex to find all occurrences
72
+ pattern = f"<{re.escape(tag_name)}>(.*?)</{re.escape(tag_name)}>"
73
+ matches = re.findall(pattern, content, re.DOTALL)
74
+
75
+ # Strip whitespace from each match
76
+ results = [match.strip() for match in matches]
77
+ return results
78
+
79
+ except Exception as exc:
80
+ logging.error("Error parsing multiple XML tags '%s': %s", tag_name, exc)
81
+ return []
82
+
83
+ def parse_xml_tag_with_attributes(content: str, tag_name: str) -> Optional[dict]:
84
+ """
85
+ Extract content and attributes from XML-like tags.
86
+
87
+ Args:
88
+ content: The string content containing XML-like tags
89
+ tag_name: The name of the tag to extract (without < >)
90
+
91
+ Returns:
92
+ dict: Dictionary with 'content' and 'attributes' keys, or None if not found
93
+
94
+ Example:
95
+ >>> content = 'Text <result type="success">Hello World</result>'
96
+ >>> parse_xml_tag_with_attributes(content, "result")
97
+ {"content": "Hello World", "attributes": {"type": "success"}}
98
+ """
99
+ try:
100
+ # Pattern to match tag with optional attributes
101
+ pattern = f"<{re.escape(tag_name)}([^>]*)>(.*?)</{re.escape(tag_name)}>"
102
+ match = re.search(pattern, content, re.DOTALL)
103
+
104
+ if not match:
105
+ logging.warning("Tag '<%s>' not found in content", tag_name)
106
+ return None
107
+
108
+ attributes_str = match.group(1).strip()
109
+ tag_content = match.group(2).strip()
110
+
111
+ # Parse attributes if any
112
+ attributes = {}
113
+ if attributes_str:
114
+ # Simple attribute parsing (handles key="value" format)
115
+ attr_pattern = r'(\w+)="([^"]*)"'
116
+ attributes = dict(re.findall(attr_pattern, attributes_str))
117
+
118
+ return {
119
+ 'content': tag_content,
120
+ 'attributes': attributes
121
+ }
122
+
123
+ except Exception as exc:
124
+ logging.error("Error parsing XML tag with attributes '%s': %s", tag_name, exc)
125
+ return None
aphra/core/prompts.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core prompt template loading utilities.
3
+
4
+ This module provides generic prompt template loading functionality
5
+ for all workflows in the Aphra translation system.
6
+ """
7
+
8
+ import os
9
+ from importlib import resources
10
+
11
+ def get_prompt(workflow_name: str, file_name: str, **kwargs) -> str:
12
+ """
13
+ Reads a prompt template from a workflow's prompts directory and formats it.
14
+
15
+ Args:
16
+ workflow_name: Name of the workflow (e.g., 'short_article', 'subtitles')
17
+ file_name: Name of the prompt file (e.g., 'step1_system.txt')
18
+ **kwargs: Optional keyword arguments to format the prompt template
19
+
20
+ Returns:
21
+ str: The formatted prompt content
22
+
23
+ Raises:
24
+ FileNotFoundError: If the prompt file doesn't exist
25
+ KeyError: If required format parameters are missing
26
+ """
27
+ try:
28
+ # Try using importlib.resources first (works in packaged installations)
29
+ ref = resources.files('aphra.workflows') / workflow_name / 'prompts' / file_name
30
+ with ref.open('r', encoding="utf-8") as file:
31
+ content = file.read()
32
+ except (AttributeError, FileNotFoundError) as exc:
33
+ # Fallback to direct file access (works in development)
34
+ workflows_path = os.path.dirname(os.path.dirname(__file__)) # Go up to aphra/
35
+ file_path = os.path.join(workflows_path, 'workflows', workflow_name, 'prompts', file_name)
36
+
37
+ if not os.path.exists(file_path):
38
+ raise FileNotFoundError(f"Prompt file not found: {file_path}") from exc
39
+
40
+ with open(file_path, 'r', encoding="utf-8") as file:
41
+ content = file.read()
42
+
43
+ # Format the content with provided kwargs if any
44
+ if kwargs:
45
+ try:
46
+ formatted_prompt = content.format(**kwargs)
47
+ except KeyError as exc:
48
+ msg = f"Missing format parameter {exc} for prompt {workflow_name}/{file_name}"
49
+ raise KeyError(msg) from exc
50
+ else:
51
+ formatted_prompt = content
52
+
53
+ return formatted_prompt
54
+
55
+ def list_workflow_prompts(workflow_name: str) -> list[str]:
56
+ """
57
+ List all available prompt files for a workflow.
58
+
59
+ Args:
60
+ workflow_name: Name of the workflow
61
+
62
+ Returns:
63
+ list[str]: List of prompt filenames available for the workflow
64
+
65
+ Raises:
66
+ FileNotFoundError: If the workflow prompts directory doesn't exist
67
+ """
68
+ try:
69
+ # Try using importlib.resources first
70
+ prompts_ref = resources.files('aphra.workflows') / workflow_name / 'prompts'
71
+ return [f.name for f in prompts_ref.iterdir() if f.is_file()]
72
+ except (AttributeError, FileNotFoundError) as exc:
73
+ # Fallback to direct directory access
74
+ workflows_path = os.path.dirname(os.path.dirname(__file__))
75
+ prompts_path = os.path.join(workflows_path, 'workflows', workflow_name, 'prompts')
76
+
77
+ if not os.path.exists(prompts_path):
78
+ msg = f"Workflow prompts directory not found: {prompts_path}"
79
+ raise FileNotFoundError(msg) from exc
80
+
81
+ return [f for f in os.listdir(prompts_path)
82
+ if os.path.isfile(os.path.join(prompts_path, f))]
aphra/core/registry.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow registry for managing available translation workflows.
3
+
4
+ This module provides a centralized registry for discovering and
5
+ managing translation workflows.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, List, Optional, Type
10
+ from .workflow import AbstractWorkflow
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class WorkflowRegistry:
15
+ """
16
+ Registry for managing translation workflows.
17
+
18
+ This class maintains a registry of available workflows and provides
19
+ methods for workflow discovery and selection. Workflows are automatically
20
+ discovered and registered from the workflows package.
21
+ """
22
+
23
+ def __init__(self):
24
+ """Initialize the workflow registry with auto-discovered workflows."""
25
+ self._workflows: Dict[str, Type[AbstractWorkflow]] = {}
26
+ self._register_discovered_workflows()
27
+
28
+ def _register_discovered_workflows(self):
29
+ """Register all auto-discovered workflows from the workflows package."""
30
+ try:
31
+ # Import workflows after the module is fully initialized
32
+ from .. import workflows
33
+
34
+ # Get all workflow classes that were auto-discovered
35
+ for class_name in workflows.__all__:
36
+ workflow_class = getattr(workflows, class_name, None)
37
+ if workflow_class is not None:
38
+ self.register_workflow(workflow_class)
39
+ logger.debug("Auto-registered workflow: %s", class_name)
40
+ else:
41
+ logger.warning("Failed to get workflow class: %s", class_name)
42
+
43
+ except ImportError as exc:
44
+ logger.error("Failed to import workflows for auto-registration: %s", exc)
45
+ except Exception as exc:
46
+ logger.error("Unexpected error during workflow auto-registration: %s", exc)
47
+
48
+ def register_workflow(self, workflow_class: Type[AbstractWorkflow]):
49
+ """
50
+ Register a new workflow type.
51
+
52
+ Args:
53
+ workflow_class: The workflow class to register
54
+ """
55
+ # Create temporary instance to get the workflow name
56
+ temp_workflow = workflow_class()
57
+ workflow_name = temp_workflow.get_workflow_name()
58
+ self._workflows[workflow_name] = workflow_class
59
+
60
+ def get_workflow(self, workflow_name: str) -> Optional[AbstractWorkflow]:
61
+ """
62
+ Get a workflow instance by name.
63
+
64
+ Args:
65
+ workflow_name: The name of the workflow to retrieve
66
+
67
+ Returns:
68
+ AbstractWorkflow: An instance of the requested workflow, or None if not found
69
+ """
70
+ workflow_class = self._workflows.get(workflow_name)
71
+ if workflow_class:
72
+ return workflow_class()
73
+ return None
74
+
75
+ def get_suitable_workflow(self, text: str, **kwargs) -> Optional[AbstractWorkflow]:
76
+ """
77
+ Find the most suitable workflow for the given content.
78
+
79
+ Args:
80
+ text: The text content to analyze
81
+ **kwargs: Additional parameters for workflow evaluation
82
+
83
+ Returns:
84
+ AbstractWorkflow: The most suitable workflow instance, or None if none found
85
+ """
86
+ # For now, we check workflows in registration order
87
+ # In the future, we could implement more sophisticated selection logic
88
+ for workflow_class in self._workflows.values():
89
+ workflow = workflow_class()
90
+ if workflow.is_suitable_for(text, **kwargs):
91
+ return workflow
92
+
93
+ return None
94
+
95
+ def list_workflows(self) -> List[str]:
96
+ """
97
+ Get a list of all registered workflow names.
98
+
99
+ Returns:
100
+ List[str]: Names of all registered workflows
101
+ """
102
+ return list(self._workflows.keys())
103
+
104
+ def get_workflow_info(self, workflow_name: str) -> Optional[Dict[str, str]]:
105
+ """
106
+ Get information about a specific workflow.
107
+
108
+ Args:
109
+ workflow_name: The name of the workflow
110
+
111
+ Returns:
112
+ Dict[str, str]: Information about the workflow, or None if not found
113
+ """
114
+ workflow = self.get_workflow(workflow_name)
115
+ if workflow:
116
+ return {
117
+ 'name': workflow.get_workflow_name(),
118
+ 'class': workflow.__class__.__name__,
119
+ 'module': workflow.__class__.__module__
120
+ }
121
+ return None
122
+
123
+ # Global registry instance
124
+ _registry = WorkflowRegistry()
125
+
126
+ def get_registry() -> WorkflowRegistry:
127
+ """
128
+ Get the global workflow registry instance.
129
+
130
+ Returns:
131
+ WorkflowRegistry: The global registry instance
132
+ """
133
+ return _registry
134
+
135
+ def register_workflow(workflow_class: Type[AbstractWorkflow]):
136
+ """
137
+ Convenient function to register a workflow with the global registry.
138
+
139
+ Args:
140
+ workflow_class: The workflow class to register
141
+ """
142
+ _registry.register_workflow(workflow_class)
143
+
144
+ def get_workflow(workflow_name: str) -> Optional[AbstractWorkflow]:
145
+ """
146
+ Convenient function to get a workflow from the global registry.
147
+
148
+ Args:
149
+ workflow_name: The name of the workflow to retrieve
150
+
151
+ Returns:
152
+ AbstractWorkflow: An instance of the requested workflow, or None if not found
153
+ """
154
+ return _registry.get_workflow(workflow_name)
155
+
156
+ def get_suitable_workflow(text: str, **kwargs) -> Optional[AbstractWorkflow]:
157
+ """
158
+ Convenient function to find a suitable workflow from the global registry.
159
+
160
+ Args:
161
+ text: The text content to analyze
162
+ **kwargs: Additional parameters for workflow evaluation
163
+
164
+ Returns:
165
+ AbstractWorkflow: The most suitable workflow instance, or None if none found
166
+ """
167
+ return _registry.get_suitable_workflow(text, **kwargs)
aphra/core/workflow.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow base classes.
3
+
4
+ This module defines the contract for translation workflows.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Dict, Any
9
+ from .context import TranslationContext
10
+ from .config import load_workflow_config
11
+
12
class AbstractWorkflow(ABC):
    """
    Base class for translation workflows.

    A workflow orchestrates a translation process for a specific type of
    content; subclasses override the abstract hooks below to customize
    behavior.
    """

    @abstractmethod
    def get_workflow_name(self) -> str:
        """
        Return the unique identifier of this workflow.

        Returns:
            str: The workflow name identifier.
        """
        raise NotImplementedError("Subclasses must implement get_workflow_name")

    @abstractmethod
    def is_suitable_for(self, text: str, **kwargs) -> bool:
        """
        Decide whether this workflow can handle the given content.

        Args:
            text: The text content to evaluate.
            **kwargs: Additional evaluation parameters.

        Returns:
            bool: True when this workflow is suitable for the content.
        """
        raise NotImplementedError("Subclasses must implement is_suitable_for")

    def load_config(self, global_config_path: str = None) -> Dict[str, Any]:
        """
        Load this workflow's configuration.

        The workflow's default configuration is loaded first, then user
        overrides from the global config file are applied on top.

        Args:
            global_config_path: Path to the global config file. If None,
                'config.toml' is used.

        Returns:
            Dict[str, Any]: The merged configuration values.
        """
        return load_workflow_config(self.get_workflow_name(), global_config_path)

    def run(self, context: TranslationContext, text: str = None) -> str:
        """
        Execute the workflow end to end, handling configuration setup.

        Loads the workflow-specific configuration, stores it in the
        translation context, resolves the input text, and delegates to
        execute().

        Args:
            context: The translation context.
            text: The text to translate (optional if already in context).

        Returns:
            str: The final translation result.
        """
        context.set_workflow_config(self.load_config())

        if text is None:
            # Fall back to any text carried on the context itself.
            text = getattr(context, 'text', '')

        return self.execute(context, text)

    @abstractmethod
    def execute(self, context: TranslationContext, text: str) -> str:
        """
        Carry out the workflow steps for the given context and text.

        Args:
            context: The translation context.
            text: The text to translate.

        Returns:
            str: The final translation result.
        """
        raise NotImplementedError("Subclasses must implement execute")
aphra/parsers.py DELETED
@@ -1,59 +0,0 @@
1
- """
2
- Module for parsing analysis and translation strings.
3
- """
4
-
5
- import xml.etree.ElementTree as ET
6
- import logging
7
-
8
- def parse_analysis(analysis_str):
9
- """
10
- Parses the analysis part of the provided string and returns
11
- a list of items with their names and keywords.
12
-
13
- :param analysis_str: String containing the analysis in the specified format.
14
- :return: A list of dictionaries, each containing 'name' and 'keywords' from the analysis.
15
- """
16
- try:
17
- # Extract the <analysis> part
18
- analysis_start = analysis_str.index("<analysis>") + len("<analysis>")
19
- analysis_end = analysis_str.index("</analysis>")
20
- analysis_content = analysis_str[analysis_start:analysis_end].strip()
21
-
22
- # Parse the analysis content using XML parser
23
- root = ET.fromstring(f"<root>{analysis_content}</root>")
24
- items = []
25
-
26
- for item in root.findall('item'):
27
- name = item.find('name').text
28
- keywords = item.find('keywords').text
29
- items.append({'name': name, 'keywords': keywords.split(', ')})
30
-
31
- return items
32
- except ValueError as e:
33
- logging.error('Error parsing analysis string: %s', e)
34
- return []
35
- except ET.ParseError as e:
36
- logging.error('Error parsing XML content: %s', e)
37
- return []
38
-
39
- def parse_translation(translation_str):
40
- """
41
- Parses the provided string and returns the content within
42
- <improved_translation> and <translators_notes> tags.
43
-
44
- :param translation_str: String containing the translation and notes in the specified format.
45
- :return: String containing the <improved_translation>.
46
- """
47
- try:
48
- improved_translation_start = (
49
- translation_str.index("<improved_translation>") + len("<improved_translation>")
50
- )
51
- improved_translation_end = translation_str.index("</improved_translation>")
52
- improved_translation_content = translation_str[
53
- improved_translation_start:improved_translation_end
54
- ].strip()
55
-
56
- return improved_translation_content
57
- except ValueError as e:
58
- logging.error('Error parsing translation string: %s', e)
59
- return "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aphra/prompts.py DELETED
@@ -1,22 +0,0 @@
1
- """
2
- Module for reading and formatting prompt templates.
3
- """
4
-
5
- from pkg_resources import resource_filename
6
-
7
- def get_prompt(file_name, **kwargs):
8
- """
9
- Reads a prompt template from a file and formats it with the given arguments.
10
-
11
- :param file_name: Path to the file containing the prompt template.
12
- :param kwargs: Optional keyword arguments to format the prompt template.
13
- :return: The formatted prompt.
14
- """
15
- file_path = resource_filename(__name__, f'prompts/{file_name}')
16
- with open(file_path, 'r', encoding="utf-8") as file:
17
- content = file.read()
18
- if kwargs:
19
- formatted_prompt = content.format(**kwargs)
20
- else:
21
- formatted_prompt = content
22
- return formatted_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aphra/translate.py CHANGED
@@ -1,24 +1,13 @@
1
  """
2
  Module for translating text using multiple steps and language models.
3
- """
4
-
5
- from dataclasses import dataclass
6
- from .llm_client import LLMModelClient
7
- from .prompts import get_prompt
8
- from .parsers import parse_analysis, parse_translation
9
 
10
- @dataclass
11
- class TranslationContext:
12
- """
13
- Context for translation containing parameters and settings.
14
 
15
- This class encapsulates the parameters and settings needed for performing a translation,
16
- including the model client, source and target languages, and logging preferences.
17
- """
18
- model_client: LLMModelClient
19
- source_language: str
20
- target_language: str
21
- log_calls: bool
22
 
23
  def load_model_client(config_file):
24
  """
@@ -29,57 +18,12 @@ def load_model_client(config_file):
29
  """
30
  return LLMModelClient(config_file)
31
 
32
- def execute_model_call(context, system_file, user_file, model_name, **kwargs):
33
- """
34
- Executes a model call using the provided system and user prompts.
35
-
36
- :param context: An instance of TranslationContext containing translation parameters.
37
- :param system_file: Path to the file containing the system prompt.
38
- :param user_file: Path to the file containing the user prompt.
39
- :param model_name: The name of the model to use.
40
- :param kwargs: Optional keyword arguments to format the prompt templates.
41
- :return: The model's response content.
42
- """
43
- system_prompt = get_prompt(system_file, **kwargs)
44
- user_prompt = get_prompt(user_file, **kwargs)
45
- return context.model_client.call_model(
46
- system_prompt,
47
- user_prompt,
48
- model_name,
49
- log_call=context.log_calls
50
- )
51
-
52
- def generate_glossary(context, parsed_items, model_searcher):
53
- """
54
- Generates a glossary of terms based on the parsed analysis items.
55
-
56
- :param context: An instance of TranslationContext containing translation parameters.
57
- :param parsed_items: A list of dictionaries containing 'name' and 'keywords' for each item.
58
- :param model_searcher: The name of the model to use for searching term explanations.
59
- :return: A formatted string containing the glossary entries.
60
- """
61
- glossary = []
62
- for item in parsed_items:
63
- term_explanation = execute_model_call(
64
- context,
65
- 'step2_system.txt',
66
- 'step2_user.txt',
67
- model_searcher,
68
- term=item['name'],
69
- keywords=", ".join(item['keywords']),
70
- source_language=context.source_language,
71
- target_language=context.target_language
72
- )
73
- glossary_entry = (
74
- f"### {item['name']}\n\n**Keywords:** {', '.join(item['keywords'])}\n\n"
75
- f"**Explanation:**\n{term_explanation}\n"
76
- )
77
- glossary.append(glossary_entry)
78
- return "\n".join(glossary)
79
-
80
  def translate(source_language, target_language, text, config_file="config.toml", log_calls=False):
81
  """
82
- Translates the provided text from the source language to the target language in multiple steps.
 
 
 
83
 
84
  :param source_language: The source language of the text.
85
  :param target_language: The target language of the text.
@@ -88,60 +32,24 @@ def translate(source_language, target_language, text, config_file="config.toml",
88
  :param log_calls: Boolean indicating whether to log the call details.
89
  :return: The improved translation of the text.
90
  """
 
91
  model_client = load_model_client(config_file)
92
- models = model_client.llms
93
- context = TranslationContext(model_client, source_language, target_language, log_calls)
94
-
95
- analysis_content = execute_model_call(
96
- context,
97
- 'step1_system.txt',
98
- 'step1_user.txt',
99
- models['writer'],
100
- post_content=text,
101
- source_language=source_language,
102
- target_language=target_language
103
- )
104
 
105
- parsed_items = parse_analysis(analysis_content)
106
- glossary_content = generate_glossary(
107
- context, parsed_items, models['searcher']
108
- )
109
-
110
- translated_content = execute_model_call(
111
- context,
112
- 'step3_system.txt',
113
- 'step3_user.txt',
114
- models['writer'],
115
- text=text,
116
  source_language=source_language,
117
- target_language=target_language
 
118
  )
119
 
120
- critique = execute_model_call(
121
- context,
122
- 'step4_system.txt',
123
- 'step4_user.txt',
124
- models['critiquer'],
125
- text=text,
126
- translation=translated_content,
127
- glossary=glossary_content,
128
- source_language=source_language,
129
- target_language=target_language
130
- )
131
 
132
- final_translation_content = execute_model_call(
133
- context,
134
- 'step5_system.txt',
135
- 'step5_user.txt',
136
- models['writer'],
137
- text=text,
138
- translation=translated_content,
139
- glossary=glossary_content,
140
- critique=critique,
141
- source_language=source_language,
142
- target_language=target_language
143
- )
144
 
145
- improved_translation = parse_translation(final_translation_content)
 
146
 
147
- return improved_translation
 
1
  """
2
  Module for translating text using multiple steps and language models.
 
 
 
 
 
 
3
 
4
+ This module provides the main translation functionality using Aphra's
5
+ workflow-based translation system.
6
+ """
 
7
 
8
+ from .core.llm_client import LLMModelClient
9
+ from .core.context import TranslationContext
10
+ from .core.registry import get_suitable_workflow
 
 
 
 
11
 
12
def load_model_client(config_file):
    """
    Build an LLM model client from the given configuration file.

    :param config_file: Path to the configuration file.
    :return: A configured LLMModelClient instance.
    """
    return LLMModelClient(config_file)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
def translate(source_language, target_language, text, config_file="config.toml", log_calls=False):
    """
    Translates the provided text from the source language to the target language using workflows.

    A suitable workflow is selected automatically for the content and run
    inside a freshly built translation context.

    :param source_language: The source language of the text.
    :param target_language: The target language of the text.
    :param text: The text to translate.
    :param config_file: Path to the configuration file.
    :param log_calls: Boolean indicating whether to log the call details.
    :return: The improved translation of the text.
    :raises ValueError: If no registered workflow accepts the provided text.
    """
    # Build the context that carries client, languages, and logging settings.
    context = TranslationContext(
        model_client=load_model_client(config_file),
        source_language=source_language,
        target_language=target_language,
        log_calls=log_calls
    )

    # Let the registry pick the most appropriate workflow for this content.
    workflow = get_suitable_workflow(text)
    if workflow is None:
        raise ValueError("No suitable workflow found for the provided text")

    return workflow.run(context, text)
aphra/workflows/__init__.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow implementations with automatic discovery.
3
+
4
+ This module automatically discovers and imports all workflow classes
5
+ from subdirectories, making it easy to add new workflows without
6
+ modifying this file.
7
+ """
8
+
9
+ import os
10
+ import importlib
11
+ import logging
12
+ from typing import List, Type, Dict
13
+
14
+ # Import the base class for type checking
15
+ try:
16
+ from ..core.workflow import AbstractWorkflow
17
+ except ImportError:
18
+ # Fallback for cases where core is not yet available
19
+ AbstractWorkflow = None
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Initialize __all__ as empty list - will be populated by auto-discovery
24
+ __all__ = []
25
+
26
def _discover_workflows() -> Dict[str, Type]:
    """
    Auto-discover workflow classes from subdirectories.

    Scans all subdirectories of the workflows package and looks for
    classes that inherit from AbstractWorkflow. Directories without an
    ``__init__.py`` and dunder directories are ignored.

    Returns:
        Dict[str, Type]: Mapping of class name to workflow class
    """
    workflows = {}
    current_dir = os.path.dirname(__file__)

    if not current_dir:
        logger.warning("Could not determine workflows directory")
        return workflows

    try:
        # Scan all items in the workflows directory
        for item in os.listdir(current_dir):
            item_path = os.path.join(current_dir, item)

            # Skip files and special directories (e.g. __pycache__)
            if not os.path.isdir(item_path) or item.startswith('__'):
                continue

            # Skip if no __init__.py (not a proper Python package)
            init_file = os.path.join(item_path, '__init__.py')
            if not os.path.exists(init_file):
                logger.debug("Skipping %s: no __init__.py found", item)
                continue

            try:
                # Import the workflow package relative to this package
                module = importlib.import_module(f'.{item}', package=__name__)
                logger.debug("Successfully imported workflow package: %s", item)

                # Look for workflow classes in the module
                workflow_classes_found = 0
                for attr_name in dir(module):
                    attr = getattr(module, attr_name, None)

                    # Check if it's a class that inherits from AbstractWorkflow.
                    # AbstractWorkflow may be None if the core package failed
                    # to import (see the fallback at module top); in that case
                    # nothing is discovered.
                    if (isinstance(attr, type) and
                        AbstractWorkflow is not None and
                        issubclass(attr, AbstractWorkflow) and
                        attr != AbstractWorkflow):

                        workflows[attr_name] = attr
                        workflow_classes_found += 1
                        logger.debug("Discovered workflow: %s from %s", attr_name, item)

                if workflow_classes_found == 0:
                    logger.warning("No workflow classes found in %s", item)

            except ImportError as exc:
                # A broken workflow package must not break discovery of others.
                logger.warning("Failed to import workflow package %s: %s", item, exc)
                continue
            except Exception as exc:
                logger.error("Unexpected error while discovering workflow %s: %s", item, exc)
                continue

    except OSError as exc:
        logger.error("Failed to scan workflows directory: %s", exc)

    logger.debug("Workflow discovery completed. Found %d workflows: %s",
                 len(workflows), list(workflows.keys()))
    return workflows
94
+
95
def _setup_module_exports(workflows: Dict[str, Type]) -> List[str]:
    """
    Publish discovered workflow classes as module-level attributes.

    Args:
        workflows: Dictionary of workflow name to class mappings

    Returns:
        List[str]: Sorted list of exported workflow class names
    """
    # Make every discovered class importable directly from this module.
    globals().update(workflows)
    # Sorted for a stable, reproducible export list.
    return sorted(workflows)
115
+
116
# Perform auto-discovery at import time so workflows become available simply
# by living in a subpackage of aphra/workflows.
logger.debug("Starting workflow auto-discovery...")
_discovered_workflows = _discover_workflows()

# Expose discovered classes as module attributes and publish them via __all__.
__all__ = _setup_module_exports(_discovered_workflows)

# Log final state for debugging discovery issues.
logger.debug("Workflows module initialized with: %s", __all__)
125
+
126
+ # For backward compatibility and explicit access
127
def get_available_workflows() -> List[str]:
    """
    List the class names of every discovered workflow.

    Returns:
        List[str]: A copy of the exported workflow class names.
    """
    return list(__all__)
135
+
136
def get_workflow_class(name: str) -> Type:
    """
    Look up a discovered workflow class by name.

    Args:
        name: The name of the workflow class

    Returns:
        Type: The workflow class

    Raises:
        AttributeError: If the workflow class is not found
    """
    try:
        return globals()[name]
    except KeyError:
        raise AttributeError(
            f"Workflow class '{name}' not found. Available: {__all__}"
        ) from None
aphra/workflows/short_article/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Short Article workflow for translating articles and blog posts.
3
+
4
+ This module contains the short article translation workflow which implements
5
+ a 5-step process for contextual translation of articles and blog posts.
6
+ """
7
+
8
+ from .short_article_workflow import ShortArticleWorkflow
9
+
10
+ __all__ = ['ShortArticleWorkflow']
aphra/workflows/short_article/aux/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Auxiliary utilities for the Short Article workflow.
3
+
4
+ This module contains parsers and utilities specific to the short article
5
+ translation workflow.
6
+ """
7
+
8
+ from .parsers import parse_analysis, parse_translation
9
+
10
+ __all__ = ['parse_analysis', 'parse_translation']
aphra/workflows/short_article/aux/parsers.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Parsers specific to the Short Article workflow.
3
+
4
+ This module contains parsers for extracting content from LLM responses
5
+ that are specific to the short article translation workflow.
6
+
7
+ These parsers use the generic XML parsing functions from the core module
8
+ to avoid code duplication while maintaining a clear API.
9
+ """
10
+
11
+ import logging
12
+ from typing import List, Dict, Any
13
+ from ....core.parsers import parse_xml_tag, parse_multiple_xml_tags
14
+
15
def parse_analysis(analysis_str: str) -> List[Dict[str, Any]]:
    """
    Extract term entries from the <analysis> section of an LLM response.

    Uses the generic XML parsers from the core module to pull the
    <analysis> tag and its nested <item> elements, returning one entry
    per complete item.

    Args:
        analysis_str: String containing the analysis in the specified format.

    Returns:
        List[Dict]: A list of dictionaries, each containing 'name' and 'keywords' from the analysis.
    """
    # Locate the <analysis> section first; without it there is nothing to parse.
    body = parse_xml_tag(analysis_str, "analysis")
    if not body:
        logging.error('Could not find <analysis> tag in content')
        return []

    # Collect every <item> element inside the analysis.
    raw_items = parse_multiple_xml_tags(body, "item")
    if not raw_items:
        logging.warning('No <item> tags found within <analysis>')
        return []

    parsed: List[Dict[str, Any]] = []
    for raw in raw_items:
        term_name = parse_xml_tag(raw, "name")
        keyword_text = parse_xml_tag(raw, "keywords")

        # Only keep items that provide both a name and keywords.
        if not (term_name and keyword_text):
            logging.warning('Incomplete item found - name: %s, keywords: %s',
                            term_name, keyword_text)
            continue

        parsed.append({
            'name': term_name,
            'keywords': keyword_text.split(', ')
        })

    return parsed
56
+
57
def parse_translation(translation_str: str) -> str:
    """
    Extract the final translation from an LLM response.

    Uses the generic XML parser from the core module to pull the content
    of the <improved_translation> tag.

    Args:
        translation_str: String containing the translation in the specified format.

    Returns:
        str: String containing the <improved_translation> content, or an
        empty string when the tag is missing.
    """
    extracted = parse_xml_tag(translation_str, "improved_translation")
    if extracted is not None:
        return extracted

    logging.error('Could not find <improved_translation> tag in content')
    return ""
aphra/workflows/short_article/config/default.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Default configuration for the Short Article workflow
2
+ # These values can be overridden in config.toml under the [short_article] section
3
+
4
+ # LLM models used by this workflow
5
+ writer = "anthropic/claude-sonnet-4"
6
+ searcher = "perplexity/sonar"
7
+ critiquer = "anthropic/claude-sonnet-4"
aphra/{prompts β†’ workflows/short_article/prompts}/step1_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step1_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step2_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step2_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step3_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step3_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step4_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step4_user.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step5_system.txt RENAMED
File without changes
aphra/{prompts β†’ workflows/short_article/prompts}/step5_user.txt RENAMED
File without changes
aphra/workflows/short_article/short_article_workflow.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Short Article workflow implementation.
3
+
4
+ This workflow implements the 5-step translation process for articles
5
+ and similar content types.
6
+ """
7
+
8
+ from typing import List, Dict, Any
9
+ from ...core.context import TranslationContext
10
+ from ...core.prompts import get_prompt
11
+ from ...core.workflow import AbstractWorkflow
12
+ from .aux.parsers import parse_analysis, parse_translation
13
+
14
class ShortArticleWorkflow(AbstractWorkflow):
    """
    Workflow for translating articles and similar content.

    This workflow implements the proven 5-step process using direct methods:
    1. analyze() - Identify key terms and concepts
    2. search() - Generate contextual explanations with web search
    3. translate() - Create initial translation
    4. critique() - Evaluate translation quality
    5. refine() - Produce final improved translation

    To customize: simply inherit from this class and override any method.
    """

    def get_workflow_name(self) -> str:
        """Get the unique name of this workflow."""
        return "short_article"

    def is_suitable_for(self, text: str, **_kwargs) -> bool:
        """
        Determine if this workflow is suitable for the given content.

        This workflow is suitable for:
        - Articles and blog posts
        - General text content
        - Serves as the default workflow when no other workflow matches

        Args:
            text: The text content to evaluate
            **_kwargs: Additional evaluation parameters (unused)

        Returns:
            bool: True if this workflow is suitable
        """
        # This workflow accepts any non-empty text
        return len(text.strip()) > 0

    def _call_step(self, context: TranslationContext, step: str, model: str,
                   call_kwargs: Dict[str, Any] = None, **prompt_kwargs) -> str:
        """
        Render the system/user prompt pair for a step and invoke the model.

        Every step of this workflow follows the same pattern: load
        '<step>_system.txt' and '<step>_user.txt', format both with the
        same keyword arguments, then call the LLM. This helper removes
        that duplication.

        Args:
            context: The translation context
            step: Prompt file prefix for the step (e.g. 'step1')
            model: The model identifier to call
            call_kwargs: Optional extra keyword arguments forwarded to
                call_model (e.g. web-search flags)
            **prompt_kwargs: Values used to format both prompt templates

        Returns:
            str: The raw model response
        """
        system_prompt = get_prompt('short_article', f'{step}_system.txt', **prompt_kwargs)
        user_prompt = get_prompt('short_article', f'{step}_user.txt', **prompt_kwargs)
        return context.model_client.call_model(
            system_prompt,
            user_prompt,
            model,
            log_call=context.log_calls,
            **(call_kwargs or {})
        )

    def analyze(self, context: TranslationContext, text: str) -> List[Dict[str, Any]]:
        """
        Analyze the source text to identify key terms and concepts.

        Args:
            context: The translation context
            text: The text to analyze

        Returns:
            List[Dict]: Parsed analysis results with term names and keywords
        """
        writer_model = context.get_workflow_config('writer')

        analysis_content = self._call_step(
            context, 'step1', writer_model,
            post_content=text,
            source_language=context.source_language,
            target_language=context.target_language
        )

        return parse_analysis(analysis_content)

    def search(self, context: TranslationContext, parsed_items: List[Dict[str, Any]]) -> str:
        """
        Generate contextual explanations for analyzed terms using web search.

        Args:
            context: The translation context
            parsed_items: List of terms from analysis step

        Returns:
            str: Formatted glossary content (empty string when there is
            nothing to explain)
        """
        if not parsed_items:
            return ""

        searcher_model = context.get_workflow_config('searcher')
        glossary = []

        for item in parsed_items:
            # Generate explanation for each term using web search
            term_explanation = self._generate_term_explanation(context, item, searcher_model)

            # Format glossary entry
            glossary_entry = (
                f"### {item['name']}\n\n**Keywords:** {', '.join(item['keywords'])}\n\n"
                f"**Explanation:**\n{term_explanation}\n"
            )
            glossary.append(glossary_entry)

        return "\n".join(glossary)

    def translate(self, context: TranslationContext, text: str) -> str:
        """
        Create the initial translation of the source text.

        Args:
            context: The translation context
            text: The text to translate

        Returns:
            str: The initial translation
        """
        writer_model = context.get_workflow_config('writer')

        return self._call_step(
            context, 'step3', writer_model,
            text=text,
            source_language=context.source_language,
            target_language=context.target_language
        )

    def critique(self, context: TranslationContext, text: str,
                 translation: str, glossary: str) -> str:
        """
        Evaluate the translation quality and provide feedback.

        Args:
            context: The translation context
            text: The original text
            translation: The initial translation
            glossary: The glossary from search step

        Returns:
            str: Critique and feedback
        """
        critiquer_model = context.get_workflow_config('critiquer')

        return self._call_step(
            context, 'step4', critiquer_model,
            text=text,
            translation=translation,
            glossary=glossary,
            source_language=context.source_language,
            target_language=context.target_language
        )

    def refine(self, context: TranslationContext, text: str, *,
               translation: str, glossary: str, critique: str) -> str:
        """
        Produce the final refined translation based on critique feedback.

        Args:
            context: The translation context
            text: The original text
            translation: The initial translation
            glossary: The glossary from search step
            critique: The critique feedback

        Returns:
            str: The final refined translation
        """
        writer_model = context.get_workflow_config('writer')

        final_translation_content = self._call_step(
            context, 'step5', writer_model,
            text=text,
            translation=translation,
            glossary=glossary,
            critique=critique,
            source_language=context.source_language,
            target_language=context.target_language
        )

        # Parse and return final translation
        return parse_translation(final_translation_content)

    def execute(self, context: TranslationContext, text: str) -> str:
        """
        Execute the complete short article workflow.

        This method orchestrates the 5-step process in sequence.

        Args:
            context: The translation context
            text: The text to translate

        Returns:
            str: The final refined translation
        """
        # Step 1: Analyze the text to identify key terms
        analysis = self.analyze(context, text)

        # Step 2: Search for contextual information about the terms
        glossary = self.search(context, analysis)

        # Step 3: Create initial translation
        translation = self.translate(context, text)

        # Step 4: Critique the translation
        critique = self.critique(context, text, translation, glossary)

        # Step 5: Refine the translation based on critique
        return self.refine(context, text, translation=translation,
                           glossary=glossary, critique=critique)

    def _generate_term_explanation(self, context: TranslationContext,
                                   item: Dict[str, Any], model: str) -> str:
        """
        Generate explanation for a single term using web search.

        Args:
            context: The translation context
            item: Dictionary with 'name' and 'keywords' keys
            model: The model to use for generation

        Returns:
            str: The generated explanation with web search results
        """
        return self._call_step(
            context, 'step2', model,
            call_kwargs={'enable_web_search': True, 'web_search_context': "high"},
            term=item['name'],
            keywords=", ".join(item['keywords']),
            source_language=context.source_language,
            target_language=context.target_language
        )
app.py CHANGED
@@ -1,19 +1,83 @@
 
 
 
 
 
 
 
1
  import os
2
  import tempfile
3
  import gradio as gr
4
  import toml
 
 
 
5
  from aphra import translate
6
 
 
 
7
  theme = gr.themes.Soft(
8
  primary_hue="rose",
9
  secondary_hue="pink",
10
  spacing_size="lg",
11
  )
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def create_config_file(api_key, writer_model, searcher_model, critic_model):
 
 
 
 
 
 
 
 
 
 
 
 
14
  config = {
15
  "openrouter": {"api_key": api_key},
16
- "llms": {
17
  "writer": writer_model,
18
  "searcher": searcher_model,
19
  "critiquer": critic_model
@@ -24,6 +88,22 @@ def create_config_file(api_key, writer_model, searcher_model, critic_model):
24
  return tmp.name
25
 
26
  def process_input(file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  if file is not None:
28
  with open(file, 'r', encoding='utf-8') as file:
29
  text = file.read()
@@ -40,10 +120,19 @@ def process_input(file, text_input, api_key, writer_model, searcher_model, criti
40
  )
41
  finally:
42
  os.unlink(config_file)
43
-
44
  return translation
45
 
46
  def create_interface():
 
 
 
 
 
 
 
 
 
47
  with gr.Blocks(theme=theme) as demo:
48
  gr.Markdown("<font size=6.5><center>πŸŒπŸ’¬ Aphra</center></font>")
49
  gr.Markdown(
@@ -51,27 +140,27 @@ def create_interface():
51
  [<a href="https://davidlms.github.io/aphra/">Project Page</a>] | [<a href="https://github.com/DavidLMS/aphra">Github</a>]</div>
52
  """
53
  )
54
- gr.Markdown("πŸŒπŸ’¬ Aphra is an open-source translation agent designed to enhance the quality of text translations by leveraging large language models (LLMs).")
55
-
56
  with gr.Row():
57
  api_key = gr.Textbox(label="OpenRouter API Key", type="password")
58
-
59
  writer_model = gr.Dropdown(
60
- ["anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest", "google/gemini-pro-1.5"],
61
  label="Writer Model",
62
- value="anthropic/claude-3.7-sonnet",
63
  allow_custom_value=True
64
  )
65
  searcher_model = gr.Dropdown(
66
- ["perplexity/llama-3.1-sonar-small-128k-online", "perplexity/llama-3.1-sonar-large-128k-online"],
67
  label="Searcher Model",
68
- value="perplexity/llama-3.1-sonar-large-128k-online",
69
  allow_custom_value=True
70
  )
71
  critic_model = gr.Dropdown(
72
- ["anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest", "google/gemini-pro-1.5"],
73
  label="Critic Model",
74
- value="anthropic/claude-3.7-sonnet",
75
  allow_custom_value=True
76
  )
77
 
@@ -89,20 +178,20 @@ def create_interface():
89
  allow_custom_value=True
90
  )
91
 
92
- with gr.Row():
93
  file = gr.File(label="Upload .txt or .md file", file_types=[".txt", ".md"])
94
  text_input = gr.Textbox(label="Or paste your text here", lines=5)
95
-
96
  translate_btn = gr.Button("Translate with πŸŒπŸ’¬ Aphra")
97
-
98
  output = gr.Textbox(label="Translation by πŸŒπŸ’¬ Aphra")
99
-
100
  translate_btn.click(
101
  process_input,
102
  inputs=[file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang],
103
  outputs=[output]
104
  )
105
-
106
  return demo
107
 
108
  if __name__ == "__main__":
 
1
+ """
2
+ Gradio web interface demo for Aphra translation system.
3
+
4
+ This module provides a user-friendly web interface for the Aphra translation
5
+ system using Gradio, allowing users to configure models and translate text
6
+ through a browser interface.
7
+ """
8
  import os
9
  import tempfile
10
  import gradio as gr
11
  import toml
12
+ import requests
13
+ import logging
14
+ # Import the translate function
15
  from aphra import translate
16
 
17
+ OPENROUTER_MODELS_URL="https://openrouter.ai/api/v1/models"
18
+
19
  theme = gr.themes.Soft(
20
  primary_hue="rose",
21
  secondary_hue="pink",
22
  spacing_size="lg",
23
  )
24
 
25
def fetch_openrouter_models():
    """
    Fetch available models from OpenRouter API.

    Returns:
        list: Sorted model IDs (strings). Falls back to a small hard-coded
        list when the API cannot be reached or returns invalid data, so the
        UI still renders with usable defaults.
    """
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Each entry in 'data' describes one model; skip malformed entries
        # that lack an 'id' instead of raising KeyError.
        models = [model['id'] for model in data.get('data', []) if 'id' in model]
        return sorted(models)
    except (requests.RequestException, ValueError) as e:
        # ValueError covers invalid JSON from response.json(); treat it the
        # same as a network failure. Use lazy %-formatting for logging.
        logging.warning("Failed to fetch models from OpenRouter: %s", e)
        # Fallback to default models if API fails
        return [
            "anthropic/claude-sonnet-4",
            "perplexity/sonar"
        ]
45
+
46
def get_default_models():
    """
    Get default model selections for different roles.

    Returns:
        tuple: (models, writer_default, searcher_default, critic_default)
        where each default is the preferred model when available, otherwise
        the first model in the fetched list.
    """
    models = fetch_openrouter_models()

    def pick(preferred):
        # Keep the preferred model when offered; otherwise fall back to the
        # first available model. With an empty list, keep the preferred name
        # so the dropdown still shows a sensible custom value.
        if preferred not in models and models:
            return models[0]
        return preferred

    # Default selections based on common good models
    writer_default = pick("anthropic/claude-sonnet-4")
    searcher_default = pick("perplexity/sonar")
    critic_default = pick("anthropic/claude-sonnet-4")

    return models, writer_default, searcher_default, critic_default
64
+
65
  def create_config_file(api_key, writer_model, searcher_model, critic_model):
66
+ """
67
+ Create a temporary TOML configuration file for Aphra.
68
+
69
+ Args:
70
+ api_key: OpenRouter API key
71
+ writer_model: Model to use for writing/translation
72
+ searcher_model: Model to use for searching/research
73
+ critic_model: Model to use for criticism/review
74
+
75
+ Returns:
76
+ str: Path to the temporary configuration file
77
+ """
78
  config = {
79
  "openrouter": {"api_key": api_key},
80
+ "short_article": {
81
  "writer": writer_model,
82
  "searcher": searcher_model,
83
  "critiquer": critic_model
 
88
  return tmp.name
89
 
90
  def process_input(file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang):
91
+ """
92
+ Process translation input from either file or text input.
93
+
94
+ Args:
95
+ file: Uploaded file object (if any)
96
+ text_input: Direct text input string
97
+ api_key: OpenRouter API key
98
+ writer_model: Model for writing/translation
99
+ searcher_model: Model for searching/research
100
+ critic_model: Model for criticism/review
101
+ source_lang: Source language for translation
102
+ target_lang: Target language for translation
103
+
104
+ Returns:
105
+ str: Translated text
106
+ """
107
  if file is not None:
108
  with open(file, 'r', encoding='utf-8') as file:
109
  text = file.read()
 
120
  )
121
  finally:
122
  os.unlink(config_file)
123
+
124
  return translation
125
 
126
  def create_interface():
127
+ """
128
+ Create and configure the Gradio web interface.
129
+
130
+ Returns:
131
+ gr.Blocks: Configured Gradio interface
132
+ """
133
+ # Get dynamic model list and defaults
134
+ models, writer_default, searcher_default, critic_default = get_default_models()
135
+
136
  with gr.Blocks(theme=theme) as demo:
137
  gr.Markdown("<font size=6.5><center>πŸŒπŸ’¬ Aphra</center></font>")
138
  gr.Markdown(
 
140
  [<a href="https://davidlms.github.io/aphra/">Project Page</a>] | [<a href="https://github.com/DavidLMS/aphra">Github</a>]</div>
141
  """
142
  )
143
+ gr.Markdown("πŸŒπŸ’¬ Aphra is an open-source translation agent with a workflow architecture designed to enhance the quality of text translations by leveraging large language models (LLMs).")
144
+
145
  with gr.Row():
146
  api_key = gr.Textbox(label="OpenRouter API Key", type="password")
147
+
148
  writer_model = gr.Dropdown(
149
+ models,
150
  label="Writer Model",
151
+ value=writer_default,
152
  allow_custom_value=True
153
  )
154
  searcher_model = gr.Dropdown(
155
+ models,
156
  label="Searcher Model",
157
+ value=searcher_default,
158
  allow_custom_value=True
159
  )
160
  critic_model = gr.Dropdown(
161
+ models,
162
  label="Critic Model",
163
+ value=critic_default,
164
  allow_custom_value=True
165
  )
166
 
 
178
  allow_custom_value=True
179
  )
180
 
181
+ with gr.Row():
182
  file = gr.File(label="Upload .txt or .md file", file_types=[".txt", ".md"])
183
  text_input = gr.Textbox(label="Or paste your text here", lines=5)
184
+
185
  translate_btn = gr.Button("Translate with πŸŒπŸ’¬ Aphra")
186
+
187
  output = gr.Textbox(label="Translation by πŸŒπŸ’¬ Aphra")
188
+
189
  translate_btn.click(
190
  process_input,
191
  inputs=[file, text_input, api_key, writer_model, searcher_model, critic_model, source_lang, target_lang],
192
  outputs=[output]
193
  )
194
+
195
  return demo
196
 
197
  if __name__ == "__main__":