translation: specified / implemented the question format for OpenAI based translations

2023-11-08 19:36:43 +01:00 · 2023-11-08 19:36:43 +01:00 · a185c0db7b
commit a185c0db7b
parent c1dc152f48
2 changed files with 57 additions and 23 deletions
--- a/chatmastermind/commands/common.py
+++ b/chatmastermind/commands/common.py
@ -7,6 +7,12 @@ from pathlib import Path
 from ..message import Message, MessageError, source_code


+def read_text_file(file: Path) -> str:
+    with open(file) as r:
+        content = r.read().strip()
+    return content
+
+
 def add_file_as_text(question_parts: list[str], file: str) -> None:
    """
    Add the given file as plain text to the question part list.
@ -19,8 +25,7 @@ def add_file_as_text(question_parts: list[str], file: str) -> None:
        if message and message.answer:
            content = message.answer
    except MessageError:
-        with open(file) as r:
-            content = r.read().strip()
+        content = read_text_file(Path(file))
    if len(content) > 0:
        question_parts.append(content)

--- a/chatmastermind/commands/translation.py
+++ b/chatmastermind/commands/translation.py
@ -1,7 +1,6 @@
 import argparse
 from pathlib import Path
-from itertools import zip_longest
-from .common import invert_input_tag_args, add_file_as_text
+from .common import invert_input_tag_args, read_text_file
 from ..configuration import Config
 from ..message import MessageFilter, Message, Question
 from ..chat import ChatDB, msg_location
@ -11,32 +10,61 @@ class TranslationCmdError(Exception):
    pass


-def create_message(chat: ChatDB, args: argparse.Namespace) -> Message:
+text_separator: str = 'TEXT:'
+
+
+def translation_prompt_openai(source_lang: str, target_lang: str) -> str:
    """
-    Create a new message from the given arguments and write it
-    to the cache directory.
+    Return the prompt for GPT that tells it to do the translation.
    """
-    text_parts = []
+    # FIXME: specify the document format if known
+    return f"Translate the text below the line {text_separator} from {source_lang} to {target_lang}."
+
+
+def create_message_openai(chat: ChatDB, args: argparse.Namespace) -> Message:
+    """
+    Create a new message from the given arguments and write it to the cache directory.
+
+    Message format
+    1. Translation prompt (tells GPT to do a translation)
+    2. Glossary (if specified as an argument)
+    3. User provided prompt enhancements
+    4. Translation separator
+    5. User provided text to be translated
+
+    The text to be translated is determined as follows:
+    - if a document is provided in the arguments, translate its content
+    - if no document is provided, translate the last text argument
+
+    The other text arguments will be put into the "header" and can be used
+    to improve the translation prompt.
+    """
+    text_args: list[str] = []
    if args.create is not None:
-        text_list = args.create
+        text_args = args.create
    elif args.ask is not None:
-        text_list = args.ask
+        text_args = args.ask
    else:
        raise TranslationCmdError("No input text found")
-    # NOTE: we currently support only one input document
-    text_files = args.input_document if args.input_document is not None else []

-    # create the full text to be translated by combining all text parts
-    # from the arguments with the content of the document
-    for text, text_file in zip_longest(text_list, text_files, fillvalue=None):
-        if text is not None and len(text.strip()) > 0:
-            text_parts.append(text)
-        if text_file is not None and len(text_file) > 0:
-            add_file_as_text(text_parts, text_file)
-    full_text = '\n\n'.join([str(s) for s in text_parts])
+    # extract user prompt and user text to be translated
+    user_text: str
+    user_prompt: str
+    if args.input_document is not None:
+        user_text = read_text_file(args.input_document)
+        user_prompt = '\n\n'.join([str(s) for s in text_args])
+    else:
+        user_text = text_args[-1]
+        user_prompt = '\n\n'.join([str(s) for s in text_args[:-1]])

-    # FIXME: prepend translation prompt and glossaries (if given)
-    message = Message(question=Question(full_text),
+    # build full question string
+    # FIXME: add glossaries if given
+    question_text: str = '\n\n'.join([translation_prompt_openai(args.source_lang, args.target_lang),
+                                      user_prompt,
+                                      text_separator,
+                                      user_text])
+    # create and write the message
+    message = Message(question=Question(question_text),
                      tags=args.output_tags,
                      ai=args.AI,
                      model=args.model)
@ -62,8 +90,9 @@ def translation_cmd(args: argparse.Namespace, config: Config) -> None:
                           glob=args.glob,
                           loc=msg_location(args.location))
    # if it's a new translation, create and store it immediately
+    # FIXME: check AI type
    if args.ask or args.create:
        # message = create_message(chat, args)
-        create_message(chat, args)
+        create_message_openai(chat, args)
    if args.create:
        return