From 75314cd7774bd802f37049da58dec7d782c3f63e Mon Sep 17 00:00:00 2001 From: juk0de Date: Wed, 8 Nov 2023 19:36:43 +0100 Subject: [PATCH] translation: specified / implemented the question format for OpenAI based translations --- chatmastermind/commands/common.py | 9 +++- chatmastermind/commands/translation.py | 71 ++++++++++++++++++-------- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/chatmastermind/commands/common.py b/chatmastermind/commands/common.py index 6d5e0e0..6bff54e 100644 --- a/chatmastermind/commands/common.py +++ b/chatmastermind/commands/common.py @@ -7,6 +7,12 @@ from pathlib import Path from ..message import Message, MessageError, source_code +def read_text_file(file: Path) -> str: + with open(file) as r: + content = r.read().strip() + return content + + def add_file_as_text(question_parts: list[str], file: str) -> None: """ Add the given file as plain text to the question part list. @@ -19,8 +25,7 @@ def add_file_as_text(question_parts: list[str], file: str) -> None: if message and message.answer: content = message.answer except MessageError: - with open(file) as r: - content = r.read().strip() + content = read_text_file(Path(file)) if len(content) > 0: question_parts.append(content) diff --git a/chatmastermind/commands/translation.py b/chatmastermind/commands/translation.py index 2862a1a..9ed2945 100644 --- a/chatmastermind/commands/translation.py +++ b/chatmastermind/commands/translation.py @@ -1,7 +1,6 @@ import argparse from pathlib import Path -from itertools import zip_longest -from .common import invert_input_tag_args, add_file_as_text +from .common import invert_input_tag_args, read_text_file from ..configuration import Config from ..message import MessageFilter, Message, Question from ..chat import ChatDB, msg_location @@ -11,32 +10,61 @@ class TranslationCmdError(Exception): pass -def create_message(chat: ChatDB, args: argparse.Namespace) -> Message: +text_separator: str = 'TEXT:' + + +def 
translation_prompt_openai(source_lang: str, target_lang: str) -> str: """ - Create a new message from the given arguments and write it - to the cache directory. + Return the prompt for GPT that tells it to do the translation. """ - text_parts = [] + # FIXME: specify the document format if known + return f"Translate the text below the line {text_separator} from {source_lang} to {target_lang}." + + +def create_message_openai(chat: ChatDB, args: argparse.Namespace) -> Message: + """ + Create a new message from the given arguments and write it to the cache directory. + + Message format + 1. Translation prompt (tells GPT to do a translation) + 2. Glossary (if specified as an argument) + 3. User provided prompt enhancements + 4. Translation separator + 5. User provided text to be translated + + The text to be translated is determined as follows: + - if a document is provided in the arguments, translate its content + - if no document is provided, translate the last text argument + + The other text arguments will be put into the "header" and can be used + to improve the translation prompt. 
+ """ + text_args: list[str] = [] if args.create is not None: - text_list = args.create + text_args = args.create elif args.ask is not None: - text_list = args.ask + text_args = args.ask else: raise TranslationCmdError("No input text found") - # NOTE: we currently support only one input document - text_files = args.input_document if args.input_document is not None else [] - # create the full text to be translated by combining all text parts - # from the arguments with the content of the document - for text, text_file in zip_longest(text_list, text_files, fillvalue=None): - if text is not None and len(text.strip()) > 0: - text_parts.append(text) - if text_file is not None and len(text_file) > 0: - add_file_as_text(text_parts, text_file) - full_text = '\n\n'.join([str(s) for s in text_parts]) + # extract user prompt and user text to be translated + user_text: str + user_prompt: str + if args.input_document is not None: + user_text = read_text_file(args.input_document) + user_prompt = '\n\n'.join([str(s) for s in text_args]) + else: + user_text = text_args[-1] + user_prompt = '\n\n'.join([str(s) for s in text_args[:-1]]) - # FIXME: prepend translation prompt and glossaries (if given) - message = Message(question=Question(full_text), + # build full question string + # FIXME: add glossaries if given + question_text: str = '\n\n'.join([translation_prompt_openai(args.source_lang, args.target_lang), + user_prompt, + text_separator, + user_text]) + # create and write the message + message = Message(question=Question(question_text), tags=args.output_tags, ai=args.AI, model=args.model) @@ -62,8 +90,9 @@ def translation_cmd(args: argparse.Namespace, config: Config) -> None: glob=args.glob, loc=msg_location(args.location)) # if it's a new translation, create and store it immediately + # FIXME: check AI type if args.ask or args.create: # message = create_message(chat, args) - create_message(chat, args) + create_message_openai(chat, args) if args.create: return