translation: specified / implemented the question format for OpenAI-based translations

This commit is contained in:
juk0de 2023-11-08 19:36:43 +01:00
parent ca5794788a
commit 97a82aff95
2 changed files with 57 additions and 23 deletions

View File

@ -7,6 +7,12 @@ from pathlib import Path
from ..message import Message, MessageError, source_code from ..message import Message, MessageError, source_code
def read_text_file(file: Path) -> str:
    """
    Return the content of the given text file with leading and
    trailing whitespace removed.
    """
    with open(file) as handle:
        return handle.read().strip()
def add_file_as_text(question_parts: list[str], file: str) -> None: def add_file_as_text(question_parts: list[str], file: str) -> None:
""" """
Add the given file as plain text to the question part list. Add the given file as plain text to the question part list.
@ -19,8 +25,7 @@ def add_file_as_text(question_parts: list[str], file: str) -> None:
if message and message.answer: if message and message.answer:
content = message.answer content = message.answer
except MessageError: except MessageError:
with open(file) as r: content = read_text_file(Path(file))
content = r.read().strip()
if len(content) > 0: if len(content) > 0:
question_parts.append(content) question_parts.append(content)

View File

@ -1,7 +1,6 @@
import argparse import argparse
from pathlib import Path from pathlib import Path
from itertools import zip_longest from .common import invert_input_tag_args, read_text_file
from .common import invert_input_tag_args, add_file_as_text
from ..configuration import Config from ..configuration import Config
from ..message import MessageFilter, Message, Question from ..message import MessageFilter, Message, Question
from ..chat import ChatDB, msg_location from ..chat import ChatDB, msg_location
@ -11,32 +10,61 @@ class TranslationCmdError(Exception):
pass pass
def create_message(chat: ChatDB, args: argparse.Namespace) -> Message: text_separator: str = 'TEXT:'
def translation_prompt_openai(source_lang: str, target_lang: str) -> str:
    """
    Build the instruction sentence that asks GPT to translate the text
    following the separator line from the source to the target language.
    """
    # FIXME: specify the document format if known
    prompt = f"Translate the text below the line {text_separator} from {source_lang} to {target_lang}."
    return prompt
def create_message_openai(chat: ChatDB, args: argparse.Namespace) -> Message:
"""
Create a new message from the given arguments and write it to the cache directory.
Message format
1. Translation prompt (tells GPT to do a translation)
2. Glossary (if specified as an argument)
3. User provided prompt enhancements
4. Translation separator
5. User provided text to be translated
The text to be translated is determined as follows:
- if a document is provided in the arguments, translate its content
- if no document is provided, translate the last text argument
The other text arguments will be put into the "header" and can be used
to improve the translation prompt.
"""
text_args: list[str] = []
if args.create is not None: if args.create is not None:
text_list = args.create text_args = args.create
elif args.ask is not None: elif args.ask is not None:
text_list = args.ask text_args = args.ask
else: else:
raise TranslationCmdError("No input text found") raise TranslationCmdError("No input text found")
# NOTE: we currently support only one input document
text_files = args.input_document if args.input_document is not None else []
# create the full text to be translated by combining all text parts # extract user prompt and user text to be translated
# from the arguments with the content of the document user_text: str
for text, text_file in zip_longest(text_list, text_files, fillvalue=None): user_prompt: str
if text is not None and len(text.strip()) > 0: if args.input_document is not None:
text_parts.append(text) user_text = read_text_file(args.input_document)
if text_file is not None and len(text_file) > 0: user_prompt = '\n\n'.join([str(s) for s in text_args])
add_file_as_text(text_parts, text_file) else:
full_text = '\n\n'.join([str(s) for s in text_parts]) user_text = text_args[-1]
user_prompt = '\n\n'.join([str(s) for s in text_args[:-1]])
# FIXME: prepend translation prompt and glossaries (if given) # build full question string
message = Message(question=Question(full_text), # FIXME: add glossaries if given
question_text: str = '\n\n'.join([translation_prompt_openai(args.source_lang, args.target_lang),
user_prompt,
text_separator,
user_text])
# create and write the message
message = Message(question=Question(question_text),
tags=args.output_tags, tags=args.output_tags,
ai=args.AI, ai=args.AI,
model=args.model) model=args.model)
@ -62,8 +90,9 @@ def translation_cmd(args: argparse.Namespace, config: Config) -> None:
glob=args.glob, glob=args.glob,
loc=msg_location(args.location)) loc=msg_location(args.location))
# if it's a new translation, create and store it immediately # if it's a new translation, create and store it immediately
# FIXME: check AI type
if args.ask or args.create: if args.ask or args.create:
# message = create_message(chat, args) # message = create_message(chat, args)
create_message(chat, args) create_message_openai(chat, args)
if args.create: if args.create:
return return