added module 'glossary.py'
This commit is contained in:
parent
82ad697b68
commit
be873867ea
128
chatmastermind/glossary.py
Normal file
128
chatmastermind/glossary.py
Normal file
@ -0,0 +1,128 @@
|
||||
"""
|
||||
Module implementing glossaries for translations.
|
||||
"""
|
||||
import yaml
|
||||
import tempfile
|
||||
import shutil
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Type, TypeVar
|
||||
|
||||
GlossaryInst = TypeVar('GlossaryInst', bound='Glossary')
|
||||
|
||||
|
||||
class GlossaryError(Exception):
    """
    Error raised by this module when a glossary cannot be read, written or imported.
    """
|
||||
|
||||
|
||||
def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    """
    YAML representer for strings: values spanning more than one line
    are emitted in literal block style ('|'), all other strings use
    the dumper's default scalar style.
    """
    str_tag = 'tag:yaml.org,2002:str'
    spans_multiple_lines = len(data.splitlines()) > 1
    if not spans_multiple_lines:
        return dumper.represent_scalar(str_tag, data)
    return dumper.represent_scalar(str_tag, data, style='|')
|
||||
|
||||
|
||||
@dataclass
|
||||
class Glossary:
|
||||
"""
|
||||
A glossary consists of the following parameters:
|
||||
- Name (freely selectable)
|
||||
- Path (full file path)
|
||||
- Source language
|
||||
- Target language
|
||||
- Entries (pairs of source lang and target lang terms)
|
||||
- ID (automatically generated / modified, required by DeepL)
|
||||
"""
|
||||
|
||||
name: str
|
||||
source_lang: str
|
||||
target_lang: str
|
||||
entries: dict[str, str] = field(default_factory=lambda: dict())
|
||||
file_path: Path | None = None
|
||||
ID: str | None = None
|
||||
|
||||
@classmethod
|
||||
def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst:
|
||||
"""
|
||||
Create a glossary from the given file.
|
||||
"""
|
||||
with open(file_path, "r") as fd:
|
||||
try:
|
||||
data = yaml.load(fd, Loader=yaml.FullLoader)
|
||||
# remove any quotes from the entries that YAML may have added while dumping
|
||||
# (e. g. for special keywords like 'yes')
|
||||
clean_entries = {key.strip('\"\' '): value for key, value in data['Entries'].items()}
|
||||
return cls(name=data['Name'],
|
||||
source_lang=data['SourceLang'],
|
||||
target_lang=data['TargetLang'],
|
||||
entries=clean_entries,
|
||||
file_path=file_path,
|
||||
ID=data['ID'] if data['ID'] != 'None' else None)
|
||||
except Exception:
|
||||
raise GlossaryError(f"'{file_path}' does not contain a valid glossary")
|
||||
|
||||
def to_file(self, file_path: Path | None = None) -> None:
|
||||
"""
|
||||
Write glossary to given file.
|
||||
"""
|
||||
if file_path:
|
||||
self.file_path = file_path
|
||||
if not self.file_path:
|
||||
raise GlossaryError("Got no valid path to write glossary")
|
||||
# write YAML
|
||||
with tempfile.NamedTemporaryFile(dir=self.file_path.parent, prefix=self.file_path.name, mode="w", delete=False) as temp_fd:
|
||||
temp_file_path = Path(temp_fd.name)
|
||||
data = {'Name': self.name,
|
||||
'ID': str(self.ID),
|
||||
'SourceLang': self.source_lang,
|
||||
'TargetLang': self.target_lang,
|
||||
'Entries': self.entries}
|
||||
yaml.dump(data, temp_fd, sort_keys=False)
|
||||
shutil.move(temp_file_path, self.file_path)
|
||||
|
||||
def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None:
|
||||
"""
|
||||
Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL).
|
||||
"""
|
||||
with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
|
||||
for source_entry, target_entry in self.entries.items():
|
||||
writer.writerow([source_entry, target_entry])
|
||||
|
||||
def export_tsv(self, entries: dict[str, str], file_path: Path) -> None:
|
||||
"""
|
||||
Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL).
|
||||
"""
|
||||
with open(file_path, 'w', encoding='utf-8') as file:
|
||||
for source_entry, target_entry in self.entries.items():
|
||||
file.write(f"{source_entry}\t{target_entry}\n")
|
||||
|
||||
def import_csv(self, file_path: Path) -> None:
|
||||
"""
|
||||
Import the entries from the given CSV file to those of the current glossary.
|
||||
Existing entries are overwritten.
|
||||
"""
|
||||
try:
|
||||
with open(file_path, mode='r', encoding='utf-8') as csvfile:
|
||||
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
||||
self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2}
|
||||
except Exception as e:
|
||||
raise GlossaryError(f"Error importing CSV: {e}")
|
||||
|
||||
def import_tsv(self, file_path: Path) -> None:
|
||||
"""
|
||||
Import the entries from the given CSV file to those of the current glossary.
|
||||
Existing entries are overwritten.
|
||||
"""
|
||||
try:
|
||||
with open(file_path, mode='r', encoding='utf-8') as tsvfile:
|
||||
self.entries = {}
|
||||
for line in tsvfile:
|
||||
parts = line.strip().split('\t')
|
||||
if len(parts) == 2:
|
||||
self.entries[parts[0]] = parts[1]
|
||||
except Exception as e:
|
||||
raise GlossaryError(f"Error importing TSV: {e}")
|
||||
Loading…
x
Reference in New Issue
Block a user