""" Module implementing glossaries for translations. """ import yaml import tempfile import shutil import csv from pathlib import Path from dataclasses import dataclass, field from typing import Type, TypeVar, ClassVar GlossaryInst = TypeVar('GlossaryInst', bound='Glossary') class GlossaryError(Exception): pass def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode: """ Changes the YAML dump style to multiline syntax for multiline strings. """ if len(data.splitlines()) > 1: return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') return dumper.represent_scalar('tag:yaml.org,2002:str', data) @dataclass class Glossary: """ A glossary consists of the following parameters: - Name (freely selectable) - Path (full file path, suffix is automatically generated) - Source language - Target language - Description (optional) - Entries (pairs of source lang and target lang terms) - ID (automatically generated / modified, required by DeepL) """ name: str source_lang: str target_lang: str file_path: Path | None = None desc: str | None = None entries: dict[str, str] = field(default_factory=lambda: dict()) ID: str | None = None file_suffix: ClassVar[str] = '.glo' @classmethod def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst: """ Create a glossary from the given file. """ if not file_path.exists(): raise GlossaryError(f"Glossary file '{file_path}' does not exist") if file_path.suffix != cls.file_suffix: raise GlossaryError(f"File type '{file_path.suffix}' is not supported") with open(file_path, "r") as fd: try: data = yaml.load(fd, Loader=yaml.FullLoader) clean_entries = data['Entries'] return cls(name=data['Name'], source_lang=data['SourceLang'], target_lang=data['TargetLang'], file_path=file_path, desc=data['Description'], entries=clean_entries, ID=data['ID'] if data['ID'] != 'None' else None) except Exception: raise GlossaryError(f"'{file_path}' does not contain a valid glossary") def to_file(self, file_path: Path | None = None) -> None: """ Write glossary to given file. """ if file_path: self.file_path = file_path if not self.file_path: raise GlossaryError("Got no valid path to write glossary") # check / add valid suffix if not self.file_path.suffix: self.file_path = self.file_path.with_suffix(self.file_suffix) elif self.file_path.suffix != self.file_suffix: raise GlossaryError(f"File suffix '{self.file_path.suffix}' is not supported") # write YAML with tempfile.NamedTemporaryFile(dir=self.file_path.parent, prefix=self.file_path.name, mode="w", delete=False) as temp_fd: temp_file_path = Path(temp_fd.name) data = {'Name': self.name, 'Description': self.desc, 'ID': str(self.ID), 'SourceLang': self.source_lang, 'TargetLang': self.target_lang, 'Entries': self.entries} yaml.dump(data, temp_fd, sort_keys=False) shutil.move(temp_file_path, self.file_path) def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None: """ Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL). """ with open(file_path, 'w', newline='', encoding='utf-8') as csvfile: writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) for source_entry, target_entry in self.entries.items(): writer.writerow([source_entry, target_entry]) def export_tsv(self, entries: dict[str, str], file_path: Path) -> None: """ Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL). """ with open(file_path, 'w', encoding='utf-8') as file: for source_entry, target_entry in self.entries.items(): file.write(f"{source_entry}\t{target_entry}\n") def import_csv(self, file_path: Path) -> None: """ Import the entries from the given CSV file to those of the current glossary. Existing entries are overwritten. """ try: with open(file_path, mode='r', encoding='utf-8') as csvfile: reader = csv.reader(csvfile, delimiter=',', quotechar='"') self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2} except Exception as e: raise GlossaryError(f"Error importing CSV: {e}") def import_tsv(self, file_path: Path) -> None: """ Import the entries from the given CSV file to those of the current glossary. Existing entries are overwritten. """ try: with open(file_path, mode='r', encoding='utf-8') as tsvfile: self.entries = {} for line in tsvfile: parts = line.strip().split('\t') if len(parts) == 2: self.entries[parts[0]] = parts[1] except Exception as e: raise GlossaryError(f"Error importing TSV: {e}")