Skip to content

Commit

Permalink
refactor: store segmenter of the current project in the RealProject ob…
Browse files Browse the repository at this point in the history
…ject

- Add getSegmenter/setSegmenter to IProject
- Update Core.getSegmenter/setSegmenter to use currentProject.getSegmenter/setSegmenter

Signed-off-by: Hiroshi Miura <[email protected]>
  • Loading branch information
miurahr committed Nov 20, 2024
1 parent cbe0696 commit 2bffaf0
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 45 deletions.
6 changes: 2 additions & 4 deletions src/org/omegat/core/Core.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ private Core() {
private static MultipleTransPane multiple;
private static INotes notes;
private static IComments comments;
private static Segmenter segmenter;

private static Map<String, String> cmdLineParams = Collections.emptyMap();

Expand Down Expand Up @@ -215,11 +214,11 @@ public static IDictionaries getDictionaries() {
}

public static Segmenter getSegmenter() {
return segmenter;
return currentProject.getSegmenter();
}

public static void setSegmenter(Segmenter newSegmenter) {
segmenter = newSegmenter;
currentProject.setSegmenter(newSegmenter);
}

/**
Expand Down Expand Up @@ -268,7 +267,6 @@ static void initializeGUIimpl(IMainWindow me) throws Exception {
MarkerController.init();
LanguageToolWrapper.init();

segmenter = new Segmenter(Preferences.getSRX());
filterMaster = new FilterMaster(Preferences.getFilters());

// 4. Initialize other components. They add themselves to the main window.
Expand Down
12 changes: 5 additions & 7 deletions src/org/omegat/core/data/ExternalTMFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,20 @@ public static boolean isSupported(File file) {
}

public static ExternalTMX load(File file) throws Exception {
return load(file, Core.getProject().getProjectProperties(), Core.getSegmenter(),
Core.getFilterMaster());
return load(file, Core.getProject().getProjectProperties(), Core.getSegmenter());
}

public static ExternalTMX load(File file, ProjectProperties props, Segmenter segmenter,
FilterMaster filterMaster) throws Exception {
public static ExternalTMX load(File file, ProjectProperties props, Segmenter segmenter) throws Exception {
if (TMXLoader.isSupported(file)) {
return new TMXLoader(file, segmenter)
.setExtTmxLevel2(Preferences.isPreference(Preferences.EXT_TMX_SHOW_LEVEL2))
.setUseSlash(Preferences.isPreference(Preferences.EXT_TMX_USE_SLASH))
.setDoSegmenting(props.isSentenceSegmentingEnabled())
.setKeepForeignMatches(Preferences.isPreference(Preferences.EXT_TMX_KEEP_FOREIGN_MATCH))
.load(props.getSourceLanguage(), props.getTargetLanguage());
} else if (BifileLoader.isSupported(file, filterMaster)) {
return new BifileLoader(file, segmenter, filterMaster).setRemoveTags(props.isRemoveTags())
.setRemoveSpaces(filterMaster.getConfig().isRemoveSpacesNonseg())
} else if (BifileLoader.isSupported(file, Core.getFilterMaster())) {
return new BifileLoader(file, segmenter, Core.getFilterMaster()).setRemoveTags(props.isRemoveTags())
.setRemoveSpaces(Core.getFilterMaster().getConfig().isRemoveSpacesNonseg())
.setDoSegmenting(props.isSentenceSegmentingEnabled())
.load(props.getSourceLanguage(), props.getTargetLanguage());
} else {
Expand Down
17 changes: 15 additions & 2 deletions src/org/omegat/core/data/IProject.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.List;
import java.util.Map;

import org.omegat.core.segmentation.Segmenter;
import org.omegat.core.statistics.StatisticsInfo;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.util.Language;
Expand Down Expand Up @@ -283,11 +284,11 @@ class FileInfo {
public List<SourceTextEntry> entries = new ArrayList<SourceTextEntry>();
}

public interface DefaultTranslationsIterator {
interface DefaultTranslationsIterator {
void iterate(String source, TMXEntry trans);
}

public interface MultipleTranslationsIterator {
interface MultipleTranslationsIterator {
void iterate(EntryKey source, TMXEntry trans);
}

Expand Down Expand Up @@ -341,4 +342,16 @@ public AllTranslations getPrevious() {
return previous;
}
}

/**
 * Set new segmenter for the current project.
 *
 * @param segmenter
 *            new segmenter to be used by this project.
 */
void setSegmenter(Segmenter segmenter);

/**
 * Get segmenter for the current project.
 *
 * @return segmenter currently used by this project.
 */
Segmenter getSegmenter();
}
13 changes: 13 additions & 0 deletions src/org/omegat/core/data/NotLoadedProject.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@
import java.util.Map;

import org.omegat.core.data.TMXEntry.ExternalLinked;
import org.omegat.core.segmentation.Segmenter;
import org.omegat.core.statistics.StatisticsInfo;
import org.omegat.filters2.TranslationException;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.util.Language;
import org.omegat.util.Preferences;

/**
* Project implementation when project not really loaded.
Expand All @@ -51,6 +53,8 @@ public class NotLoadedProject implements IProject {
EMPTY_TRANSLATION = new TMXEntry(empty, true, null);
}

protected Segmenter segmenter = new Segmenter(Preferences.getSRX());

@Override
public void compileProject(String sourcePattern) throws IOException, TranslationException {
}
Expand Down Expand Up @@ -172,6 +176,15 @@ public List<String> getSourceFilesOrder() {
public void setSourceFilesOrder(List<String> filesList) {
}

/**
 * Replaces the segmenter used while no project is loaded.
 * <p>
 * {@code Core.setSegmenter} delegates to the current project, so this
 * setter has to retain the value; otherwise a segmenter set while no
 * project is loaded would be silently discarded and
 * {@link #getSegmenter()} would keep returning the stale instance.
 *
 * @param segmenter
 *            new segmenter; a {@code null} value is ignored so that
 *            {@link #getSegmenter()} never starts returning {@code null}.
 */
@Override
public void setSegmenter(final Segmenter segmenter) {
    if (segmenter != null) {
        this.segmenter = segmenter;
    }
}

/**
 * Returns the segmenter held by this placeholder project. The field is
 * initially built from the global SRX preferences.
 *
 * @return the segmenter stored in this instance.
 */
@Override
public Segmenter getSegmenter() {
    return this.segmenter;
}

@Override
public String getTargetPathForSourceFile(String sourceFile) {
return null;
Expand Down
54 changes: 31 additions & 23 deletions src/org/omegat/core/data/RealProject.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
2018 Enrique Estevez Fernandez
2019 Thomas Cordonnier
2020 Briac Pilpre
2024 Hiroshi Miura
Home page: https://www.omegat.org/
Support center: https://omegat.org/support
Expand Down Expand Up @@ -166,12 +167,15 @@ enum PreparedStatus {

private final StatisticsInfo hotStat = new StatisticsInfo();

private final ITokenizer sourceTokenizer, targetTokenizer;
private final ITokenizer sourceTokenizer;
private final ITokenizer targetTokenizer;

private DirectoryMonitor tmMonitor;

private DirectoryMonitor tmOtherLanguagesMonitor;

private Segmenter segmenter;

/**
* Indicates when there is an ongoing save event. Saving might take a while
* during team sync: if a merge is required the save might be postponed
Expand Down Expand Up @@ -302,7 +306,7 @@ public void createProject() {
// Set project specific segmentation rules if they exist, or
// defaults otherwise.
SRX srx = config.getProjectSRX();
Core.setSegmenter(new Segmenter(srx == null ? Preferences.getSRX() : srx));
segmenter = new Segmenter(srx == null ? Preferences.getSRX() : srx);

loadTranslations();
setProjectModified(true);
Expand Down Expand Up @@ -475,12 +479,12 @@ private void loadFilterSettings() {
* options
*/
private void loadSegmentationSettings() {
// Set project specific segmentation rules if they exist, or defaults
// Set project-specific segmentation rules if they exist, or defaults
// otherwise.
// This MUST happen before calling loadTranslations(), because
// projectTMX needs a segmenter.
SRX srx = Optional.ofNullable(config.getProjectSRX()).orElse(Preferences.getSRX());
Core.setSegmenter(new Segmenter(srx));
segmenter = new Segmenter(srx);
}

/**
Expand All @@ -493,7 +497,7 @@ public Map<EntryKey, ITMXEntry> align(final ProjectProperties props, final File
File root = new File(config.getSourceRoot());
List<File> srcFileList = FileUtil.buildFileList(root, true);

AlignFilesCallback alignFilesCallback = new AlignFilesCallback(props);
AlignFilesCallback alignFilesCallback = new AlignFilesCallback(props, segmenter);

String srcRoot = config.getSourceRoot();
for (File file : srcFileList) {
Expand Down Expand Up @@ -1238,7 +1242,6 @@ private void loadTranslations() throws Exception {
*/
private void loadSourceFiles() throws IOException {
long st = System.currentTimeMillis();
FilterMaster fm = Core.getFilterMaster();

File root = new File(config.getSourceRoot());
List<String> srcPathList = FileUtil
Expand All @@ -1259,7 +1262,8 @@ private void loadSourceFiles() throws IOException {

try {
loadFilesCallback.setCurrentFile(fi);
IFilter filter = fm.loadFile(config.getSourceRoot() + filepath, new FilterContext(config),
IFilter filter = Core.getFilterMaster().loadFile(config.getSourceRoot() + filepath,
new FilterContext(config),
loadFilesCallback);
loadFilesCallback.fileFinished();

Expand Down Expand Up @@ -1410,7 +1414,7 @@ private void loadTM() {
newTransMemories.putAll(transMemories);
if (file.exists()) {
try {
ExternalTMX newTMX = ExternalTMFactory.load(file);
ExternalTMX newTMX = ExternalTMFactory.load(file, config, segmenter);
newTransMemories.put(file.getPath(), newTMX);

// Please note the use of "/". FileUtil.computeRelativePath
Expand Down Expand Up @@ -1716,16 +1720,12 @@ public Map<Language, ProjectTMX> getOtherTargetLanguageTMs() {
return Collections.unmodifiableMap(otherTargetLangTMs);
}

/**
* {@inheritDoc}
*/
@Override
public ITokenizer getSourceTokenizer() {
return sourceTokenizer;
}

/**
* {@inheritDoc}
*/
@Override
public ITokenizer getTargetTokenizer() {
return targetTokenizer;
}
Expand Down Expand Up @@ -1763,9 +1763,7 @@ protected ITokenizer createTokenizer(String cmdLine, Class<?> projectPref) {
return new DefaultTokenizer();
}

/**
* {@inheritDoc}
*/
@Override
public List<FileInfo> getProjectFiles() {
return Collections.unmodifiableList(projectFilesList);
}
Expand Down Expand Up @@ -1876,9 +1874,7 @@ public void fileFinished() {
tmBuilder = null;
}

/**
* {@inheritDoc}
*/
@Override
protected void addSegment(String id, short segmentIndex, String segmentSource,
List<ProtectedPart> protectedParts, String segmentTranslation,
boolean segmentTranslationFuzzy, String[] props, String prevSegment, String nextSegment,
Expand Down Expand Up @@ -1951,13 +1947,15 @@ protected String getSegmentTranslation(String id, int segmentIndex, String segme
}

static class AlignFilesCallback implements IAlignCallback {
AlignFilesCallback(ProjectProperties props) {
AlignFilesCallback(ProjectProperties props, Segmenter segmenter) {
super();
this.config = props;
this.segmenter = segmenter;
}

Map<EntryKey, ITMXEntry> data = new TreeMap<>();
private final ProjectProperties config;
private final Segmenter segmenter;
List<String> sources = new ArrayList<>();

@Override
Expand All @@ -1978,9 +1976,9 @@ public void addTranslation(String id, String source, String translation, boolean

PrepareTMXEntry tr = new PrepareTMXEntry();
if (config.isSentenceSegmentingEnabled()) {
List<String> segmentsSource = Core.getSegmenter().segment(config.getSourceLanguage(),
List<String> segmentsSource = segmenter.segment(config.getSourceLanguage(),
sourceS, null, null);
List<String> segmentsTranslation = Core.getSegmenter().segment(config.getTargetLanguage(),
List<String> segmentsTranslation = segmenter.segment(config.getTargetLanguage(),
transS, null, null);
if (segmentsTranslation.size() != segmentsSource.size()) {
if (isFuzzy) {
Expand Down Expand Up @@ -2064,4 +2062,14 @@ public void commitSourceFiles() throws Exception {
}
}
}

/**
 * Replaces the segmenter used by this project.
 *
 * @param segmenter
 *            new segmenter; stored as-is, without validation.
 */
@Override
public void setSegmenter(final Segmenter segmenter) {
    this.segmenter = segmenter;
}

/**
 * Returns the segmenter currently stored in this project.
 *
 * @return the project's segmenter.
 *         NOTE(review): the field has no initializer, so this is presumably
 *         {@code null} until the project is created or its segmentation
 *         settings are loaded; confirm against the constructor.
 */
@Override
public Segmenter getSegmenter() {
    return this.segmenter;
}
}
3 changes: 1 addition & 2 deletions src/org/omegat/core/statistics/FindMatches.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.omegat.core.Core;
import org.omegat.core.data.EntryKey;
import org.omegat.core.data.ExternalTMFactory;
import org.omegat.core.data.ExternalTMX;
Expand Down Expand Up @@ -145,7 +144,7 @@ public class FindMatches {
*/
public FindMatches(IProject project, int maxCount, boolean allowSeparateSegmentMatch,
boolean searchExactlyTheSame) {
this(project, Core.getSegmenter(), maxCount, allowSeparateSegmentMatch, searchExactlyTheSame, true,
this(project, project.getSegmenter(), maxCount, allowSeparateSegmentMatch, searchExactlyTheSame, true,
Preferences.getPreferenceDefault(Preferences.EXT_TMX_FUZZY_MATCH_THRESHOLD,
OConsts.FUZZY_MATCH_THRESHOLD));
}
Expand Down
2 changes: 0 additions & 2 deletions src/org/omegat/gui/main/ProjectUICommands.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
import org.omegat.core.data.ProjectProperties;
import org.omegat.core.events.IProjectEventListener;
import org.omegat.core.segmentation.SRX;
import org.omegat.core.segmentation.Segmenter;
import org.omegat.core.spellchecker.ISpellChecker;
import org.omegat.core.tagvalidation.ErrorReport;
import org.omegat.core.team2.IRemoteRepository2;
Expand Down Expand Up @@ -835,7 +834,6 @@ protected void done() {
// Restore global prefs in case project had project-specific
// ones
Core.setFilterMaster(new FilterMaster(Preferences.getFilters()));
Core.setSegmenter(new Segmenter(Preferences.getSRX()));
}
}.execute();
}
Expand Down
8 changes: 5 additions & 3 deletions test/src/org/omegat/core/data/TmxComplianceBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;

import org.omegat.core.Core;
import org.omegat.core.segmentation.SRX;
import org.omegat.core.segmentation.Segmenter;
Expand All @@ -67,14 +68,15 @@ public abstract class TmxComplianceBase {
static final Pattern RE_SEG = Pattern.compile("(<seg>.+</seg>)");

protected File outFile;
protected Segmenter segmenter;

@Rule
public TestName name = new TestName();

@Before
public final void setUp() throws Exception {
Core.setFilterMaster(new FilterMaster(FilterMaster.createDefaultFiltersConfig()));
Core.setSegmenter(new Segmenter(SRX.getDefault()));
segmenter = new Segmenter(SRX.getDefault());
TestPreferencesInitializer.init();

outFile = new File("build/testdata/" + getClass().getSimpleName() + "-" + name.getMethodName() + ".out");
Expand Down Expand Up @@ -179,7 +181,7 @@ public void addEntry(String id, String source, String translation, boolean isFuz
@Override
public void addEntryWithProperties(String id, String source, String translation, boolean isFuzzy,
String[] props, String path, IFilter filter, List<ProtectedPart> protectedParts) {
result.addAll(Core.getSegmenter().segment(context.getSourceLang(), source, null, null));
result.addAll(segmenter.segment(context.getSourceLang(), source, null, null));
}

public void linkPrevNextSegments() {
Expand All @@ -197,7 +199,7 @@ protected void align(IFilter filter, File sourceFile, String inCharset, File tra
fc.setInEncoding(inCharset);
fc.setOutEncoding(outCharset);

RealProject.AlignFilesCallback callback = new RealProject.AlignFilesCallback(props);
RealProject.AlignFilesCallback callback = new RealProject.AlignFilesCallback(props, segmenter);

filter.alignFile(sourceFile, translatedFile, null, fc, callback);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ public Map<String, ExternalTMX> getTransMemories() {
try {
ExternalTMX newTMX;
Path testTmx = Paths.get("test/data/tmx/test-match-stat-en-ca.tmx");
newTMX = ExternalTMFactory.load(testTmx.toFile(), prop, segmenter, null);
newTMX = ExternalTMFactory.load(testTmx.toFile(), prop, segmenter);
transMemories.put(testTmx.toString(), newTMX);
} catch (Exception e) {
throw new RuntimeException(e);
Expand Down
2 changes: 1 addition & 1 deletion test/src/org/omegat/core/statistics/FindMatchesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ public Map<String, ExternalTMX> getTransMemories() {

Map<String, ExternalTMX> transMemories = new TreeMap<>();
try {
ExternalTMX newTMX = ExternalTMFactory.load(externalTmx, prop, segmenter, null);
ExternalTMX newTMX = ExternalTMFactory.load(externalTmx, prop, segmenter);
transMemories.put(externalTmx.getPath(), newTMX);
} catch (Exception ignored) {
}
Expand Down

0 comments on commit 2bffaf0

Please sign in to comment.