Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
dfuchss committed Nov 29, 2024
1 parent 8be7d03 commit eca0fc3
Show file tree
Hide file tree
Showing 28 changed files with 98 additions and 17,345 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@

/**
 * Pipeline step data that holds a single input text.
 *
 * NOTE(review): this listing appears to be a rendered commit diff, so a removed line and its replacement can
 * show up back to back (see the two {@code return} statements in {@code getText()}); presumably only one of
 * them exists in the real file.
 */
public class InputTextData implements PipelineStepData {

// Version identifier for Java serialization (assumes PipelineStepData is Serializable - TODO confirm).
private static final long serialVersionUID = -5404851121533249349L;

// String identifier for this kind of data; presumably the key it is registered under - verify against callers.
public static final String ID = "InputTextData";

// The text handed to the constructor. NOTE(review): never reassigned in the visible code, so it could be final.
private String text;

/**
 * Creates the data object for the given text.
 *
 * @param text the input text to hold; stored as-is, no validation is performed here
 */
public InputTextData(String text) {
super();
this.text = text;
}

/**
 * Returns the stored input text.
 *
 * @return the text that was passed to the constructor
 */
public String getText() {
return text;
return this.text;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,18 @@
* {@link Text} only.
*/
public class PreprocessingData implements PipelineStepData {
// Version identifier for Java serialization (assumes PipelineStepData is Serializable - TODO confirm).
private static final long serialVersionUID = 8103545017098419675L;

// String identifier for this kind of data; presumably the key it is registered under - verify against callers.
public static final String ID = "PreprocessingData";

// The preprocessed text; assigned once in the constructor and never changed afterwards.
private final Text preprocessedText;

/**
 * Creates the data object for the given preprocessed text.
 *
 * @param preprocessedText the text to hold; stored as-is, no validation is performed here
 */
public PreprocessingData(Text preprocessedText) {
super();
this.preprocessedText = preprocessedText;
}

/**
 * Returns the stored preprocessed text.
 *
 * NOTE(review): the two {@code return} lines below look like the before/after forms of one statement in the
 * commit diff; presumably only one of them exists in the real file.
 *
 * @return the {@link Text} that was passed to the constructor
 */
public Text getText() {
return preprocessedText;
return this.preprocessedText;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ public class RepositoryHandler {
private static final Logger logger = LoggerFactory.getLogger(RepositoryHandler.class);

// Private constructor: the class cannot be instantiated from outside; the functionality visible here is static.
private RepositoryHandler() {
super();
}

public static boolean shallowCloneRepository(String repositoryLink, String desiredCodeLocation, String desiredHash) {
Expand All @@ -36,8 +35,9 @@ public static boolean shallowCloneRepository(String repositoryLink, String desir
List<RevCommit> commits = new ArrayList<>();
git.log().setMaxCount(1).call().forEach(commits::add);
assert commits.size() == 1;
if (commits.get(0).getId().startsWith(AbbreviatedObjectId.fromString(desiredHash)))
if (commits.get(0).getId().startsWith(AbbreviatedObjectId.fromString(desiredHash))) {
return true;
}

// Checkout correct code version
git.fetch().setUnshallow(true).call();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
*
*/
public class EqualityMeasure implements WordSimMeasure {
private static final long serialVersionUID = -3748510855999411788L;

@Override
public boolean areWordsSimilar(ComparisonContext ctx) {
return ctx.firstTerm().equalsIgnoreCase(ctx.secondTerm());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
*/
public class GloveMeasure extends VectorBasedWordSimMeasure {

private static final long serialVersionUID = 1436951415138215284L;

private static final Logger LOGGER = LoggerFactory.getLogger(GloveMeasure.class);

private final double similarityThreshold;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public LevenshteinMeasure(int minLength, int maxDistance, double threshold) {

@Override
public boolean areWordsSimilar(ComparisonContext ctx) {
//FIXME cast to lower case seems unwarranted given that this is delegated to WordSimUtils already
// FIXME cast to lower case seems unwarranted given that this is delegated to WordSimUtils already
String firstWord = ctx.firstTerm().toLowerCase();
String secondWord = ctx.secondTerm().toLowerCase();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ public NgramMeasure(Variant variant, int n, double similarityThreshold) throws I
/**
 * Decides whether the two terms of the given comparison context count as similar.
 *
 * @param ctx the comparison context; must not be {@code null}
 * @return {@code true} if the value of {@code getSimilarity(ctx)} is greater than or equal to this measure's
 *         similarity threshold
 * @throws NullPointerException if {@code ctx} is {@code null}
 */
@Override
public boolean areWordsSimilar(ComparisonContext ctx) {
Objects.requireNonNull(ctx);
// NOTE(review): the next two lines look like the before/after forms of one statement in the commit diff.
return getSimilarity(ctx) >= this.similarityThreshold;
return this.getSimilarity(ctx) >= this.similarityThreshold;
}

@Override
public double getSimilarity(ComparisonContext ctx) {
double distance = calculateDistance(ctx.firstTerm(), ctx.secondTerm());
double distance = this.calculateDistance(ctx.firstTerm(), ctx.secondTerm());

double normalizedDistance = distance / Math.max(ctx.firstTerm().length(), ctx.secondTerm().length());

Expand Down Expand Up @@ -98,15 +98,15 @@ public double calculateDistance(String x, String y) {
int l = yBuilder.length();
double[][] d = new double[k + 1][l + 1];

for (int u = 1; u <= n - 1; u++) {
if (variant == Variant.LUCENE) {
for (int u = 1; u <= this.n - 1; u++) {
if (this.variant == Variant.LUCENE) {
xBuilder = new StringBuilder().append(LUCENE_PREFIX_CHARACTER).append(xBuilder);
yBuilder = new StringBuilder().append(LUCENE_PREFIX_CHARACTER).append(yBuilder);
} else if (variant == Variant.POSITIONAL) {
} else if (this.variant == Variant.POSITIONAL) {
xBuilder = new StringBuilder().append(xBuilder.charAt(0)).append(xBuilder);
yBuilder = new StringBuilder().append(yBuilder.charAt(0)).append(yBuilder);
} else {
throw new UnsupportedOperationException("unknown variant: " + variant);
throw new UnsupportedOperationException("unknown variant: " + this.variant);
}
}

Expand All @@ -120,9 +120,9 @@ public double calculateDistance(String x, String y) {

for (int i = 1; i <= k; i++) {
for (int j = 1; j <= l; j++) {
double dN = dN(n, i - 1, j - 1, xBuilder.toString(), yBuilder.toString());
double dN = this.dN(this.n, i - 1, j - 1, xBuilder.toString(), yBuilder.toString());

d[i][j] = min(d[i - 1][j] + 1.0, d[i][j - 1] + 1.0, d[i - 1][j - 1] + dN);
d[i][j] = this.min(d[i - 1][j] + 1.0, d[i][j - 1] + 1.0, d[i - 1][j - 1] + dN);
}
}

Expand All @@ -134,11 +134,11 @@ private double dN(int n, int i, int j, String x, String y) {
double actualN = n;

for (int u = 1; u <= n; u++) {
double diff = d1(x.charAt(i + u - 1), y.charAt(j + u - 1));
double diff = this.d1(x.charAt(i + u - 1), y.charAt(j + u - 1));

sum += diff;

if (variant == Variant.LUCENE && diff == 0 && x.charAt(i + u - 1) == LUCENE_PREFIX_CHARACTER) {
if (this.variant == Variant.LUCENE && diff == 0 && x.charAt(i + u - 1) == LUCENE_PREFIX_CHARACTER) {
actualN -= 1.0; // Ignore prefix character in LUCENE mode
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public class SEWordSimMeasure implements WordSimMeasure {

private static final Logger LOGGER = LoggerFactory.getLogger(SEWordSimMeasure.class);

private transient SEWordSimDataSource dataSource;
private SEWordSimDataSource dataSource;
private final double similarityThreshold;

/**
Expand All @@ -43,7 +43,7 @@ public SEWordSimMeasure(double similarityThreshold) {

/**
 * Decides whether the two terms of the given comparison context count as similar.
 *
 * @param ctx the comparison context that is passed on to {@code getSimilarity}
 * @return {@code true} only if the similarity is an actual number (not {@code NaN}) and is greater than or equal
 *         to this measure's similarity threshold
 */
@Override
public boolean areWordsSimilar(ComparisonContext ctx) {
// NOTE(review): the next two lines look like the before/after forms of one statement in the commit diff.
var similarity = getSimilarity(ctx);
var similarity = this.getSimilarity(ctx);
// A NaN similarity is never treated as similar; the explicit check states that intent directly.
return !Double.isNaN(similarity) && similarity >= this.similarityThreshold;
}

Expand All @@ -52,22 +52,21 @@ public double getSimilarity(ComparisonContext ctx) {
double similarity = Double.NaN;

try {
similarity = getDataSource().getSimilarity(ctx.firstTerm(), ctx.secondTerm()).orElse(Double.NaN);
similarity = this.getDataSource().getSimilarity(ctx.firstTerm(), ctx.secondTerm()).orElse(Double.NaN);
} catch (SQLException e) {
LOGGER.error("Failed to query the SEWordSim database for word comparison: " + ctx, e);
return similarity;
}
return similarity; // words are probably missing from the database
}

/**
 * Returns the data source, creating it on first use from {@code CommonTextToolsConfig.SEWORDSIM_DB_FILE_PATH}
 * and reusing the same instance on later calls.
 *
 * NOTE(review): consecutive near-duplicate lines below look like the before/after forms of single statements in
 * the commit diff. Also, no synchronization is visible here, so concurrent first calls could each create a data
 * source - confirm whether this is only used from one thread.
 *
 * @return the already created or newly created data source
 * @throws IllegalArgumentException if creating the data source fails with an {@link SQLException}; the original
 *                                  exception is kept as the cause
 */
private SEWordSimDataSource getDataSource() {
if (dataSource == null) {
if (this.dataSource == null) {
try {
dataSource = new SEWordSimDataSource(Path.of(CommonTextToolsConfig.SEWORDSIM_DB_FILE_PATH));
this.dataSource = new SEWordSimDataSource(Path.of(CommonTextToolsConfig.SEWORDSIM_DB_FILE_PATH));
} catch (SQLException e) {
throw new IllegalArgumentException(e);
}
}
return dataSource;
return this.dataSource;
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.io.Serializable;
import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
Expand All @@ -10,7 +9,7 @@
/**
* A comparison strategy determines how the verdicts of multiple WSMs regarding a specific comparison are combined.
*/
public interface ComparisonStrategy extends Serializable {
public interface ComparisonStrategy {

ComparisonStrategy AT_LEAST_ONE = new AtLeastOneStrategy();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ public double getSimilarity(ComparisonContext ctx, List<WordSimMeasure> measures
var array = values.toArray(new Double[0]);
if (array.length % 2 == 0) {
return (array[array.length / 2] + array[array.length / 2 - 1]) / 2;
} else {
return array[array.length / 2];
}
return array[array.length / 2];
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.io.Serializable;
import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

public interface SimilarityStrategy extends Serializable {
public interface SimilarityStrategy {
/**
* Evaluates how similar the words from the given {@link ComparisonContext} are by combining the verdicts of the specified word similarity measures.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* An exception that can occur while trying to retrieve a vector.
*/
public class RetrieveVectorException extends Exception {
private static final long serialVersionUID = 6771335689887319781L;

public RetrieveVectorException(String message, Throwable cause) {
super(message, cause);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
@Deterministic
public abstract class VectorBasedWordSimMeasure implements WordSimMeasure {

private static final float[] ZERO_VECTOR = new float[0];
private static final float[] ZERO_VECTOR = {};
private final Map<String, float[]> vectorCache = new LinkedHashMap<>();

protected abstract WordVectorDataSource getVectorDataSource();
Expand All @@ -25,7 +25,7 @@ public abstract class VectorBasedWordSimMeasure implements WordSimMeasure {
* Compares the two given words by computing the cosine similarity between their respective vector representations.
* If the vector representation for one of the words is not found, a similarity score of {@code 0.0} will be
* returned.
*
*
* @param firstWord the first word
* @param secondWord the second word
* @return returns the similarity score between the two words, between 0.0 and 1.0 (inclusive)
Expand All @@ -39,13 +39,13 @@ public double compareVectors(String firstWord, String secondWord) throws Retriev
return 1.0;
}

float[] firstVec = getVectorFromCacheOrDatabase(firstWord);
float[] firstVec = this.getVectorFromCacheOrDatabase(firstWord);

if (VectorUtils.isZero(firstVec)) {
return 0.0; // no vector representation for the first word
}

float[] secondVec = getVectorFromCacheOrDatabase(secondWord);
float[] secondVec = this.getVectorFromCacheOrDatabase(secondWord);

if (VectorUtils.isZero(secondVec)) {
return 0.0; // no vector representation for the second word
Expand All @@ -58,7 +58,7 @@ private float[] getVectorFromCacheOrDatabase(String word) throws RetrieveVectorE
float[] vector = this.vectorCache.getOrDefault(word, null);

if (vector == null) {
vector = getVectorDataSource().getWordVector(word).orElse(ZERO_VECTOR);
vector = this.getVectorDataSource().getWordVector(word).orElse(ZERO_VECTOR);
this.vectorCache.put(word, vector);
}

Expand Down
Loading

0 comments on commit eca0fc3

Please sign in to comment.