Skip to content

Commit

Permalink
Refactored from GuddenTheEngine
Browse files Browse the repository at this point in the history
  • Loading branch information
kuminin committed Oct 17, 2017
1 parent 1ab5251 commit c3525cb
Show file tree
Hide file tree
Showing 27 changed files with 2,170 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .classpath
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/target/
/external/
/.settings/
23 changes: 23 additions & 0 deletions .project
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>guddengine</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
40 changes: 40 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.gudden.maven</groupId>
  <artifactId>guddengine</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>guddengine</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.target>1.8</maven.compiler.target>
    <maven.compiler.source>1.8</maven.compiler.source>
  </properties>

  <repositories>
    <repository>
      <id>central</id>
      <name>Maven Repository</name>
      <!-- Maven Central only serves HTTPS; the plain-HTTP endpoint rejects requests. -->
      <url>https://repo1.maven.org/maven2</url>
    </repository>
  </repositories>

  <dependencies>
    <!-- Unit testing (test scope only). -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <!-- JSON parsing of the documents being indexed. -->
    <dependency>
      <groupId>com.google.code.gson</groupId>
      <artifactId>gson</artifactId>
      <version>2.8.2</version>
    </dependency>
  </dependencies>
</project>
80 changes: 80 additions & 0 deletions src/main/java/com/gudden/maven/guddengine/App.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package com.gudden.maven.guddengine;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;

import com.gudden.maven.model.Guddengine;
import com.gudden.maven.model.PositionalPosting;
import com.gudden.maven.model.Query;

/**
 * Console front end for guddengine.
 *
 * <p>Prompts for a directory to index, then reads standard input one line at a time. A line whose
 * first word is one of the special commands below is handled by that command; every other
 * non-blank line is run as a search query.
 * <ul>
 * <li>{@code :q} - print a farewell message and exit.</li>
 * <li>{@code :stem <token>} - print the stemmed form of the token.</li>
 * <li>{@code :index <path>} - reset the index and index the given directory.</li>
 * <li>{@code :vocab} - print every vocabulary term followed by the total count.</li>
 * </ul>
 */
public class App {
    private static final Guddengine GUDDEN = new Guddengine();
    private static List<String> FILE_NAMES = null;

    /**
     * Runs the interactive loop until {@code :q} is entered or standard input is exhausted.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        System.out.print("Please enter the directory path you wish to index: ");
        // Input may be closed or piped and empty; stop quietly instead of throwing.
        if (!sc.hasNextLine()) {
            sc.close();
            return;
        }
        FILE_NAMES = indexDirectory(GUDDEN, sc.nextLine());

        while (sc.hasNextLine()) {
            String queryString = sc.nextLine().trim();
            if (queryString.isEmpty()) {
                continue; // nothing to search for
            }
            String[] specialQuery = queryString.split("\\s+");
            switch (specialQuery[0]) {
            case ":q":
                System.out.println("Have a good day. Thank you for using guddengine.");
                sc.close();
                System.exit(0);
                break;
            case ":stem":
                if (specialQuery.length < 2) {
                    System.out.println("Usage: :stem <token>");
                    break;
                }
                System.out.print("The stemmed token for \"" + specialQuery[1] + "\" is \"");
                System.out.println(GUDDEN.stemToken(specialQuery[1]) + "\"");
                break;
            case ":index":
                // Use the rest of the line so that paths containing spaces work,
                // matching what the initial prompt accepts.
                String path = queryString.substring(specialQuery[0].length()).trim();
                if (path.isEmpty()) {
                    System.out.println("Usage: :index <directory path>");
                    break;
                }
                FILE_NAMES = indexDirectory(GUDDEN, path);
                break;
            case ":vocab":
                String[] vocabulary = GUDDEN.vocabulary();
                for (String each : vocabulary) {
                    System.out.println(each);
                }
                System.out.println("There are total of " + vocabulary.length + " vocabularies.");
                break;
            default:
                search(queryString);
                break;
            }
        }
        sc.close();
    }

    /**
     * Runs a query against the engine and prints the file name of each matching document once,
     * followed by the number of distinct documents. Prints nothing when the engine returns
     * {@code null}.
     *
     * @param queryString the raw query text entered by the user
     */
    private static void search(String queryString) {
        Query query = new Query(queryString);
        List<PositionalPosting> results = GUDDEN.search(query);
        if (results == null) {
            return;
        }
        Set<Integer> resultId = new HashSet<Integer>();
        for (PositionalPosting result : results) {
            int id = result.getId();
            // Set.add reports whether the id was new, so each document prints only once.
            if (resultId.add(id)) {
                System.out.println(FILE_NAMES.get(id));
            }
        }
        System.out.println("Size: " + resultId.size());
    }

    /**
     * Resets the engine's index and indexes the directory at {@code path}.
     *
     * @param engine the engine to index into
     * @param path the directory to index
     * @return the file names reported by the engine, or an empty list when indexing failed;
     *         never {@code null}
     */
    private static List<String> indexDirectory(Guddengine engine, String path) {
        System.out.println("Indexing the new path at \"" + path + "\"");
        engine.resetIndex();
        List<String> fileNames = null;
        try {
            fileNames = engine.indexDirectory(path);
            System.out.println("Done Indexing");
        } catch (IOException e) {
            System.err.println("Failed to index \"" + path + "\": " + e);
        }
        // Never hand back null: search() dereferences the list.
        return fileNames != null ? fileNames : java.util.Collections.<String>emptyList();
    }
}
57 changes: 57 additions & 0 deletions src/main/java/com/gudden/maven/model/Document.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package com.gudden.maven.model;

/**
 * A document with a URL, a body and a title.
 *
 * <p>Plain mutable bean with a getter and setter per field.
 */
public class Document {

    private String url;
    private String body;
    private String title;

    // ------------------------------------------------------------------------------------------------------

    /** @return the document body, or {@code null} if it was never set */
    public String getBody() {
        return body;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @return the document title, or {@code null} if it was never set */
    public String getTitle() {
        return title;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @return the document URL, or {@code null} if it was never set */
    public String getUrl() {
        return url;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param body the new document body */
    public void setBody(String body) {
        this.body = body;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param title the new document title */
    public void setTitle(String title) {
        this.title = title;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param url the new document URL */
    public void setUrl(String url) {
        this.url = url;
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Returns the URL, body and title joined by single spaces, in that order.
     *
     * <p>A field that is {@code null} contributes an empty string rather than the literal text
     * "null", so that splitting the result on whitespace never yields a spurious "null" token.
     *
     * @return {@code url + " " + body + " " + title} with {@code null} treated as empty
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(orEmpty(url));
        sb.append(" ");
        sb.append(orEmpty(body));
        sb.append(" ");
        sb.append(orEmpty(title));
        return sb.toString();
    }

    // ------------------------------------------------------------------------------------------------------

    /** Maps {@code null} to the empty string and returns any other value unchanged. */
    private static String orEmpty(String value) {
        return value == null ? "" : value;
    }

}
75 changes: 75 additions & 0 deletions src/main/java/com/gudden/maven/model/DocumentTokenStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.gudden.maven.model;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Scanner;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;

/**
 * {@link TokenStream} that yields lower-cased, whitespace-separated tokens, read either from a
 * JSON document file or directly from a string.
 */
public class DocumentTokenStream implements TokenStream {

    private Scanner reader;

    // ------------------------------------------------------------------------------------------------------

    /**
     * Creates a stream over the text of the JSON document stored in {@code file}. If the file
     * cannot be read the stream is empty.
     *
     * @param file a UTF-8 JSON file whose top-level value is an object
     */
    public DocumentTokenStream(File file) {
        this.reader = new Scanner(process(file));
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Creates a stream over the given text.
     *
     * @param token the text to split into tokens
     */
    public DocumentTokenStream(String token) {
        this.reader = new Scanner(token);
    }

    // ------------------------------------------------------------------------------------------------------

    /** @return {@code true} if another token is available */
    public boolean hasNextToken() {
        return this.reader.hasNext();
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Returns the next non-empty token in lower case.
     *
     * @return the next token, or {@code null} when the stream is exhausted
     */
    public String nextToken() {
        while (hasNextToken()) {
            // Locale.ROOT keeps the result independent of the default locale
            // (e.g. under a Turkish locale "I" would otherwise not become "i").
            String type = reader.next().toLowerCase(java.util.Locale.ROOT);
            if (type.length() > 0) {
                return type;
            }
        }
        return null;
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Reads {@code file} as JSON and maps it onto a {@link Document}.
     *
     * @return the parsed document, or {@code null} if the file could not be read
     */
    private Document getDocument(File file) {
        // try-with-resources guarantees the file handle is released, even when parsing fails.
        try (InputStream in = new FileInputStream(file)) {
            JsonObject json = parseToJsonObject(in);
            return new Gson().fromJson(json, Document.class);
        } catch (IOException e) {
            System.err.println("Could not read document \"" + file + "\": " + e);
            return null;
        }
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Parses the stream as UTF-8 JSON and returns its top-level object.
     *
     * @throws IOException if the UTF-8 reader cannot be created
     */
    private JsonObject parseToJsonObject(InputStream in) throws IOException {
        JsonParser parser = new JsonParser();
        return parser.parse(new JsonReader(new InputStreamReader(in, "UTF-8"))).getAsJsonObject();
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Returns the text to tokenize for {@code file}: the document's string form, or an empty
     * string when the document could not be loaded.
     */
    private String process(File file) {
        Document document = getDocument(file);
        return document == null ? "" : document.toString();
    }

}
Loading

0 comments on commit c3525cb

Please sign in to comment.