-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
27 changed files
with
2,170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<classpath> | ||
<classpathentry kind="src" output="target/classes" path="src/main/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> | ||
<classpathentry kind="output" path="target/classes"/> | ||
</classpath> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/target/ | ||
/external/ | ||
/.settings/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<projectDescription> | ||
<name>guddengine</name> | ||
<comment></comment> | ||
<projects> | ||
</projects> | ||
<buildSpec> | ||
<buildCommand> | ||
<name>org.eclipse.jdt.core.javabuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
<buildCommand> | ||
<name>org.eclipse.m2e.core.maven2Builder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
</buildSpec> | ||
<natures> | ||
<nature>org.eclipse.jdt.core.javanature</nature> | ||
<nature>org.eclipse.m2e.core.maven2Nature</nature> | ||
</natures> | ||
</projectDescription> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates -->
    <groupId>com.gudden.maven</groupId>
    <artifactId>guddengine</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>guddengine</name>
    <url>http://maven.apache.org</url>

    <properties>
        <!-- Sources are UTF-8 and compiled for Java 8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.target>1.8</maven.compiler.target>
        <maven.compiler.source>1.8</maven.compiler.source>
    </properties>

    <repositories>
        <repository>
            <id>central</id>
            <name>Maven Repository</name>
            <!-- Maven Central only serves HTTPS; the former http://repo1.maven.org/maven2 URL is rejected. -->
            <url>https://repo.maven.apache.org/maven2</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Unit testing (test scope only). -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- JSON parsing of the indexed documents. -->
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.2</version>
        </dependency>
    </dependencies>
</project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package com.gudden.maven.guddengine; | ||
|
||
import java.io.IOException; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Scanner; | ||
import java.util.Set; | ||
|
||
import com.gudden.maven.model.Guddengine; | ||
import com.gudden.maven.model.PositionalPosting; | ||
import com.gudden.maven.model.Query; | ||
|
||
/** | ||
* Hello world! | ||
* | ||
*/ | ||
public class App { | ||
private static final Guddengine GUDDEN = new Guddengine(); | ||
private static List<String> FILE_NAMES = null; | ||
public static void main(String[] args) { | ||
|
||
Scanner sc = new Scanner(System.in); | ||
System.out.print("Please enter the directory path you wish to index: "); | ||
FILE_NAMES = indexDirectory(GUDDEN, sc.nextLine()); | ||
|
||
while(true) { | ||
String queryString = sc.nextLine().trim(); | ||
String[] specialQuery = queryString.split("\\s+"); | ||
switch(specialQuery[0]) { | ||
case ":q": | ||
System.out.println("Have a good day. Thank you for using guddengine."); | ||
sc.close(); | ||
System.exit(0); | ||
break; | ||
case ":stem": | ||
System.out.print("The stemmed token for \"" + specialQuery[1] + "\" is \""); | ||
System.out.println(GUDDEN.stemToken(specialQuery[1]) + "\""); | ||
break; | ||
case ":index": | ||
FILE_NAMES = indexDirectory(GUDDEN,specialQuery[1]); | ||
break; | ||
case ":vocab": | ||
for (String each : GUDDEN.vocabulary()) System.out.println(each); | ||
System.out.println("There are total of " + GUDDEN.vocabulary().length + " vocabularies."); | ||
break; | ||
default: | ||
search(queryString); | ||
break; | ||
} | ||
} | ||
} | ||
|
||
private static void search(String queryString) { | ||
Query query = new Query(queryString); | ||
List<PositionalPosting> results = GUDDEN.search(query); | ||
Set<Integer> resultId = new HashSet<Integer>(); | ||
if (results != null) { | ||
for (PositionalPosting result : results) { | ||
if (!resultId.contains(result.getId())) { | ||
resultId.add(result.getId()); | ||
System.out.println(FILE_NAMES.get(result.getId())); | ||
} | ||
} | ||
System.out.println("Size: " + resultId.size()); | ||
} | ||
} | ||
|
||
private static List<String> indexDirectory(Guddengine engine, String path) { | ||
System.out.println("Indexing the new path at \"" + path + "\""); | ||
List<String> fileNames = null; | ||
engine.resetIndex(); | ||
try { | ||
fileNames = engine.indexDirectory(path); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
System.out.println("Done Indexing"); | ||
return fileNames; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package com.gudden.maven.model; | ||
|
||
/**
 * Mutable data holder for a single document, consisting of a URL, a body and a title.
 *
 * <p>All three fields start out as {@code null} until set.
 */
public class Document {

    private String url;
    private String body;
    private String title;

    // ------------------------------------------------------------------------------------------------------

    /** @return the document body, or {@code null} if none has been set */
    public String getBody() {
        return body;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @return the document title, or {@code null} if none has been set */
    public String getTitle() {
        return title;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @return the document URL, or {@code null} if none has been set */
    public String getUrl() {
        return url;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param body the new document body */
    public void setBody(String body) {
        this.body = body;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param title the new document title */
    public void setTitle(String title) {
        this.title = title;
    }

    // ------------------------------------------------------------------------------------------------------

    /** @param url the new document URL */
    public void setUrl(String url) {
        this.url = url;
    }

    // ------------------------------------------------------------------------------------------------------

    /**
     * Returns the URL, body and title joined by single spaces, in that order.
     *
     * <p>A field that is {@code null} contributes an empty string, so the literal word
     * {@code "null"} never appears in the result and cannot be mistaken for document text.
     *
     * @return the space-separated concatenation of URL, body and title
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(orEmpty(url));
        sb.append(" ");
        sb.append(orEmpty(body));
        sb.append(" ");
        sb.append(orEmpty(title));
        return sb.toString();
    }

    // ------------------------------------------------------------------------------------------------------

    /** Maps {@code null} to the empty string and returns any other value unchanged. */
    private static String orEmpty(String value) {
        return value == null ? "" : value;
    }

}
75 changes: 75 additions & 0 deletions
75
src/main/java/com/gudden/maven/model/DocumentTokenStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package com.gudden.maven.model; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.util.Scanner; | ||
|
||
import com.google.gson.Gson; | ||
import com.google.gson.JsonObject; | ||
import com.google.gson.JsonParser; | ||
import com.google.gson.stream.JsonReader; | ||
|
||
public class DocumentTokenStream implements TokenStream { | ||
|
||
private Scanner reader; | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
public DocumentTokenStream(File file) { | ||
this.reader = new Scanner(process(file)); | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
public DocumentTokenStream(String token) { | ||
this.reader = new Scanner(token); | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
public boolean hasNextToken() { | ||
return this.reader.hasNext(); | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
public String nextToken() { | ||
if (!hasNextToken()) | ||
return null; | ||
String type = reader.next().toLowerCase(); | ||
return type.length() > 0 ? type : hasNextToken() ? nextToken() : null; | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
private Document getDocument(File file) { | ||
Gson gson = new Gson(); | ||
JsonObject json = null; | ||
|
||
try { | ||
json = parseToJsonObject(new FileInputStream(file)); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
return null; | ||
} | ||
|
||
return gson.fromJson(json, Document.class); | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
private JsonObject parseToJsonObject(InputStream in) throws IOException { | ||
JsonParser parser = new JsonParser(); | ||
return parser.parse(new JsonReader(new InputStreamReader(in, "UTF-8"))).getAsJsonObject(); | ||
} | ||
|
||
// ------------------------------------------------------------------------------------------------------ | ||
|
||
private String process(File file) { | ||
return getDocument(file).toString(); | ||
} | ||
|
||
} |
Oops, something went wrong.