diff --git a/assemblies/plugins/pom.xml b/assemblies/plugins/pom.xml index 9f8f6ac6bf6..3d4ca51983a 100644 --- a/assemblies/plugins/pom.xml +++ b/assemblies/plugins/pom.xml @@ -486,6 +486,12 @@ ${project.version} zip + + org.apache.hop + hop-databases-impala + ${project.version} + zip + org.apache.hop hop-databases-infobright diff --git a/docs/hop-user-manual/modules/ROOT/nav.adoc b/docs/hop-user-manual/modules/ROOT/nav.adoc index 99d0ed2d445..6271beda0be 100644 --- a/docs/hop-user-manual/modules/ROOT/nav.adoc +++ b/docs/hop-user-manual/modules/ROOT/nav.adoc @@ -388,6 +388,7 @@ under the License. ** xref:database/databases/h2.adoc[H2] ** xref:database/databases/hypersonic.adoc[Hypersonic] ** xref:database/databases/universe.adoc[IBM Universe] +** xref:database/databases/cloudera-impala.adoc[Cloudera Impala] ** xref:database/databases/infobright.adoc[Infobright DB] ** xref:database/databases/informix.adoc[Informix] ** xref:database/databases/ingres.adoc[Ingres] diff --git a/docs/hop-user-manual/modules/ROOT/pages/database/databases/cloudera-impala.adoc b/docs/hop-user-manual/modules/ROOT/pages/database/databases/cloudera-impala.adoc new file mode 100644 index 00000000000..61ad353b799 --- /dev/null +++ b/docs/hop-user-manual/modules/ROOT/pages/database/databases/cloudera-impala.adoc @@ -0,0 +1,49 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations +under the License. +//// +[[database-plugins-cloudera-impala]] +:documentationPath: /database/databases/ +:language: en_US + += Cloudera Impala + +You can query data on Hadoop using the low-latency Impala implementation of SQL. +For more information, see the https://docs.cloudera.com/runtime/7.2.18/impala-overview/topics/impala-overview.html[Apache Impala Overview]. + +Impala itself has become an Apache project, which you can find at the https://impala.apache.org[Apache Impala Homepage]. + +Check the https://docs.cloudera.com/documentation/other/connectors/impala-jdbc/latest.html[documentation] to see how you can connect to Cloudera Impala using Kerberos security. Set the required options in the Options tab of your relational database connection metadata: + +* AllowSelfSignedCerts=1 +* AuthMech=1 +* KrbHostFQDN=`IMPALA HOSTNAME` +* KrbRealm=`Kerberos Realm` +* KrbServiceName=`Impala Service name` +* SSL=1 +* principal=`Principal name` + + +[cols="2*",options="header"] +|=== +| Option | Info +|Type | Relational +|Driver | Use the Cloudera Impala JDBC driver. You can https://www.cloudera.com/downloads/connectors/impala/jdbc/2-6-35.html[download] the latest version from the Cloudera website. 
+|Version Included | None +|Hop Dependencies | Cloudera Impala Database Plugin +|Documentation | https://docs.cloudera.com/documentation/other/connectors/impala-jdbc/latest.html[Documentation Link] +|JDBC Url | jdbc:impala://hostname:port;principal=principalName +|Driver folder | /lib/jdbc +|=== diff --git a/plugins/databases/impala/pom.xml b/plugins/databases/impala/pom.xml new file mode 100644 index 00000000000..1c1c3db1194 --- /dev/null +++ b/plugins/databases/impala/pom.xml @@ -0,0 +1,31 @@ + + + + 4.0.0 + + org.apache.hop + hop-plugins-databases + 2.12.0-SNAPSHOT + + + hop-databases-impala + jar + Hop Plugins Databases Impala + + diff --git a/plugins/databases/impala/src/assembly/assembly.xml b/plugins/databases/impala/src/assembly/assembly.xml new file mode 100644 index 00000000000..7655bbdbae8 --- /dev/null +++ b/plugins/databases/impala/src/assembly/assembly.xml @@ -0,0 +1,43 @@ + + + + hop-databases-impala + + zip + + . + + + ${project.basedir}/src/main/resources/version.xml + plugins/databases/impala + true + + + + + + + org.apache.hop:hop-databases-impala:jar + + plugins/databases/impala + + + diff --git a/plugins/databases/impala/src/main/java/org/apache/hop/databases/impala/ImpalaDatabaseMeta.java b/plugins/databases/impala/src/main/java/org/apache/hop/databases/impala/ImpalaDatabaseMeta.java new file mode 100644 index 00000000000..94f29fb44f9 --- /dev/null +++ b/plugins/databases/impala/src/main/java/org/apache/hop/databases/impala/ImpalaDatabaseMeta.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hop.databases.impala; + +import org.apache.commons.lang.StringUtils; +import org.apache.hop.core.Const; +import org.apache.hop.core.database.BaseDatabaseMeta; +import org.apache.hop.core.database.DatabaseMeta; +import org.apache.hop.core.database.DatabaseMetaPlugin; +import org.apache.hop.core.database.IDatabase; +import org.apache.hop.core.gui.plugin.GuiElementType; +import org.apache.hop.core.gui.plugin.GuiPlugin; +import org.apache.hop.core.gui.plugin.GuiWidgetElement; +import org.apache.hop.core.row.IValueMeta; +import org.apache.hop.metadata.api.HopMetadataProperty; + +/** Contains Cloudera Impala specific information through static final members */ +@DatabaseMetaPlugin( + type = "CLOUDERA-IMPALA", + typeDescription = "Cloudera Impala", + documentationUrl = "/database/databases/cloudera-impala") +@GuiPlugin(id = "GUI-ClouderaImpalaDatabaseMeta") +public class ImpalaDatabaseMeta extends BaseDatabaseMeta implements IDatabase { + public static final String CONST_ALTER_TABLE = "ALTER TABLE "; + private static final int VARCHAR_LIMIT = 65_535; + + @GuiWidgetElement( + id = "impalaPrincipal", + order = "10", + parentId = DatabaseMeta.GUI_PLUGIN_ELEMENT_PARENT_ID, + type = GuiElementType.TEXT, + label = "i18n::ImpalaDatabaseMeta.label.PrincipalName", + toolTip = "i18n::ImpalaDatabaseMeta.toolTip.PrincipalName") + @HopMetadataProperty(key = "principal") + private String principalName; + + @Override + public String getDriverClass() { + return "com.cloudera.impala.jdbc.Driver"; + } + + @Override + public int 
getDefaultDatabasePort() { + return 21050; + } + + public int[] getAccessTypeList() { + return new int[] {DatabaseMeta.TYPE_ACCESS_NATIVE}; + } + + @Override + public boolean isSupportsAutoInc() { + return false; + } + + @Override + public int getMaxTextFieldLength() { + return VARCHAR_LIMIT; + } + + @Override + public int getMaxVARCHARLength() { + return VARCHAR_LIMIT; + } + + @Override + public boolean isRequiresName() { + return false; + } + + @Override + public String getURL(String hostname, String port, String databaseName) { + String url = "jdbc:impala://" + hostname + ":" + port + "/"; + if (StringUtils.isNotEmpty(databaseName)) { + url += databaseName + "/"; + } + + if (StringUtils.isNotEmpty(principalName)) { + // Kerberos auth: add host and port and the principal in the options + // + url += ";principal=" + principalName; + } + + return url; + } + + @Override + public String getAddColumnStatement( + String tableName, + IValueMeta v, + String tk, + boolean useAutoIncrement, + String pk, + boolean semicolon) { + return CONST_ALTER_TABLE + + tableName + + " ADD COLUMN " + + getFieldDefinition(v, tk, pk, useAutoIncrement, true, false); + } + + @Override + public String getModifyColumnStatement( + String tableName, + IValueMeta v, + String tk, + boolean useAutoIncrement, + String pk, + boolean semicolon) { + return CONST_ALTER_TABLE + + tableName + + " MODIFY " + + getFieldDefinition(v, tk, pk, useAutoIncrement, true, false); + } + + @Override + public String getDropColumnStatement( + String tableName, + IValueMeta v, + String tk, + boolean useAutoIncrement, + String pk, + boolean semicolon) { + return CONST_ALTER_TABLE + tableName + " DROP COLUMN " + v.getName(); + } + + @Override + public String getFieldDefinition( + IValueMeta v, + String tk, + String pk, + boolean useAutoIncrement, + boolean addFieldName, + boolean addCR) { + String fieldClause = ""; + + String fieldName = v.getName(); + if (v.getLength() == DatabaseMeta.CLOB_LENGTH) { + 
v.setLength(getMaxTextFieldLength()); + } + int length = v.getLength(); + int precision = v.getPrecision(); + + if (addFieldName) { + fieldClause += fieldName + " "; + } + + int type = v.getType(); + switch (type) { + case IValueMeta.TYPE_DATE: + case IValueMeta.TYPE_TIMESTAMP: + fieldClause += "TIMESTAMP"; + break; + case IValueMeta.TYPE_BOOLEAN: + if (isSupportsBooleanDataType()) { + fieldClause += "BOOLEAN"; + } else { + fieldClause += "CHAR(1)"; + } + break; + + case IValueMeta.TYPE_NUMBER, IValueMeta.TYPE_INTEGER, IValueMeta.TYPE_BIGNUMBER: + if (fieldName.equalsIgnoreCase(tk) + || // Technical key + fieldName.equalsIgnoreCase(pk) // Primary key + ) { + fieldClause += "BIGINT NOT NULL PRIMARY KEY"; + } else { + // Integer values... + if (precision == 0) { + if (length > 9) { + if (length < 19) { + // can hold signed values between -9223372036854775808 and 9223372036854775807 + // 18 significant digits + fieldClause += "BIGINT"; + } else { + fieldClause += "DECIMAL(" + length + ")"; + } + } else { + fieldClause += "INT"; + } + } else { + // Floating point values... + if (length > 15) { + fieldClause += "DECIMAL(" + length; + if (precision > 0) { + fieldClause += ", " + precision; + } + fieldClause += ")"; + } else { + // A double-precision floating-point number is accurate to approximately 15 decimal + // places. + // The values are stored in 8 bytes, using IEEE 754 Double Precision Binary Floating + // Point format + // + fieldClause += "DOUBLE"; + } + } + } + break; + case IValueMeta.TYPE_STRING: + if (length > 0) { + if (length == 1) { + fieldClause += "CHAR(1)"; + } else if (length < 65536) { + fieldClause += "VARCHAR(" + length + ")"; + } else { + fieldClause += "VARCHAR(65535)"; + } + } else { + fieldClause += "VARCHAR(65535)"; + } + break; + case IValueMeta.TYPE_BINARY: + fieldClause += "!! BINARY DATA TYPE ARE NOT SUPPORTED ON IMPALA !!"; + break; + default: + fieldClause += " !! 
UNKNOWN DATA TYPE FOR IMPALA !!"; + break; + } + + if (addCR) { + fieldClause += Const.CR; + } + + return fieldClause; + } + + public String getPrincipalName() { + return principalName; + } + + public void setPrincipalName(String principalName) { + this.principalName = principalName; + } +} diff --git a/plugins/databases/impala/src/main/resources/org/apache/hop/databases/impala/messages/messages_en_US.properties b/plugins/databases/impala/src/main/resources/org/apache/hop/databases/impala/messages/messages_en_US.properties new file mode 100644 index 00000000000..5059401ef01 --- /dev/null +++ b/plugins/databases/impala/src/main/resources/org/apache/hop/databases/impala/messages/messages_en_US.properties @@ -0,0 +1,21 @@ +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# +ImpalaDatabaseMeta.label.PrincipalName = Principal name +ImpalaDatabaseMeta.toolTip.PrincipalName = Specify the name of the principal to use. 
diff --git a/plugins/databases/impala/src/main/resources/version.xml b/plugins/databases/impala/src/main/resources/version.xml new file mode 100644 index 00000000000..6be576acae9 --- /dev/null +++ b/plugins/databases/impala/src/main/resources/version.xml @@ -0,0 +1,20 @@ + + + +${project.version} \ No newline at end of file diff --git a/plugins/databases/pom.xml b/plugins/databases/pom.xml index 8189df539c5..e5164d9bfde 100644 --- a/plugins/databases/pom.xml +++ b/plugins/databases/pom.xml @@ -47,6 +47,7 @@ h2 hive hypersonic + impala infobright informix ingres diff --git a/plugins/tech/avro/src/main/java/org/apache/hop/avro/transforms/avroinput/AvroFileInput.java b/plugins/tech/avro/src/main/java/org/apache/hop/avro/transforms/avroinput/AvroFileInput.java index 2584142fa88..213cf72ba9b 100644 --- a/plugins/tech/avro/src/main/java/org/apache/hop/avro/transforms/avroinput/AvroFileInput.java +++ b/plugins/tech/avro/src/main/java/org/apache/hop/avro/transforms/avroinput/AvroFileInput.java @@ -95,7 +95,7 @@ public boolean processRow() throws HopException { try (InputStream inputStream = HopVfs.getInputStream(filename, variables)) { GenericDatumReader datumReader = new GenericDatumReader<>(); DataFileStream fileStream = new DataFileStream<>(inputStream, datumReader); - while (fileStream.hasNext()) { + while (fileStream.hasNext() && !isStopped()) { GenericRecord genericRecord = fileStream.next(); incrementLinesInput();