From 60453c623957fbef78fe13ae47888a8f8570bba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20N=C3=BCtzi?= Date: Fri, 12 Jul 2024 10:30:43 +0200 Subject: [PATCH] fix: correct all clippy errors --- src/crypto.rs | 20 +++++------------- src/io.rs | 6 ++---- src/log.rs | 1 - src/model.rs | 1 - src/pass_first.rs | 52 ++++++++++++++++++++++++++++------------------ src/pass_second.rs | 14 ++++++------- src/rdf_types.rs | 33 ++++++++++++----------------- src/rules.rs | 18 +++++++--------- tools/lint-rust.sh | 2 +- 9 files changed, 67 insertions(+), 80 deletions(-) diff --git a/src/crypto.rs b/src/crypto.rs index 500ad17..162858e 100644 --- a/src/crypto.rs +++ b/src/crypto.rs @@ -1,6 +1,5 @@ use super::model::Entity; use crate::{model::TripleMask, rdf_types::*}; -use blake3; pub trait Pseudonymize { // Pseudonymize parts of a triple set by its mask @@ -26,9 +25,9 @@ pub trait Pseudonymize { fn pseudo_entity(&self, e: &Entity) -> Entity { match e { - Entity::Literal(l) => Entity::Literal(self.pseudo_literal(&l)), - Entity::NamedNode(n) => Entity::NamedNode(self.pseudo_named_node(&n)), - Entity::BlankNode(b) => Entity::BlankNode(self.pseudo_blank_node(&b)), + Entity::Literal(l) => Entity::Literal(self.pseudo_literal(l)), + Entity::NamedNode(n) => Entity::NamedNode(self.pseudo_named_node(n)), + Entity::BlankNode(b) => Entity::BlankNode(self.pseudo_blank_node(b)), } } // private methods? Blanket implementations @@ -42,17 +41,8 @@ pub trait Pseudonymize { // return u.clone() } -pub struct DefaultHasher { - hasher: blake3::Hasher, -} - -impl DefaultHasher { - pub fn new() -> Self { - return DefaultHasher { - hasher: blake3::Hasher::new(), - }; - } -} +#[derive(Default)] +pub struct DefaultHasher {} impl Pseudonymize for DefaultHasher { fn pseudo_named_node(&self, t: &NamedNode) -> NamedNode { diff --git a/src/io.rs b/src/io.rs index 36e1cca..9f84fe6 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,10 +1,8 @@ use crate::rules::Rules; use rio_turtle::NTriplesParser; -use serde_yml; use std::{ - boxed::Box, fs::File, - io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Write}, + io::{self, stdin, stdout, BufRead, BufReader, BufWriter}, path::Path, }; @@ -46,7 +44,7 @@ pub fn parse_ntriples(reader: impl BufRead) -> NTriplesParser { // Parse yaml configuration file. pub fn parse_config(path: &Path) -> Rules { - return match File::open(&path) { + return match File::open(path) { Ok(file) => serde_yml::from_reader(file).expect("Error parsing config file."), Err(e) => panic!("Cannot open file '{:?}': '{}'.", path, e), }; diff --git a/src/log.rs b/src/log.rs index eb5a10b..d766846 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,5 +1,4 @@ use slog::{self, o, Drain}; -use slog_async; use std::{io, sync::Arc}; pub type Logger = slog::Logger; diff --git a/src/model.rs b/src/model.rs index 8b164fb..2567a36 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,7 +1,6 @@ use std::hash::Hash; use crate::rdf_types::*; -use bitflags; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum Entity { diff --git a/src/pass_first.rs b/src/pass_first.rs index 148db6d..64b0f6a 100644 --- a/src/pass_first.rs +++ b/src/pass_first.rs @@ -1,30 +1,42 @@ -use rio_api::{model::Triple, parser::TriplesParser}; +use rayon::prelude::*; +use rio_api::parser::TriplesParser; use rio_turtle::TurtleError; -use std::{ - io::{stdin, BufRead, BufReader, Write}, - path::Path, -}; +use std::{io::Write, path::Path, sync::Mutex}; -use crate::io; +use crate::{ + io, + rdf_types::{Triple, TripleView}, +}; -fn index_triple(t: Triple, out: &mut impl Write) -> Result<(), TurtleError> { - match t.predicate.iri { - "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" => { - let _ = out.write(&format!("{} .\n", &t.to_string()).into_bytes()); +fn index_triple(t: Triple, out: &mut impl Write) { + if t.predicate.iri.as_str() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" { + if let Err(e) = out.write(&format!("{} .\n", &t.to_string()).into_bytes()) { + panic!("Error writting to out buffer: {e}"); } - _ => {} } - - Ok(()) } pub fn create_type_map(input: &Path, output: &Path) { let buf_in = io::get_reader(input); - let mut buf_out = io::get_writer(output); - let mut triples = io::parse_ntriples(buf_in); - while !triples.is_end() { - triples - .parse_step(&mut |t| index_triple(t, &mut buf_out)) - .unwrap(); - } + let buf_out = Mutex::new(io::get_writer(output)); + let triples = io::parse_ntriples(buf_in); + + // Make a parallel triple iterator over `rdf_types::Triple`. + // We have to wrap the `buf_out` with a `Mutex` to make it + // writable by multiple threads. + // NOTE: Weird `rio_api::into_iter` implementation, why does it use a full-blown + // `Vec`, this could be simpler. + // + let it = triples + .into_iter(|t: TripleView| Result::::Ok(t.into())) + .par_bridge(); + + // Iterate in parallel over the triples. + it.for_each(|t| match t { + Ok(t) => { + let mut guard = buf_out.lock().unwrap(); + index_triple(t, guard.by_ref()) + } + Err(t) => panic!("Parsing error occured: {t}"), + }) } diff --git a/src/pass_second.rs b/src/pass_second.rs index 7843933..e4a950e 100644 --- a/src/pass_second.rs +++ b/src/pass_second.rs @@ -2,7 +2,6 @@ use rio_api::parser::TriplesParser; use rio_turtle::TurtleError; use std::{ collections::HashMap, - fmt::{Debug, Display}, io::{BufRead, Write}, path::Path, }; @@ -39,10 +38,9 @@ fn process_triple( node_to_type: &HashMap, out: &mut impl Write, ) -> Result<(), TurtleError> { - let mask = match_rules(triple.clone(), &rules_config, &node_to_type); - let hasher = DefaultHasher::new(); - let _ = - out.write(&format!("{} .\n", hasher.pseudo_triple(&triple, mask).to_string()).into_bytes()); + let mask = match_rules(triple.clone(), rules_config, node_to_type); + let hasher = DefaultHasher::default(); + let _ = out.write(&format!("{} .\n", hasher.pseudo_triple(&triple, mask)).into_bytes()); Ok(()) } @@ -55,8 +53,8 @@ fn load_type_map(input: impl BufRead) -> HashMap { while !triples.is_end() { let _: Result<(), TurtleError> = triples.parse_step(&mut |t| { node_to_type.insert( - t.subject.to_string().replace(&['<', '>'], ""), - t.object.to_string().replace(&['<', '>'], ""), + t.subject.to_string().replace(['<', '>'], ""), + t.object.to_string().replace(['<', '>'], ""), ); Ok(()) }); @@ -65,7 +63,7 @@ fn load_type_map(input: impl BufRead) -> HashMap { return node_to_type; } -pub fn pseudonymize_graph(log: &Logger, input: &Path, config: &Path, output: &Path, index: &Path) { +pub fn pseudonymize_graph(_: &Logger, input: &Path, config: &Path, output: &Path, index: &Path) { let buf_input = io::get_reader(input); let buf_index = io::get_reader(index); let mut buf_output = io::get_writer(output); diff --git a/src/rdf_types.rs b/src/rdf_types.rs index 1f659a6..8cf9b2a 100644 --- a/src/rdf_types.rs +++ b/src/rdf_types.rs @@ -1,18 +1,17 @@ -use super::model::{Entity, TripleMask}; -use rio_api; -use std::{fmt, fmt::Write, ops::Sub}; +use super::model::Entity; +use std::{fmt, fmt::Write}; // Rewrite all the rio types to be able to instanciate triples // Rename rio types as XXXView to distinguish them from our types // Use rio types for parsing and serializing // Define mappers between the two types // -type NamedNodeView<'a> = rio_api::model::NamedNode<'a>; -type LiteralView<'a> = rio_api::model::Literal<'a>; -type TermView<'a> = rio_api::model::Term<'a>; -type TripleView<'a> = rio_api::model::Triple<'a>; -type BlankNodeView<'a> = rio_api::model::BlankNode<'a>; -type SubjectView<'a> = rio_api::model::Subject<'a>; +pub type NamedNodeView<'a> = rio_api::model::NamedNode<'a>; +pub type LiteralView<'a> = rio_api::model::Literal<'a>; +pub type TermView<'a> = rio_api::model::Term<'a>; +pub type TripleView<'a> = rio_api::model::Triple<'a>; +pub type BlankNodeView<'a> = rio_api::model::BlankNode<'a>; +pub type SubjectView<'a> = rio_api::model::Subject<'a>; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct Triple { @@ -130,16 +129,10 @@ impl fmt::Display for BlankNode { impl<'a> From> for Triple { fn from(t: TripleView<'a>) -> Self { - match t { - TripleView { - subject, - predicate, - object, - } => Triple { - subject: subject.into(), - predicate: predicate.into(), - object: object.into(), - }, + Triple { + subject: t.subject.into(), + predicate: t.predicate.into(), + object: t.object.into(), } } } @@ -247,7 +240,7 @@ impl From for Term { } #[inline] -fn fmt_quoted_str(string: &String, f: &mut fmt::Formatter<'_>) -> fmt::Result { +fn fmt_quoted_str(string: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_char('"')?; for c in string.chars() { match c { diff --git a/src/rules.rs b/src/rules.rs index 134f0a6..0cf55e7 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -50,7 +50,7 @@ pub fn match_type_rule_subject( ) -> TripleMask { match subject { Subject::NamedNode(n) => { - return mask | match_type_rule_named_node(true, &n, mask, rules, type_map); + return mask | match_type_rule_named_node(true, n, mask, rules, type_map); } Subject::BlankNode(_) => return mask, } @@ -64,21 +64,19 @@ pub fn match_type_rule_object( ) -> TripleMask { match object { Term::NamedNode(n) => { - return mask | match_type_rule_named_node(false, &n, mask, rules, type_map); + return mask | match_type_rule_named_node(false, n, mask, rules, type_map); } _ => return mask, } } pub fn match_predicate_rule(predicate: &NamedNode, mask: TripleMask, rules: &Rules) -> TripleMask { - match predicate { - NamedNode { iri: n } => { - if rules.replace_value_of_predicate.contains(n) { - return mask | TripleMask::OBJECT; - } else { - return mask; - } - } + let NamedNode { iri: i } = predicate; + + if rules.replace_value_of_predicate.contains(i) { + return mask | TripleMask::OBJECT; + } else { + return mask; } } diff --git a/tools/lint-rust.sh b/tools/lint-rust.sh index d81569f..3c8c6c8 100755 --- a/tools/lint-rust.sh +++ b/tools/lint-rust.sh @@ -14,7 +14,7 @@ cargo clippy --version print_info "Run Rust Clippy linter." print_warning "Currently warnings are not errors!" -cargo clippy --no-deps -- -A clippy::needless_return "$@" || +cargo clippy --no-deps -- -D warnings -A clippy::needless_return "$@" || { git diff --name-status || true die "Rust clippy failed."