diff --git a/Cargo.lock b/Cargo.lock index 31b72ff..fcfcbac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -135,9 +135,16 @@ version = "0.1.0" dependencies = [ "clap", "env_logger", + "glob", "log", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "heck" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index bc37b1d..19e89a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,5 @@ description = "Combines blocklists for ActivityPub software" [dependencies] clap = { version = "4.5.4", features = ["derive"] } env_logger = "0.11.3" +glob = "0.3.1" log = "0.4.21" diff --git a/block.txt b/block.txt new file mode 100644 index 0000000..6bfed29 --- /dev/null +++ b/block.txt @@ -0,0 +1,6 @@ +example.com +example.net +example.org +test.com +test.net +test.org diff --git a/blocks/anothertest.txt b/blocks/anothertest.txt new file mode 100644 index 0000000..b640eaa --- /dev/null +++ b/blocks/anothertest.txt @@ -0,0 +1,5 @@ +example.com +example.org +test.net +test.com +test.org diff --git a/blocks/test.txt b/blocks/test.txt new file mode 100644 index 0000000..dc71069 --- /dev/null +++ b/blocks/test.txt @@ -0,0 +1,6 @@ +example.net +example.com +example.org +test.net +test.com +test.org diff --git a/test/test_mutes.txt b/blocks/yetanothertest.txt similarity index 57% rename from test/test_mutes.txt rename to blocks/yetanothertest.txt index db90029..8e3dfb3 100644 --- a/test/test_mutes.txt +++ b/blocks/yetanothertest.txt @@ -1,2 +1,4 @@ example.net example.org +test.net +test.org diff --git a/mute.txt b/mute.txt new file mode 100644 index 0000000..5aa05d6 --- /dev/null +++ b/mute.txt @@ -0,0 +1,3 @@ +example.org +test.com +test.net diff --git a/mutes/anothertest.txt b/mutes/anothertest.txt new file mode 100644 index 0000000..5778335 --- /dev/null +++ b/mutes/anothertest.txt @@ -0,0 +1 @@ +example.org diff --git a/mutes/test.txt b/mutes/test.txt new file mode 100644 index 0000000..37c7591 --- /dev/null +++ b/mutes/test.txt @@ -0,0 +1,3 @@ +example.org +test.net +test.com diff --git a/mutes/yetanothertest.txt b/mutes/yetanothertest.txt new file mode 100644 index 0000000..5e686a1 --- /dev/null +++ b/mutes/yetanothertest.txt @@ -0,0 +1,2 @@ +example.org +test.net diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..ff4bdde --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,16 @@ +use std::path::PathBuf; + +use clap::Parser; + +#[derive(Parser)] +#[command(version, about, long_about = None)] +pub struct Cli { + /// Selects a custom config file + pub config: Option, + + /// Sets output directory (optional, defaults to current directory) + #[arg(last = true)] + pub output: Option, + // TODO more options + // TODO verbose mode +} diff --git a/src/main.rs b/src/main.rs index 37fe027..cba3a55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,54 +1,50 @@ // src/main.rs +mod cli; mod manip; -mod tests; -use std::path::PathBuf; +use glob::glob; +use manip::{Limit, LimitList, MergedLimitList}; -use clap::Parser; -use log::error; - -#[derive(Parser)] -#[command(version, about, long_about = None)] -struct Cli { - /// Selects a custom config file - config: Option, - - /// Specifies files/directories for blocks - #[arg(short = 'B', long)] - block: Vec, - - /// Specifies files/directories for silences - #[arg(short = 'M', long)] - mute: Vec, - - /// Specifies a source (*.block.txt, *.mute.txt) - #[arg(short = 'S', long)] - src: Vec, - - /// Specifies confidence in a source. Default = 100 - #[arg(short, long)] - trust: Vec, - - /// Sets output directory (optional, defaults to current directory) - #[arg(last = true)] - path: Option, - // TODO more options - // TODO verbose mode -} - -fn main() { +fn main() -> std::io::Result<()> { env_logger::init(); // TODO add more logging - let cli = Cli::parse(); + // TODO utilize CLI + // let _cli = cli::Cli::parse(); - if cli.block.is_empty() && cli.mute.is_empty() && cli.config.is_none() { - error!("No lists or configuration provided."); - } - // TODO parse config file if one is provided - // TODO read modsources from files - // TODO combine modsources into modmap - // TODO write modmap to files + let mut merged_list = MergedLimitList::new(); + + // Crawl /blocks for block lists -> glob pattern: "blocks/**/*.txt" + { + let mut blocklists = vec![]; + let block_paths = glob("blocks/**/*.txt").expect("Bad glob pattern"); + + for path in block_paths.filter_map(|x| x.ok()) { + blocklists.extend(LimitList::from_file(path)); + // TODO Check if path contains trust value as part of filename when building limitlist + } + + for list in blocklists { + merged_list.add_limit_list(list, Limit::Block); + } + } + + // Crawl /silences for silence lists -> glob pattern: "mutes/**/*.txt" + { + let mut mutelists = vec![]; + let mute_paths = glob("mutes/**/*.txt").expect("Bad glob pattern"); + + for path in mute_paths.filter_map(|x| x.ok()) { + mutelists.extend(LimitList::from_file(path)); + // TODO Check if path contains trust value as part of filename when building limitlist + } + + for list in mutelists { + merged_list.add_limit_list(list, Limit::Silence); + } + } + + merged_list.export_file("block.txt", "mute.txt", (50, 33)) } diff --git a/src/manip.rs b/src/manip.rs index c9b252e..60d61a7 100644 --- a/src/manip.rs +++ b/src/manip.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, fs}; +use std::{collections::HashMap, fs, path::PathBuf}; use log::error; @@ -28,6 +28,13 @@ impl LimitIndices { } self } + + pub fn normalize(&self, max: (u16, u16)) -> (u16, u16) { + let block = (self.block as f32) / (max.0 as f32) * 100f32; + let mute = (self.silence as f32) / (max.1 as f32) * 100f32; + + (block as u16, mute as u16) + } } impl From<(u16, u16)> for LimitIndices { @@ -45,65 +52,52 @@ impl From<(u16, u16)> for LimitIndices { /// used to weight limits when building a merged list #[derive(Debug, PartialEq, Eq)] pub struct LimitList { - pub limits: HashMap, + pub hosts: Vec, pub trust: u16, } -impl Default for LimitList { - fn default() -> Self { - Self { - limits: HashMap::new(), - trust: 100, - } - } -} - -impl From> for LimitList { - fn from(map: HashMap) -> Self { - Self { - limits: map, - trust: 100, - } - } -} - impl LimitList { - fn add_host(&mut self, host: &str, limit: Limit) -> &mut Self { - self.limits.insert(host.to_string(), limit); - self - } - - pub fn build(map: HashMap, trust: u16) -> Self { - let mut src = Self::from(map); - src.trust = trust; - src - } - - pub fn import_file(&mut self, path: &str, limit: Limit) -> std::io::Result<&mut Self> { + pub fn from_file(path: PathBuf) -> std::io::Result { + let mut list = LimitList { + hosts: vec![], + trust: 100, + }; let contents = fs::read_to_string(path)?; + for host in contents.lines().filter(|line| !line.is_empty()) { - self.add_host(host, limit); + list.hosts.push(host.to_string()); } - Ok(self) + Ok(list) } } /// A map of hosts (as strings) to their limit weights #[derive(Debug, Default, Eq, PartialEq)] pub struct MergedLimitList { - pub map: HashMap, - pub max: u16, + pub hostmap: HashMap, + pub trusts: (u16, u16), } impl MergedLimitList { - pub fn add_limit_list(&mut self, src: LimitList) -> &mut Self { - for (host, limit) in src.limits.into_iter() { - let entry = self.map.entry(host).or_default(); - entry.add_limit(limit, src.trust); + pub fn new() -> Self { + Self { + hostmap: HashMap::new(), + trusts: (0, 0), + } + } + + pub fn add_limit_list(&mut self, src: LimitList, limit_type: Limit) -> &mut Self { + for host in src.hosts.into_iter() { + let entry = self.hostmap.entry(host).or_default(); + entry.add_limit(limit_type, src.trust); + } + + match limit_type { + Limit::Block => self.trusts.0 += src.trust, + Limit::Silence => self.trusts.1 += src.trust, } - self.max += src.trust; self } @@ -113,7 +107,7 @@ impl MergedLimitList { mute_path: &str, indices: (u16, u16), ) -> std::io::Result<()> { - if self.map.is_empty() { + if self.hostmap.is_empty() { error!("Nothing to export!"); return Ok(()); } @@ -122,8 +116,8 @@ impl MergedLimitList { let mut block_output: String = String::default(); let mut mute_output: String = String::default(); - for item in self.map.into_iter() { - let (block_trust, mute_trust) = (item.1.block, item.1.silence); + for item in self.hostmap.into_iter() { + let (block_trust, mute_trust) = item.1.normalize(self.trusts); if block_trust >= block_thresh { block_output.push_str(&(item.0.clone() + "\n")); diff --git a/src/tests.rs b/src/tests.rs deleted file mode 100644 index 5ac1536..0000000 --- a/src/tests.rs +++ /dev/null @@ -1 +0,0 @@ -mod manip; \ No newline at end of file diff --git a/src/tests/manip.rs b/src/tests/manip.rs deleted file mode 100644 index 3ea0565..0000000 --- a/src/tests/manip.rs +++ /dev/null @@ -1,200 +0,0 @@ -#![cfg(test)] - -use std::{collections::HashMap, fs}; - -use crate::manip::*; - -#[test] -fn limit_add() { - let mut at = LimitIndices::default(); - - at.add_limit(Limit::Block, 123) - .add_limit(Limit::Silence, 456); - - let test_at = LimitIndices { - block: 123, - silence: 456, - }; - - assert_eq!(at, test_at); -} - -#[test] -fn limit_combine() { - let mut at = LimitIndices::default(); - - at.add_limit(Limit::Block, 123) - .add_limit(Limit::Block, 333) - .add_limit(Limit::Silence, 123); - - let test_at = LimitIndices { - block: 456, - silence: 123, - }; - - assert_eq!(at, test_at); -} - -#[test] -fn limitlist_from_map() { - let src1 = LimitList::from(HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ])); - - assert_eq!( - src1.limits, - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ]) - ); - - assert_eq!(src1.trust, 100); -} - -#[test] -fn limitlist_from_map_and_trust() { - let src2 = LimitList::build( - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ]), - 123, - ); - - assert_eq!( - src2.limits, - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ]) - ); - assert_eq!(src2.trust, 123); -} - -#[test] -fn limitlist_from_file() -> std::io::Result<()> { - let mut src = LimitList::default(); - src.import_file("test/example_blocklist.txt", Limit::Block)? - .import_file("test/example_mutelist.txt", Limit::Silence)?; - - let test_src = LimitList::from(HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Block), - (String::from("example.net"), Limit::Silence), - ])); - - assert_eq!(test_src, src); - - Ok(()) -} - -#[test] -fn mergedlist_from_limitlist() -> std::io::Result<()> { - let mut ml = MergedLimitList::default(); - - let src1 = LimitList::from(HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ])); - - let mut src2 = LimitList::default(); - src2.import_file("test/example_blocklist.txt", Limit::Block)? - .import_file("test/example_mutelist.txt", Limit::Silence)?; - - ml.add_limit_list(src1).add_limit_list(src2); - - let test_ml = MergedLimitList { - map: HashMap::from([ - (String::from("example.com"), LimitIndices::from((200, 0))), - (String::from("example.org"), LimitIndices::from((100, 100))), - (String::from("example.net"), LimitIndices::from((100, 100))), - ]), - max: 200, - }; - - assert_eq!(ml, test_ml); - - let src3 = LimitList::build( - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - ]), - 200, - ); - - let src4 = LimitList::build( - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.net"), Limit::Silence), - ]), - 50, - ); - - ml.add_limit_list(src3).add_limit_list(src4); - - let test_ml = MergedLimitList { - map: HashMap::from([ - (String::from("example.com"), LimitIndices::from((450, 0))), - (String::from("example.org"), LimitIndices::from((100, 300))), - (String::from("example.net"), LimitIndices::from((100, 150))), - ]), - max: 450, - }; - - assert_eq!(ml, test_ml); - - Ok(()) -} - -#[test] -fn mergedlist_export_txt() -> std::io::Result<()> { - let mut ml = MergedLimitList::default(); - - let src1 = LimitList::from(HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - (String::from("example.net"), Limit::Block), - ])); - - let mut src2 = LimitList::default(); - src2.import_file("test/example_blocklist.txt", Limit::Block)? - .import_file("test/example_mutelist.txt", Limit::Silence)?; - - let src3 = LimitList::build( - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.org"), Limit::Silence), - ]), - 200, - ); - - let src4 = LimitList::build( - HashMap::from([ - (String::from("example.com"), Limit::Block), - (String::from("example.net"), Limit::Silence), - ]), - 50, - ); - - ml.add_limit_list(src1) - .add_limit_list(src2) - .add_limit_list(src3) - .add_limit_list(src4); - - let _ = ml.export_file("test/test_blocks.txt", "test/test_mutes.txt", (200, 150)); - - let file_blocks: String = fs::read_to_string("test/test_blocks.txt")?; - let file_mutes: String = fs::read_to_string("test/test_mutes.txt")?; - - assert_eq!(file_blocks, "example.com\n"); - assert_eq!(file_mutes, "example.net\nexample.org\n"); - - Ok(()) -} diff --git a/test/example_blocklist.txt b/test/example_blocklist.txt deleted file mode 100644 index 9abbe58..0000000 --- a/test/example_blocklist.txt +++ /dev/null @@ -1,5 +0,0 @@ -example.com - -example.org - - diff --git a/test/example_mutelist.txt b/test/example_mutelist.txt deleted file mode 100644 index f3ec5c9..0000000 --- a/test/example_mutelist.txt +++ /dev/null @@ -1,3 +0,0 @@ - - -example.net diff --git a/test/test_blocks.txt b/test/test_blocks.txt deleted file mode 100644 index de54ac6..0000000 --- a/test/test_blocks.txt +++ /dev/null @@ -1 +0,0 @@ -example.com