Rewrote (again), removed tests, split max values between limit types

This commit is contained in:
gil 2024-06-02 23:56:42 -05:00
parent d5bb2be949
commit 5c0dfa367e
18 changed files with 129 additions and 297 deletions

7
Cargo.lock generated
View file

@ -135,9 +135,16 @@ version = "0.1.0"
dependencies = [
"clap",
"env_logger",
"glob",
"log",
]
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "heck"
version = "0.5.0"

View file

@ -7,4 +7,5 @@ description = "Combines blocklists for ActivityPub software"
[dependencies]
clap = { version = "4.5.4", features = ["derive"] }
env_logger = "0.11.3"
glob = "0.3.1"
log = "0.4.21"

6
block.txt Normal file
View file

@ -0,0 +1,6 @@
example.com
example.net
example.org
test.com
test.net
test.org

5
blocks/anothertest.txt Normal file
View file

@ -0,0 +1,5 @@
example.com
example.org
test.net
test.com
test.org

6
blocks/test.txt Normal file
View file

@ -0,0 +1,6 @@
example.net
example.com
example.org
test.net
test.com
test.org

View file

@ -1,2 +1,4 @@
example.net
example.org
test.net
test.org

3
mute.txt Normal file
View file

@ -0,0 +1,3 @@
example.org
test.com
test.net

1
mutes/anothertest.txt Normal file
View file

@ -0,0 +1 @@
example.org

3
mutes/test.txt Normal file
View file

@ -0,0 +1,3 @@
example.org
test.net
test.com

2
mutes/yetanothertest.txt Normal file
View file

@ -0,0 +1,2 @@
example.org
test.net

16
src/cli.rs Normal file
View file

@ -0,0 +1,16 @@
use std::path::PathBuf;
use clap::Parser;
// Command-line arguments for the program, parsed through clap's derive API.
// NOTE: clap uses the `///` comments on the fields below as the help text for
// each argument, so rewording them changes the `--help` output.
#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Cli {
/// Selects a custom config file
// No `short`/`long` attribute is set, so clap treats this as an optional
// positional argument.
pub config: Option<PathBuf>,
/// Sets output directory (optional, defaults to current directory)
// `last = true`: the value is only accepted after a `--` separator.
// NOTE(review): no default is applied here; presumably the caller falls back
// to the current directory when this is `None` — confirm.
#[arg(last = true)]
pub output: Option<PathBuf>,
// TODO more options
// TODO verbose mode
}

View file

@ -1,54 +1,50 @@
// src/main.rs
mod cli;
mod manip;
mod tests;
use std::path::PathBuf;
use glob::glob;
use manip::{Limit, LimitList, MergedLimitList};
use clap::Parser;
use log::error;
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Cli {
/// Selects a custom config file
config: Option<PathBuf>,
/// Specifies files/directories for blocks
#[arg(short = 'B', long)]
block: Vec<PathBuf>,
/// Specifies files/directories for silences
#[arg(short = 'M', long)]
mute: Vec<PathBuf>,
/// Specifies a source (*.block.txt, *.mute.txt)
#[arg(short = 'S', long)]
src: Vec<PathBuf>,
/// Specifies confidence in a source. Default = 100
#[arg(short, long)]
trust: Vec<u16>,
/// Sets output directory (optional, defaults to current directory)
#[arg(last = true)]
path: Option<PathBuf>,
// TODO more options
// TODO verbose mode
}
fn main() {
fn main() -> std::io::Result<()> {
env_logger::init(); // TODO add more logging
let cli = Cli::parse();
// TODO utilize CLI
// let _cli = cli::Cli::parse();
if cli.block.is_empty() && cli.mute.is_empty() && cli.config.is_none() {
error!("No lists or configuration provided.");
}
// TODO parse config file if one is provided
// TODO read modsources from files
// TODO combine modsources into modmap
// TODO write modmap to files
let mut merged_list = MergedLimitList::new();
// Crawl /blocks for block lists -> glob pattern: "blocks/**/*.txt"
{
let mut blocklists = vec![];
let block_paths = glob("blocks/**/*.txt").expect("Bad glob pattern");
for path in block_paths.filter_map(|x| x.ok()) {
blocklists.extend(LimitList::from_file(path));
// TODO Check if path contains trust value as part of filename when building limitlist
}
for list in blocklists {
merged_list.add_limit_list(list, Limit::Block);
}
}
// Crawl /mutes for silence lists -> glob pattern: "mutes/**/*.txt"
{
let mut mutelists = vec![];
let mute_paths = glob("mutes/**/*.txt").expect("Bad glob pattern");
for path in mute_paths.filter_map(|x| x.ok()) {
mutelists.extend(LimitList::from_file(path));
// TODO Check if path contains trust value as part of filename when building limitlist
}
for list in mutelists {
merged_list.add_limit_list(list, Limit::Silence);
}
}
merged_list.export_file("block.txt", "mute.txt", (50, 33))
}

View file

@ -1,4 +1,4 @@
use std::{collections::HashMap, fs};
use std::{collections::HashMap, fs, path::PathBuf};
use log::error;
@ -28,6 +28,13 @@ impl LimitIndices {
}
self
}
/// Converts the accumulated block/silence weights into percentages of the
/// supplied maxima.
///
/// `max` is `(block_max, silence_max)`. The result is
/// `(block_percent, silence_percent)`, each truncated toward zero and
/// saturated at `u16::MAX`.
///
/// The calculation is done in integer arithmetic. The earlier
/// `weight as f32 / max as f32 * 100.0` form rounded the quotient before
/// scaling it, so some exact percentages were truncated one too low
/// (for example 59 of 100 came out as 58).
pub fn normalize(&self, max: (u16, u16)) -> (u16, u16) {
    /// Returns `weight` as a whole-number percentage of `max`.
    fn percent(weight: u64, max: u64) -> u16 {
        if max == 0 {
            // Keep the results the float-based version produced for a zero
            // maximum: 0/0 became 0 and anything else saturated.
            return if weight == 0 { 0 } else { u16::MAX };
        }
        let scaled = weight * 100 / max;
        // Clamp first so the narrowing cast below can never wrap.
        scaled.min(u64::from(u16::MAX)) as u16
    }

    (
        percent(u64::from(self.block), u64::from(max.0)),
        percent(u64::from(self.silence), u64::from(max.1)),
    )
}
}
impl From<(u16, u16)> for LimitIndices {
@ -45,65 +52,52 @@ impl From<(u16, u16)> for LimitIndices {
/// used to weight limits when building a merged list
#[derive(Debug, PartialEq, Eq)]
pub struct LimitList {
pub limits: HashMap<String, Limit>,
pub hosts: Vec<String>,
pub trust: u16,
}
impl Default for LimitList {
fn default() -> Self {
Self {
limits: HashMap::new(),
trust: 100,
}
}
}
impl From<HashMap<String, Limit>> for LimitList {
fn from(map: HashMap<String, Limit>) -> Self {
Self {
limits: map,
trust: 100,
}
}
}
impl LimitList {
fn add_host(&mut self, host: &str, limit: Limit) -> &mut Self {
self.limits.insert(host.to_string(), limit);
self
}
pub fn build(map: HashMap<String, Limit>, trust: u16) -> Self {
let mut src = Self::from(map);
src.trust = trust;
src
}
pub fn import_file(&mut self, path: &str, limit: Limit) -> std::io::Result<&mut Self> {
pub fn from_file(path: PathBuf) -> std::io::Result<Self> {
let mut list = LimitList {
hosts: vec![],
trust: 100,
};
let contents = fs::read_to_string(path)?;
for host in contents.lines().filter(|line| !line.is_empty()) {
self.add_host(host, limit);
list.hosts.push(host.to_string());
}
Ok(self)
Ok(list)
}
}
/// A map of hosts (as strings) to their limit weights
#[derive(Debug, Default, Eq, PartialEq)]
pub struct MergedLimitList {
pub map: HashMap<String, LimitIndices>,
pub max: u16,
pub hostmap: HashMap<String, LimitIndices>,
pub trusts: (u16, u16),
}
impl MergedLimitList {
pub fn add_limit_list(&mut self, src: LimitList) -> &mut Self {
for (host, limit) in src.limits.into_iter() {
let entry = self.map.entry(host).or_default();
entry.add_limit(limit, src.trust);
pub fn new() -> Self {
Self {
hostmap: HashMap::new(),
trusts: (0, 0),
}
}
pub fn add_limit_list(&mut self, src: LimitList, limit_type: Limit) -> &mut Self {
for host in src.hosts.into_iter() {
let entry = self.hostmap.entry(host).or_default();
entry.add_limit(limit_type, src.trust);
}
match limit_type {
Limit::Block => self.trusts.0 += src.trust,
Limit::Silence => self.trusts.1 += src.trust,
}
self.max += src.trust;
self
}
@ -113,7 +107,7 @@ impl MergedLimitList {
mute_path: &str,
indices: (u16, u16),
) -> std::io::Result<()> {
if self.map.is_empty() {
if self.hostmap.is_empty() {
error!("Nothing to export!");
return Ok(());
}
@ -122,8 +116,8 @@ impl MergedLimitList {
let mut block_output: String = String::default();
let mut mute_output: String = String::default();
for item in self.map.into_iter() {
let (block_trust, mute_trust) = (item.1.block, item.1.silence);
for item in self.hostmap.into_iter() {
let (block_trust, mute_trust) = item.1.normalize(self.trusts);
if block_trust >= block_thresh {
block_output.push_str(&(item.0.clone() + "\n"));

View file

@ -1 +0,0 @@
mod manip;

View file

@ -1,200 +0,0 @@
#![cfg(test)]
use std::{collections::HashMap, fs};
use crate::manip::*;
/// One block limit and one silence limit are each recorded in their own index.
#[test]
fn limit_add() {
    let mut indices = LimitIndices::default();
    indices.add_limit(Limit::Block, 123);
    indices.add_limit(Limit::Silence, 456);

    let expected = LimitIndices {
        block: 123,
        silence: 456,
    };
    assert_eq!(indices, expected);
}
/// Limits of the same kind add up, while the other kind is tracked separately.
#[test]
fn limit_combine() {
    let mut indices = LimitIndices::default();
    indices.add_limit(Limit::Block, 123);
    indices.add_limit(Limit::Block, 333);
    indices.add_limit(Limit::Silence, 123);

    // 123 + 333 block weight, 123 silence weight.
    let expected = LimitIndices {
        block: 456,
        silence: 123,
    };
    assert_eq!(indices, expected);
}
/// Building a `LimitList` from a map keeps the map as-is and sets trust to 100.
#[test]
fn limitlist_from_map() {
    let hosts = HashMap::from([
        ("example.com".to_owned(), Limit::Block),
        ("example.org".to_owned(), Limit::Silence),
        ("example.net".to_owned(), Limit::Block),
    ]);

    // Hand a copy to the constructor so the same map can serve as the
    // expected value.
    let list = LimitList::from(hosts.clone());

    assert_eq!(list.limits, hosts);
    assert_eq!(list.trust, 100);
}
/// `LimitList::build` keeps the given map and stores the explicit trust value.
#[test]
fn limitlist_from_map_and_trust() {
    let hosts = HashMap::from([
        ("example.com".to_owned(), Limit::Block),
        ("example.org".to_owned(), Limit::Silence),
        ("example.net".to_owned(), Limit::Block),
    ]);

    // Hand a copy to the constructor so the same map can serve as the
    // expected value.
    let list = LimitList::build(hosts.clone(), 123);

    assert_eq!(list.limits, hosts);
    assert_eq!(list.trust, 123);
}
/// Importing the block and mute fixture files yields the same list as building
/// it from an equivalent map.
#[test]
fn limitlist_from_file() -> std::io::Result<()> {
    let expected = LimitList::from(HashMap::from([
        ("example.com".to_owned(), Limit::Block),
        ("example.org".to_owned(), Limit::Block),
        ("example.net".to_owned(), Limit::Silence),
    ]));

    let mut imported = LimitList::default();
    imported.import_file("test/example_blocklist.txt", Limit::Block)?;
    imported.import_file("test/example_mutelist.txt", Limit::Silence)?;

    assert_eq!(expected, imported);
    Ok(())
}
// Checks that merging limit lists accumulates per-host weights and the running
// `max` total, first for two sources and then after two more are added.
#[test]
fn mergedlist_from_limitlist() -> std::io::Result<()> {
let mut ml = MergedLimitList::default();
// Built via `From`, so this source carries the default trust of 100.
let src1 = LimitList::from(HashMap::from([
(String::from("example.com"), Limit::Block),
(String::from("example.org"), Limit::Silence),
(String::from("example.net"), Limit::Block),
]));
// Second source (also trust 100, from `Default`) is read from fixture files.
// NOTE(review): the fixture contents are not visible here; the expected values
// below imply example.com and example.org are blocked and example.net is
// silenced — confirm against the files.
let mut src2 = LimitList::default();
src2.import_file("test/example_blocklist.txt", Limit::Block)?
.import_file("test/example_mutelist.txt", Limit::Silence)?;
ml.add_limit_list(src1).add_limit_list(src2);
// Expected state after two sources of trust 100 each: `max` is 100 + 100.
let test_ml = MergedLimitList {
map: HashMap::from([
(String::from("example.com"), LimitIndices::from((200, 0))),
(String::from("example.org"), LimitIndices::from((100, 100))),
(String::from("example.net"), LimitIndices::from((100, 100))),
]),
max: 200,
};
assert_eq!(ml, test_ml);
// Two further sources with explicit trust values of 200 and 50.
let src3 = LimitList::build(
HashMap::from([
(String::from("example.com"), Limit::Block),
(String::from("example.org"), Limit::Silence),
]),
200,
);
let src4 = LimitList::build(
HashMap::from([
(String::from("example.com"), Limit::Block),
(String::from("example.net"), Limit::Silence),
]),
50,
);
ml.add_limit_list(src3).add_limit_list(src4);
// The new weights are added on top of the earlier totals: `max` becomes
// 200 + 200 + 50.
let test_ml = MergedLimitList {
map: HashMap::from([
(String::from("example.com"), LimitIndices::from((450, 0))),
(String::from("example.org"), LimitIndices::from((100, 300))),
(String::from("example.net"), LimitIndices::from((100, 150))),
]),
max: 450,
};
assert_eq!(ml, test_ml);
Ok(())
}
/// Merges four sources, exports the result, and checks the host lists that
/// were written to disk.
///
/// # Errors
///
/// Returns any I/O error raised while importing the fixtures, exporting the
/// merged list, or reading the exported files back.
#[test]
fn mergedlist_export_txt() -> std::io::Result<()> {
    let mut ml = MergedLimitList::default();

    let src1 = LimitList::from(HashMap::from([
        (String::from("example.com"), Limit::Block),
        (String::from("example.org"), Limit::Silence),
        (String::from("example.net"), Limit::Block),
    ]));

    let mut src2 = LimitList::default();
    src2.import_file("test/example_blocklist.txt", Limit::Block)?
        .import_file("test/example_mutelist.txt", Limit::Silence)?;

    let src3 = LimitList::build(
        HashMap::from([
            (String::from("example.com"), Limit::Block),
            (String::from("example.org"), Limit::Silence),
        ]),
        200,
    );
    let src4 = LimitList::build(
        HashMap::from([
            (String::from("example.com"), Limit::Block),
            (String::from("example.net"), Limit::Silence),
        ]),
        50,
    );

    ml.add_limit_list(src1)
        .add_limit_list(src2)
        .add_limit_list(src3)
        .add_limit_list(src4);

    // Propagate export failures. The result used to be discarded, so a failed
    // write went unnoticed and the assertions below could run against stale
    // files left over from an earlier run.
    // NOTE(review): (200, 150) are presumably the block and mute thresholds —
    // confirm against `export_file`.
    ml.export_file("test/test_blocks.txt", "test/test_mutes.txt", (200, 150))?;

    let file_blocks: String = fs::read_to_string("test/test_blocks.txt")?;
    let file_mutes: String = fs::read_to_string("test/test_mutes.txt")?;

    assert_eq!(file_blocks, "example.com\n");
    assert_eq!(file_mutes, "example.net\nexample.org\n");
    Ok(())
}

View file

@ -1,5 +0,0 @@
example.com
example.org

View file

@ -1,3 +0,0 @@
example.net

View file

@ -1 +0,0 @@
example.com