Rewrote (again), removed tests, split max values between limit types
This commit is contained in:
parent
d5bb2be949
commit
5c0dfa367e
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -135,9 +135,16 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"clap",
|
||||
"env_logger",
|
||||
"glob",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
|
|
|
@ -7,4 +7,5 @@ description = "Combines blocklists for ActivityPub software"
|
|||
[dependencies]
|
||||
clap = { version = "4.5.4", features = ["derive"] }
|
||||
env_logger = "0.11.3"
|
||||
glob = "0.3.1"
|
||||
log = "0.4.21"
|
||||
|
|
6
block.txt
Normal file
6
block.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
example.com
|
||||
example.net
|
||||
example.org
|
||||
test.com
|
||||
test.net
|
||||
test.org
|
5
blocks/anothertest.txt
Normal file
5
blocks/anothertest.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
example.com
|
||||
example.org
|
||||
test.net
|
||||
test.com
|
||||
test.org
|
6
blocks/test.txt
Normal file
6
blocks/test.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
example.net
|
||||
example.com
|
||||
example.org
|
||||
test.net
|
||||
test.com
|
||||
test.org
|
|
@ -1,2 +1,4 @@
|
|||
example.net
|
||||
example.org
|
||||
test.net
|
||||
test.org
|
1
mutes/anothertest.txt
Normal file
1
mutes/anothertest.txt
Normal file
|
@ -0,0 +1 @@
|
|||
example.org
|
3
mutes/test.txt
Normal file
3
mutes/test.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
example.org
|
||||
test.net
|
||||
test.com
|
2
mutes/yetanothertest.txt
Normal file
2
mutes/yetanothertest.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
example.org
|
||||
test.net
|
16
src/cli.rs
Normal file
16
src/cli.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
// Command-line arguments for the tool, parsed through clap's derive API.
// NOTE(review): the `///` comments on the fields are picked up by clap's derive
// as help text, so treat them as user-facing strings rather than plain comments.
#[derive(Parser)]
|
||||
// `version` and `about` carry no explicit values here; presumably clap fills them
// in from the Cargo package metadata — confirm against the clap derive reference.
#[command(version, about, long_about = None)]
|
||||
pub struct Cli {
|
||||
// No `short`/`long` is declared for this field, so it is taken positionally;
// it is `None` when the argument is omitted.
/// Selects a custom config file
|
||||
pub config: Option<PathBuf>,
|
||||
|
||||
// `last = true` below marks this as the final positional argument
// (per the clap docs it is only reachable after a `--` separator — TODO confirm).
/// Sets output directory (optional, defaults to current directory)
|
||||
#[arg(last = true)]
|
||||
pub output: Option<PathBuf>,
|
||||
// TODO more options
|
||||
// TODO verbose mode
|
||||
}
|
82
src/main.rs
82
src/main.rs
|
@ -1,54 +1,50 @@
|
|||
// src/main.rs
|
||||
|
||||
mod cli;
|
||||
mod manip;
|
||||
mod tests;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use glob::glob;
|
||||
use manip::{Limit, LimitList, MergedLimitList};
|
||||
|
||||
use clap::Parser;
|
||||
use log::error;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct Cli {
|
||||
/// Selects a custom config file
|
||||
config: Option<PathBuf>,
|
||||
|
||||
/// Specifies files/directories for blocks
|
||||
#[arg(short = 'B', long)]
|
||||
block: Vec<PathBuf>,
|
||||
|
||||
/// Specifies files/directories for silences
|
||||
#[arg(short = 'M', long)]
|
||||
mute: Vec<PathBuf>,
|
||||
|
||||
/// Specifies a source (*.block.txt, *.mute.txt)
|
||||
#[arg(short = 'S', long)]
|
||||
src: Vec<PathBuf>,
|
||||
|
||||
/// Specifies confidence in a source. Default = 100
|
||||
#[arg(short, long)]
|
||||
trust: Vec<u16>,
|
||||
|
||||
/// Sets output directory (optional, defaults to current directory)
|
||||
#[arg(last = true)]
|
||||
path: Option<PathBuf>,
|
||||
// TODO more options
|
||||
// TODO verbose mode
|
||||
}
|
||||
|
||||
fn main() {
|
||||
fn main() -> std::io::Result<()> {
|
||||
env_logger::init(); // TODO add more logging
|
||||
|
||||
let cli = Cli::parse();
|
||||
// TODO utilize CLI
|
||||
// let _cli = cli::Cli::parse();
|
||||
|
||||
if cli.block.is_empty() && cli.mute.is_empty() && cli.config.is_none() {
|
||||
error!("No lists or configuration provided.");
|
||||
}
|
||||
|
||||
// TODO parse config file if one is provided
|
||||
|
||||
// TODO read modsources from files
|
||||
// TODO combine modsources into modmap
|
||||
// TODO write modmap to files
|
||||
let mut merged_list = MergedLimitList::new();
|
||||
|
||||
// Crawl /blocks for block lists -> glob pattern: "blocks/**/*.txt"
|
||||
{
|
||||
let mut blocklists = vec![];
|
||||
let block_paths = glob("blocks/**/*.txt").expect("Bad glob pattern");
|
||||
|
||||
for path in block_paths.filter_map(|x| x.ok()) {
|
||||
blocklists.extend(LimitList::from_file(path));
|
||||
// TODO Check if path contains trust value as part of filename when building limitlist
|
||||
}
|
||||
|
||||
for list in blocklists {
|
||||
merged_list.add_limit_list(list, Limit::Block);
|
||||
}
|
||||
}
|
||||
|
||||
// Crawl /mutes for silence (mute) lists -> glob pattern: "mutes/**/*.txt"
|
||||
{
|
||||
let mut mutelists = vec![];
|
||||
let mute_paths = glob("mutes/**/*.txt").expect("Bad glob pattern");
|
||||
|
||||
for path in mute_paths.filter_map(|x| x.ok()) {
|
||||
mutelists.extend(LimitList::from_file(path));
|
||||
// TODO Check if path contains trust value as part of filename when building limitlist
|
||||
}
|
||||
|
||||
for list in mutelists {
|
||||
merged_list.add_limit_list(list, Limit::Silence);
|
||||
}
|
||||
}
|
||||
|
||||
merged_list.export_file("block.txt", "mute.txt", (50, 33))
|
||||
}
|
||||
|
|
82
src/manip.rs
82
src/manip.rs
|
@ -1,4 +1,4 @@
|
|||
use std::{collections::HashMap, fs};
|
||||
use std::{collections::HashMap, fs, path::PathBuf};
|
||||
|
||||
use log::error;
|
||||
|
||||
|
@ -28,6 +28,13 @@ impl LimitIndices {
|
|||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn normalize(&self, max: (u16, u16)) -> (u16, u16) {
|
||||
let block = (self.block as f32) / (max.0 as f32) * 100f32;
|
||||
let mute = (self.silence as f32) / (max.1 as f32) * 100f32;
|
||||
|
||||
(block as u16, mute as u16)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(u16, u16)> for LimitIndices {
|
||||
|
@ -45,65 +52,52 @@ impl From<(u16, u16)> for LimitIndices {
|
|||
/// used to weight limits when building a merged list
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct LimitList {
|
||||
pub limits: HashMap<String, Limit>,
|
||||
pub hosts: Vec<String>,
|
||||
pub trust: u16,
|
||||
}
|
||||
|
||||
impl Default for LimitList {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
limits: HashMap::new(),
|
||||
trust: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HashMap<String, Limit>> for LimitList {
|
||||
fn from(map: HashMap<String, Limit>) -> Self {
|
||||
Self {
|
||||
limits: map,
|
||||
trust: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LimitList {
|
||||
fn add_host(&mut self, host: &str, limit: Limit) -> &mut Self {
|
||||
self.limits.insert(host.to_string(), limit);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(map: HashMap<String, Limit>, trust: u16) -> Self {
|
||||
let mut src = Self::from(map);
|
||||
src.trust = trust;
|
||||
src
|
||||
}
|
||||
|
||||
pub fn import_file(&mut self, path: &str, limit: Limit) -> std::io::Result<&mut Self> {
|
||||
pub fn from_file(path: PathBuf) -> std::io::Result<Self> {
|
||||
let mut list = LimitList {
|
||||
hosts: vec![],
|
||||
trust: 100,
|
||||
};
|
||||
let contents = fs::read_to_string(path)?;
|
||||
|
||||
for host in contents.lines().filter(|line| !line.is_empty()) {
|
||||
self.add_host(host, limit);
|
||||
list.hosts.push(host.to_string());
|
||||
}
|
||||
|
||||
Ok(self)
|
||||
Ok(list)
|
||||
}
|
||||
}
|
||||
|
||||
/// A map of hosts (as strings) to their limit weights
|
||||
#[derive(Debug, Default, Eq, PartialEq)]
|
||||
pub struct MergedLimitList {
|
||||
pub map: HashMap<String, LimitIndices>,
|
||||
pub max: u16,
|
||||
pub hostmap: HashMap<String, LimitIndices>,
|
||||
pub trusts: (u16, u16),
|
||||
}
|
||||
|
||||
impl MergedLimitList {
|
||||
pub fn add_limit_list(&mut self, src: LimitList) -> &mut Self {
|
||||
for (host, limit) in src.limits.into_iter() {
|
||||
let entry = self.map.entry(host).or_default();
|
||||
entry.add_limit(limit, src.trust);
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
hostmap: HashMap::new(),
|
||||
trusts: (0, 0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_limit_list(&mut self, src: LimitList, limit_type: Limit) -> &mut Self {
|
||||
for host in src.hosts.into_iter() {
|
||||
let entry = self.hostmap.entry(host).or_default();
|
||||
entry.add_limit(limit_type, src.trust);
|
||||
}
|
||||
|
||||
match limit_type {
|
||||
Limit::Block => self.trusts.0 += src.trust,
|
||||
Limit::Silence => self.trusts.1 += src.trust,
|
||||
}
|
||||
|
||||
self.max += src.trust;
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -113,7 +107,7 @@ impl MergedLimitList {
|
|||
mute_path: &str,
|
||||
indices: (u16, u16),
|
||||
) -> std::io::Result<()> {
|
||||
if self.map.is_empty() {
|
||||
if self.hostmap.is_empty() {
|
||||
error!("Nothing to export!");
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -122,8 +116,8 @@ impl MergedLimitList {
|
|||
let mut block_output: String = String::default();
|
||||
let mut mute_output: String = String::default();
|
||||
|
||||
for item in self.map.into_iter() {
|
||||
let (block_trust, mute_trust) = (item.1.block, item.1.silence);
|
||||
for item in self.hostmap.into_iter() {
|
||||
let (block_trust, mute_trust) = item.1.normalize(self.trusts);
|
||||
|
||||
if block_trust >= block_thresh {
|
||||
block_output.push_str(&(item.0.clone() + "\n"));
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
mod manip;
|
|
@ -1,200 +0,0 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use std::{collections::HashMap, fs};
|
||||
|
||||
use crate::manip::*;
|
||||
|
||||
#[test]
|
||||
fn limit_add() {
|
||||
let mut at = LimitIndices::default();
|
||||
|
||||
at.add_limit(Limit::Block, 123)
|
||||
.add_limit(Limit::Silence, 456);
|
||||
|
||||
let test_at = LimitIndices {
|
||||
block: 123,
|
||||
silence: 456,
|
||||
};
|
||||
|
||||
assert_eq!(at, test_at);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn limit_combine() {
|
||||
let mut at = LimitIndices::default();
|
||||
|
||||
at.add_limit(Limit::Block, 123)
|
||||
.add_limit(Limit::Block, 333)
|
||||
.add_limit(Limit::Silence, 123);
|
||||
|
||||
let test_at = LimitIndices {
|
||||
block: 456,
|
||||
silence: 123,
|
||||
};
|
||||
|
||||
assert_eq!(at, test_at);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn limitlist_from_map() {
|
||||
let src1 = LimitList::from(HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
]));
|
||||
|
||||
assert_eq!(
|
||||
src1.limits,
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(src1.trust, 100);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn limitlist_from_map_and_trust() {
|
||||
let src2 = LimitList::build(
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
]),
|
||||
123,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
src2.limits,
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
])
|
||||
);
|
||||
assert_eq!(src2.trust, 123);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn limitlist_from_file() -> std::io::Result<()> {
|
||||
let mut src = LimitList::default();
|
||||
src.import_file("test/example_blocklist.txt", Limit::Block)?
|
||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
||||
|
||||
let test_src = LimitList::from(HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Block),
|
||||
(String::from("example.net"), Limit::Silence),
|
||||
]));
|
||||
|
||||
assert_eq!(test_src, src);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mergedlist_from_limitlist() -> std::io::Result<()> {
|
||||
let mut ml = MergedLimitList::default();
|
||||
|
||||
let src1 = LimitList::from(HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
]));
|
||||
|
||||
let mut src2 = LimitList::default();
|
||||
src2.import_file("test/example_blocklist.txt", Limit::Block)?
|
||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
||||
|
||||
ml.add_limit_list(src1).add_limit_list(src2);
|
||||
|
||||
let test_ml = MergedLimitList {
|
||||
map: HashMap::from([
|
||||
(String::from("example.com"), LimitIndices::from((200, 0))),
|
||||
(String::from("example.org"), LimitIndices::from((100, 100))),
|
||||
(String::from("example.net"), LimitIndices::from((100, 100))),
|
||||
]),
|
||||
max: 200,
|
||||
};
|
||||
|
||||
assert_eq!(ml, test_ml);
|
||||
|
||||
let src3 = LimitList::build(
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
]),
|
||||
200,
|
||||
);
|
||||
|
||||
let src4 = LimitList::build(
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.net"), Limit::Silence),
|
||||
]),
|
||||
50,
|
||||
);
|
||||
|
||||
ml.add_limit_list(src3).add_limit_list(src4);
|
||||
|
||||
let test_ml = MergedLimitList {
|
||||
map: HashMap::from([
|
||||
(String::from("example.com"), LimitIndices::from((450, 0))),
|
||||
(String::from("example.org"), LimitIndices::from((100, 300))),
|
||||
(String::from("example.net"), LimitIndices::from((100, 150))),
|
||||
]),
|
||||
max: 450,
|
||||
};
|
||||
|
||||
assert_eq!(ml, test_ml);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mergedlist_export_txt() -> std::io::Result<()> {
|
||||
let mut ml = MergedLimitList::default();
|
||||
|
||||
let src1 = LimitList::from(HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
(String::from("example.net"), Limit::Block),
|
||||
]));
|
||||
|
||||
let mut src2 = LimitList::default();
|
||||
src2.import_file("test/example_blocklist.txt", Limit::Block)?
|
||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
||||
|
||||
let src3 = LimitList::build(
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.org"), Limit::Silence),
|
||||
]),
|
||||
200,
|
||||
);
|
||||
|
||||
let src4 = LimitList::build(
|
||||
HashMap::from([
|
||||
(String::from("example.com"), Limit::Block),
|
||||
(String::from("example.net"), Limit::Silence),
|
||||
]),
|
||||
50,
|
||||
);
|
||||
|
||||
ml.add_limit_list(src1)
|
||||
.add_limit_list(src2)
|
||||
.add_limit_list(src3)
|
||||
.add_limit_list(src4);
|
||||
|
||||
let _ = ml.export_file("test/test_blocks.txt", "test/test_mutes.txt", (200, 150));
|
||||
|
||||
let file_blocks: String = fs::read_to_string("test/test_blocks.txt")?;
|
||||
let file_mutes: String = fs::read_to_string("test/test_mutes.txt")?;
|
||||
|
||||
assert_eq!(file_blocks, "example.com\n");
|
||||
assert_eq!(file_mutes, "example.net\nexample.org\n");
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
example.com
|
||||
|
||||
example.org
|
||||
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
|
||||
|
||||
example.net
|
|
@ -1 +0,0 @@
|
|||
example.com
|
Loading…
Reference in a new issue