Rewrote (again), removed tests, split max values between limit types
This commit is contained in:
parent
d5bb2be949
commit
5c0dfa367e
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -135,9 +135,16 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
"glob",
|
||||||
"log",
|
"log",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
|
|
|
@ -7,4 +7,5 @@ description = "Combines blocklists for ActivityPub software"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "4.5.4", features = ["derive"] }
|
clap = { version = "4.5.4", features = ["derive"] }
|
||||||
env_logger = "0.11.3"
|
env_logger = "0.11.3"
|
||||||
|
glob = "0.3.1"
|
||||||
log = "0.4.21"
|
log = "0.4.21"
|
||||||
|
|
6
block.txt
Normal file
6
block.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
example.com
|
||||||
|
example.net
|
||||||
|
example.org
|
||||||
|
test.com
|
||||||
|
test.net
|
||||||
|
test.org
|
5
blocks/anothertest.txt
Normal file
5
blocks/anothertest.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
example.com
|
||||||
|
example.org
|
||||||
|
test.net
|
||||||
|
test.com
|
||||||
|
test.org
|
6
blocks/test.txt
Normal file
6
blocks/test.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
example.net
|
||||||
|
example.com
|
||||||
|
example.org
|
||||||
|
test.net
|
||||||
|
test.com
|
||||||
|
test.org
|
|
@ -1,2 +1,4 @@
|
||||||
example.net
|
example.net
|
||||||
example.org
|
example.org
|
||||||
|
test.net
|
||||||
|
test.org
|
1
mutes/anothertest.txt
Normal file
1
mutes/anothertest.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
example.org
|
3
mutes/test.txt
Normal file
3
mutes/test.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
example.org
|
||||||
|
test.net
|
||||||
|
test.com
|
2
mutes/yetanothertest.txt
Normal file
2
mutes/yetanothertest.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
example.org
|
||||||
|
test.net
|
16
src/cli.rs
Normal file
16
src/cli.rs
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
pub struct Cli {
|
||||||
|
/// Selects a custom config file
|
||||||
|
pub config: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// Sets output directory (optional, defaults to current directory)
|
||||||
|
#[arg(last = true)]
|
||||||
|
pub output: Option<PathBuf>,
|
||||||
|
// TODO more options
|
||||||
|
// TODO verbose mode
|
||||||
|
}
|
82
src/main.rs
82
src/main.rs
|
@ -1,54 +1,50 @@
|
||||||
// src/main.rs
|
// src/main.rs
|
||||||
|
|
||||||
|
mod cli;
|
||||||
mod manip;
|
mod manip;
|
||||||
mod tests;
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use glob::glob;
|
||||||
|
use manip::{Limit, LimitList, MergedLimitList};
|
||||||
|
|
||||||
use clap::Parser;
|
fn main() -> std::io::Result<()> {
|
||||||
use log::error;
|
|
||||||
|
|
||||||
#[derive(Parser)]
|
|
||||||
#[command(version, about, long_about = None)]
|
|
||||||
struct Cli {
|
|
||||||
/// Selects a custom config file
|
|
||||||
config: Option<PathBuf>,
|
|
||||||
|
|
||||||
/// Specifies files/directories for blocks
|
|
||||||
#[arg(short = 'B', long)]
|
|
||||||
block: Vec<PathBuf>,
|
|
||||||
|
|
||||||
/// Specifies files/directories for silences
|
|
||||||
#[arg(short = 'M', long)]
|
|
||||||
mute: Vec<PathBuf>,
|
|
||||||
|
|
||||||
/// Specifies a source (*.block.txt, *.mute.txt)
|
|
||||||
#[arg(short = 'S', long)]
|
|
||||||
src: Vec<PathBuf>,
|
|
||||||
|
|
||||||
/// Specifies confidence in a source. Default = 100
|
|
||||||
#[arg(short, long)]
|
|
||||||
trust: Vec<u16>,
|
|
||||||
|
|
||||||
/// Sets output directory (optional, defaults to current directory)
|
|
||||||
#[arg(last = true)]
|
|
||||||
path: Option<PathBuf>,
|
|
||||||
// TODO more options
|
|
||||||
// TODO verbose mode
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
env_logger::init(); // TODO add more logging
|
env_logger::init(); // TODO add more logging
|
||||||
|
|
||||||
let cli = Cli::parse();
|
// TODO utilize CLI
|
||||||
|
// let _cli = cli::Cli::parse();
|
||||||
if cli.block.is_empty() && cli.mute.is_empty() && cli.config.is_none() {
|
|
||||||
error!("No lists or configuration provided.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO parse config file if one is provided
|
// TODO parse config file if one is provided
|
||||||
|
|
||||||
// TODO read modsources from files
|
let mut merged_list = MergedLimitList::new();
|
||||||
// TODO combine modsources into modmap
|
|
||||||
// TODO write modmap to files
|
// Crawl /blocks for block lists -> glob pattern: "blocks/**/*.txt"
|
||||||
|
{
|
||||||
|
let mut blocklists = vec![];
|
||||||
|
let block_paths = glob("blocks/**/*.txt").expect("Bad glob pattern");
|
||||||
|
|
||||||
|
for path in block_paths.filter_map(|x| x.ok()) {
|
||||||
|
blocklists.extend(LimitList::from_file(path));
|
||||||
|
// TODO Check if path contains trust value as part of filename when building limitlist
|
||||||
|
}
|
||||||
|
|
||||||
|
for list in blocklists {
|
||||||
|
merged_list.add_limit_list(list, Limit::Block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Crawl /silences for silence lists -> glob pattern: "mutes/**/*.txt"
|
||||||
|
{
|
||||||
|
let mut mutelists = vec![];
|
||||||
|
let mute_paths = glob("mutes/**/*.txt").expect("Bad glob pattern");
|
||||||
|
|
||||||
|
for path in mute_paths.filter_map(|x| x.ok()) {
|
||||||
|
mutelists.extend(LimitList::from_file(path));
|
||||||
|
// TODO Check if path contains trust value as part of filename when building limitlist
|
||||||
|
}
|
||||||
|
|
||||||
|
for list in mutelists {
|
||||||
|
merged_list.add_limit_list(list, Limit::Silence);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
merged_list.export_file("block.txt", "mute.txt", (50, 33))
|
||||||
}
|
}
|
||||||
|
|
82
src/manip.rs
82
src/manip.rs
|
@ -1,4 +1,4 @@
|
||||||
use std::{collections::HashMap, fs};
|
use std::{collections::HashMap, fs, path::PathBuf};
|
||||||
|
|
||||||
use log::error;
|
use log::error;
|
||||||
|
|
||||||
|
@ -28,6 +28,13 @@ impl LimitIndices {
|
||||||
}
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn normalize(&self, max: (u16, u16)) -> (u16, u16) {
|
||||||
|
let block = (self.block as f32) / (max.0 as f32) * 100f32;
|
||||||
|
let mute = (self.silence as f32) / (max.1 as f32) * 100f32;
|
||||||
|
|
||||||
|
(block as u16, mute as u16)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<(u16, u16)> for LimitIndices {
|
impl From<(u16, u16)> for LimitIndices {
|
||||||
|
@ -45,65 +52,52 @@ impl From<(u16, u16)> for LimitIndices {
|
||||||
/// used to weight limits when building a merged list
|
/// used to weight limits when building a merged list
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub struct LimitList {
|
pub struct LimitList {
|
||||||
pub limits: HashMap<String, Limit>,
|
pub hosts: Vec<String>,
|
||||||
pub trust: u16,
|
pub trust: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for LimitList {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
limits: HashMap::new(),
|
|
||||||
trust: 100,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<HashMap<String, Limit>> for LimitList {
|
|
||||||
fn from(map: HashMap<String, Limit>) -> Self {
|
|
||||||
Self {
|
|
||||||
limits: map,
|
|
||||||
trust: 100,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LimitList {
|
impl LimitList {
|
||||||
fn add_host(&mut self, host: &str, limit: Limit) -> &mut Self {
|
pub fn from_file(path: PathBuf) -> std::io::Result<Self> {
|
||||||
self.limits.insert(host.to_string(), limit);
|
let mut list = LimitList {
|
||||||
self
|
hosts: vec![],
|
||||||
}
|
trust: 100,
|
||||||
|
};
|
||||||
pub fn build(map: HashMap<String, Limit>, trust: u16) -> Self {
|
|
||||||
let mut src = Self::from(map);
|
|
||||||
src.trust = trust;
|
|
||||||
src
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn import_file(&mut self, path: &str, limit: Limit) -> std::io::Result<&mut Self> {
|
|
||||||
let contents = fs::read_to_string(path)?;
|
let contents = fs::read_to_string(path)?;
|
||||||
|
|
||||||
for host in contents.lines().filter(|line| !line.is_empty()) {
|
for host in contents.lines().filter(|line| !line.is_empty()) {
|
||||||
self.add_host(host, limit);
|
list.hosts.push(host.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(self)
|
Ok(list)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A map of hosts (as strings) to their limit weights
|
/// A map of hosts (as strings) to their limit weights
|
||||||
#[derive(Debug, Default, Eq, PartialEq)]
|
#[derive(Debug, Default, Eq, PartialEq)]
|
||||||
pub struct MergedLimitList {
|
pub struct MergedLimitList {
|
||||||
pub map: HashMap<String, LimitIndices>,
|
pub hostmap: HashMap<String, LimitIndices>,
|
||||||
pub max: u16,
|
pub trusts: (u16, u16),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MergedLimitList {
|
impl MergedLimitList {
|
||||||
pub fn add_limit_list(&mut self, src: LimitList) -> &mut Self {
|
pub fn new() -> Self {
|
||||||
for (host, limit) in src.limits.into_iter() {
|
Self {
|
||||||
let entry = self.map.entry(host).or_default();
|
hostmap: HashMap::new(),
|
||||||
entry.add_limit(limit, src.trust);
|
trusts: (0, 0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_limit_list(&mut self, src: LimitList, limit_type: Limit) -> &mut Self {
|
||||||
|
for host in src.hosts.into_iter() {
|
||||||
|
let entry = self.hostmap.entry(host).or_default();
|
||||||
|
entry.add_limit(limit_type, src.trust);
|
||||||
|
}
|
||||||
|
|
||||||
|
match limit_type {
|
||||||
|
Limit::Block => self.trusts.0 += src.trust,
|
||||||
|
Limit::Silence => self.trusts.1 += src.trust,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.max += src.trust;
|
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -113,7 +107,7 @@ impl MergedLimitList {
|
||||||
mute_path: &str,
|
mute_path: &str,
|
||||||
indices: (u16, u16),
|
indices: (u16, u16),
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
if self.map.is_empty() {
|
if self.hostmap.is_empty() {
|
||||||
error!("Nothing to export!");
|
error!("Nothing to export!");
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
@ -122,8 +116,8 @@ impl MergedLimitList {
|
||||||
let mut block_output: String = String::default();
|
let mut block_output: String = String::default();
|
||||||
let mut mute_output: String = String::default();
|
let mut mute_output: String = String::default();
|
||||||
|
|
||||||
for item in self.map.into_iter() {
|
for item in self.hostmap.into_iter() {
|
||||||
let (block_trust, mute_trust) = (item.1.block, item.1.silence);
|
let (block_trust, mute_trust) = item.1.normalize(self.trusts);
|
||||||
|
|
||||||
if block_trust >= block_thresh {
|
if block_trust >= block_thresh {
|
||||||
block_output.push_str(&(item.0.clone() + "\n"));
|
block_output.push_str(&(item.0.clone() + "\n"));
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
mod manip;
|
|
|
@ -1,200 +0,0 @@
|
||||||
#![cfg(test)]
|
|
||||||
|
|
||||||
use std::{collections::HashMap, fs};
|
|
||||||
|
|
||||||
use crate::manip::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limit_add() {
|
|
||||||
let mut at = LimitIndices::default();
|
|
||||||
|
|
||||||
at.add_limit(Limit::Block, 123)
|
|
||||||
.add_limit(Limit::Silence, 456);
|
|
||||||
|
|
||||||
let test_at = LimitIndices {
|
|
||||||
block: 123,
|
|
||||||
silence: 456,
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(at, test_at);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limit_combine() {
|
|
||||||
let mut at = LimitIndices::default();
|
|
||||||
|
|
||||||
at.add_limit(Limit::Block, 123)
|
|
||||||
.add_limit(Limit::Block, 333)
|
|
||||||
.add_limit(Limit::Silence, 123);
|
|
||||||
|
|
||||||
let test_at = LimitIndices {
|
|
||||||
block: 456,
|
|
||||||
silence: 123,
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(at, test_at);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limitlist_from_map() {
|
|
||||||
let src1 = LimitList::from(HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
]));
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
src1.limits,
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
])
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(src1.trust, 100);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limitlist_from_map_and_trust() {
|
|
||||||
let src2 = LimitList::build(
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
]),
|
|
||||||
123,
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
src2.limits,
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
])
|
|
||||||
);
|
|
||||||
assert_eq!(src2.trust, 123);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limitlist_from_file() -> std::io::Result<()> {
|
|
||||||
let mut src = LimitList::default();
|
|
||||||
src.import_file("test/example_blocklist.txt", Limit::Block)?
|
|
||||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
|
||||||
|
|
||||||
let test_src = LimitList::from(HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Block),
|
|
||||||
(String::from("example.net"), Limit::Silence),
|
|
||||||
]));
|
|
||||||
|
|
||||||
assert_eq!(test_src, src);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn mergedlist_from_limitlist() -> std::io::Result<()> {
|
|
||||||
let mut ml = MergedLimitList::default();
|
|
||||||
|
|
||||||
let src1 = LimitList::from(HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
]));
|
|
||||||
|
|
||||||
let mut src2 = LimitList::default();
|
|
||||||
src2.import_file("test/example_blocklist.txt", Limit::Block)?
|
|
||||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
|
||||||
|
|
||||||
ml.add_limit_list(src1).add_limit_list(src2);
|
|
||||||
|
|
||||||
let test_ml = MergedLimitList {
|
|
||||||
map: HashMap::from([
|
|
||||||
(String::from("example.com"), LimitIndices::from((200, 0))),
|
|
||||||
(String::from("example.org"), LimitIndices::from((100, 100))),
|
|
||||||
(String::from("example.net"), LimitIndices::from((100, 100))),
|
|
||||||
]),
|
|
||||||
max: 200,
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(ml, test_ml);
|
|
||||||
|
|
||||||
let src3 = LimitList::build(
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
]),
|
|
||||||
200,
|
|
||||||
);
|
|
||||||
|
|
||||||
let src4 = LimitList::build(
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.net"), Limit::Silence),
|
|
||||||
]),
|
|
||||||
50,
|
|
||||||
);
|
|
||||||
|
|
||||||
ml.add_limit_list(src3).add_limit_list(src4);
|
|
||||||
|
|
||||||
let test_ml = MergedLimitList {
|
|
||||||
map: HashMap::from([
|
|
||||||
(String::from("example.com"), LimitIndices::from((450, 0))),
|
|
||||||
(String::from("example.org"), LimitIndices::from((100, 300))),
|
|
||||||
(String::from("example.net"), LimitIndices::from((100, 150))),
|
|
||||||
]),
|
|
||||||
max: 450,
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(ml, test_ml);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn mergedlist_export_txt() -> std::io::Result<()> {
|
|
||||||
let mut ml = MergedLimitList::default();
|
|
||||||
|
|
||||||
let src1 = LimitList::from(HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
(String::from("example.net"), Limit::Block),
|
|
||||||
]));
|
|
||||||
|
|
||||||
let mut src2 = LimitList::default();
|
|
||||||
src2.import_file("test/example_blocklist.txt", Limit::Block)?
|
|
||||||
.import_file("test/example_mutelist.txt", Limit::Silence)?;
|
|
||||||
|
|
||||||
let src3 = LimitList::build(
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.org"), Limit::Silence),
|
|
||||||
]),
|
|
||||||
200,
|
|
||||||
);
|
|
||||||
|
|
||||||
let src4 = LimitList::build(
|
|
||||||
HashMap::from([
|
|
||||||
(String::from("example.com"), Limit::Block),
|
|
||||||
(String::from("example.net"), Limit::Silence),
|
|
||||||
]),
|
|
||||||
50,
|
|
||||||
);
|
|
||||||
|
|
||||||
ml.add_limit_list(src1)
|
|
||||||
.add_limit_list(src2)
|
|
||||||
.add_limit_list(src3)
|
|
||||||
.add_limit_list(src4);
|
|
||||||
|
|
||||||
let _ = ml.export_file("test/test_blocks.txt", "test/test_mutes.txt", (200, 150));
|
|
||||||
|
|
||||||
let file_blocks: String = fs::read_to_string("test/test_blocks.txt")?;
|
|
||||||
let file_mutes: String = fs::read_to_string("test/test_mutes.txt")?;
|
|
||||||
|
|
||||||
assert_eq!(file_blocks, "example.com\n");
|
|
||||||
assert_eq!(file_mutes, "example.net\nexample.org\n");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,5 +0,0 @@
|
||||||
example.com
|
|
||||||
|
|
||||||
example.org
|
|
||||||
|
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
|
|
||||||
|
|
||||||
example.net
|
|
|
@ -1 +0,0 @@
|
||||||
example.com
|
|
Loading…
Reference in a new issue