@@ -0,0 +1,113 @@ | |||||
# Created by https://www.toptal.com/developers/gitignore/api/rust,intellij+all | |||||
# Edit at https://www.toptal.com/developers/gitignore?templates=rust,intellij+all | |||||
### Intellij+all ### | |||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider | |||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | |||||
# User-specific stuff | |||||
.idea/**/workspace.xml | |||||
.idea/**/tasks.xml | |||||
.idea/**/usage.statistics.xml | |||||
.idea/**/dictionaries | |||||
.idea/**/shelf | |||||
# AWS User-specific | |||||
.idea/**/aws.xml | |||||
# Generated files | |||||
.idea/**/contentModel.xml | |||||
# Sensitive or high-churn files | |||||
.idea/**/dataSources/ | |||||
.idea/**/dataSources.ids | |||||
.idea/**/dataSources.local.xml | |||||
.idea/**/sqlDataSources.xml | |||||
.idea/**/dynamic.xml | |||||
.idea/**/uiDesigner.xml | |||||
.idea/**/dbnavigator.xml | |||||
# Gradle | |||||
.idea/**/gradle.xml | |||||
.idea/**/libraries | |||||
# Gradle and Maven with auto-import | |||||
# When using Gradle or Maven with auto-import, you should exclude module files, | |||||
# since they will be recreated, and may cause churn. Uncomment if using | |||||
# auto-import. | |||||
# .idea/artifacts | |||||
# .idea/compiler.xml | |||||
# .idea/jarRepositories.xml | |||||
# .idea/modules.xml | |||||
# .idea/*.iml | |||||
# .idea/modules | |||||
# *.iml | |||||
# *.ipr | |||||
# CMake | |||||
cmake-build-*/ | |||||
# Mongo Explorer plugin | |||||
.idea/**/mongoSettings.xml | |||||
# File-based project format | |||||
*.iws | |||||
# IntelliJ | |||||
out/ | |||||
# mpeltonen/sbt-idea plugin | |||||
.idea_modules/ | |||||
# JIRA plugin | |||||
atlassian-ide-plugin.xml | |||||
# Cursive Clojure plugin | |||||
.idea/replstate.xml | |||||
# Crashlytics plugin (for Android Studio and IntelliJ) | |||||
com_crashlytics_export_strings.xml | |||||
crashlytics.properties | |||||
crashlytics-build.properties | |||||
fabric.properties | |||||
# Editor-based Rest Client | |||||
.idea/httpRequests | |||||
# Android studio 3.1+ serialized cache file | |||||
.idea/caches/build_file_checksums.ser | |||||
### Intellij+all Patch ### | |||||
# Ignores the whole .idea folder and all .iml files | |||||
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 | |||||
.idea/ | |||||
# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 | |||||
*.iml | |||||
modules.xml | |||||
.idea/misc.xml | |||||
*.ipr | |||||
# Sonarlint plugin | |||||
.idea/sonarlint | |||||
### Rust ### | |||||
# Generated by Cargo | |||||
# will have compiled files and executables | |||||
debug/ | |||||
target/ | |||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries | |||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html | |||||
Cargo.lock | |||||
# These are backup files generated by rustfmt | |||||
**/*.rs.bk | |||||
# MSVC Windows builds of rustc generate these, which store debugging information | |||||
*.pdb | |||||
# End of https://www.toptal.com/developers/gitignore/api/rust,intellij+all |
@@ -0,0 +1,27 @@ | |||||
[package] | |||||
name = "queuectl" | |||||
version = "0.1.0" | |||||
edition = "2021" | |||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |||||
[dependencies] | |||||
tokio = { version = "1.14.0", features = ["full"] } | |||||
anyhow = "1.0.51" | |||||
tracing = "0.1" | |||||
tracing-subscriber = "0.2" | |||||
clap = { version = "3.0.0-rc.0", features = ["derive"] } | |||||
redis = { version = "0.21.4", features = ["tokio-comp"] } | |||||
futures = "0.3" | |||||
tokio-stream = { version = "0.1.8", features = ["io-util"] } | |||||
async-compression = { version = "0.3.8", features = ["tokio", "all-algorithms"] } | |||||
serde = { version = "1.0", features = ["derive"] } | |||||
serde_json = "1.0" | |||||
url = "2.2.2" | |||||
reqwest = { version = "0.11.7", features = ["rustls-tls-webpki-roots", "stream"], default-features = false } | |||||
tokio-util = { version = "0.6.9", features = ["io"] } | |||||
chrono = "0.4.19" | |||||
chrono-humanize = "0.2.1" | |||||
async-read-progress = "0.2.0" | |||||
indicatif = { version = "0.16.2", features = ["improved_unicode"] } | |||||
once_cell = "1.8.0" |
@@ -0,0 +1,34 @@ | |||||
use clap::Parser; | |||||
use once_cell::sync::Lazy; | |||||
// Command-line interface for queuectl.
//
// The `///` doc comments on the fields below double as clap's --help
// text, so any edit to them changes user-facing output.
#[derive(Parser)]
#[clap(name = "queuectl")]
// NOTE(review): clap reports version "1.0" while Cargo.toml says 0.1.0 — confirm which is intended.
#[clap(version = "1.0")]
pub struct Cli {
    #[clap(short, long)]
    /// Input path. Can be a file or a url that starts with http:// or https://.
    pub input: String,
    #[clap(short, long)]
    /// Tracker slug for the project.
    pub project: String,
    // Combined with `project` into the redis key "<project>:<queue>".
    #[clap(short, long, default_value = "todo")]
    /// The queue where items will be queued into.
    pub queue: String,
    // Batch size for a single SADD; larger chunks mean fewer commands.
    #[clap(long, default_value = "8192")]
    /// How many items to send per SADD command.
    pub chunk_size: usize,
    // Base redis; the project config may redirect to a different server.
    #[clap(long, default_value = "redis://127.0.0.1/")]
    /// URL of the redis to connect to.
    pub redis: String,
    // `arg_enum` maps CompressionMode variants to the accepted values.
    #[clap(long, short, default_value = "auto", arg_enum)]
    /// Specify the compression of the input file. By default will autodetect by file extension.
    pub compression: crate::compression::CompressionMode,
    // Number of SADD commands sent per redis pipeline round-trip.
    #[clap(long, default_value = "32")]
    /// How many commands to pipeline into redis.
    pub pipeline_size: usize,
    #[clap(long)]
    /// Do not show a progress bar.
    pub no_progressbar: bool,
}
pub static ARGS: Lazy<Cli> = Lazy::new(|| Cli::parse()); |
@@ -0,0 +1,53 @@ | |||||
use async_compression::tokio::bufread::{GzipDecoder, XzDecoder, ZstdDecoder}; | |||||
use clap::ArgEnum; | |||||
use tokio::io::{AsyncRead, BufReader}; | |||||
// Supported input compressions. Variant names feed clap's ArgEnum derive
// (presumably lowercased — `default_value = "auto"` in args.rs relies on
// this), so renaming a variant changes the accepted --compression values.
#[derive(Copy, Clone, PartialEq, Ord, PartialOrd, Eq, ArgEnum, Debug)]
pub enum CompressionMode {
    // Autodetect from the input filename; must be resolved to a concrete
    // mode before get_decompressed_reader is called.
    AUTO,
    NONE,
    ZSTD,
    GZIP,
    XZ,
}
impl CompressionMode { | |||||
pub fn parse(filename: &str) -> Self { | |||||
if filename.to_ascii_lowercase().ends_with("zst") { | |||||
return Self::ZSTD; | |||||
} | |||||
if filename.to_ascii_lowercase().ends_with("zstd") { | |||||
return Self::ZSTD; | |||||
} | |||||
if filename.to_ascii_lowercase().ends_with("xz") { | |||||
return Self::XZ; | |||||
} | |||||
if filename.to_ascii_lowercase().ends_with("gz") { | |||||
return Self::GZIP; | |||||
} | |||||
return Self::NONE; | |||||
} | |||||
} | |||||
pub async fn get_decompressed_reader( | |||||
compression_mode: CompressionMode, | |||||
reader: Box<dyn AsyncRead + Unpin>, | |||||
) -> anyhow::Result<Box<dyn AsyncRead + Unpin>> { | |||||
match compression_mode { | |||||
CompressionMode::AUTO => unreachable!(), | |||||
CompressionMode::NONE => Ok(Box::new(reader)), | |||||
CompressionMode::ZSTD => { | |||||
let reader = BufReader::new(reader); | |||||
Ok(Box::new(ZstdDecoder::new(reader))) | |||||
} | |||||
CompressionMode::GZIP => { | |||||
let reader = BufReader::new(reader); | |||||
Ok(Box::new(GzipDecoder::new(reader))) | |||||
} | |||||
CompressionMode::XZ => { | |||||
let reader = BufReader::new(reader); | |||||
Ok(Box::new(XzDecoder::new(reader))) | |||||
} | |||||
} | |||||
} |
@@ -0,0 +1,75 @@ | |||||
use anyhow::{anyhow, Context}; | |||||
use async_read_progress::*; | |||||
use futures::stream::StreamExt; | |||||
use tokio::fs::File; | |||||
use tokio::io::AsyncRead; | |||||
use tokio_util::io::StreamReader; | |||||
use tracing::info; | |||||
use crate::args::ARGS; | |||||
use crate::compression; | |||||
use crate::pbar; | |||||
pub async fn open_http( | |||||
url: &str, | |||||
) -> anyhow::Result<(Box<dyn AsyncRead + Unpin>, Option<u64>, String)> { | |||||
info!("Requesting url {:?}...", url); | |||||
let client = reqwest::Client::new(); | |||||
let resp = client | |||||
.get(url) | |||||
.send() | |||||
.await | |||||
.context("http client get request")?; | |||||
let filename = String::from(resp.url().path_segments().unwrap().last().unwrap()); | |||||
info!("Completed request with code {}!", resp.status()); | |||||
if !resp.status().is_success() { | |||||
return Err(anyhow!("incorrect status code attempting to retrieve file")); | |||||
} | |||||
let size = resp.content_length(); | |||||
let stream = resp.bytes_stream(); | |||||
let stream = stream.map(|v| v.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))); | |||||
Ok((Box::new(StreamReader::new(stream)), size, filename)) | |||||
} | |||||
pub async fn open( | |||||
update_progressbar: bool, | |||||
) -> anyhow::Result<(Box<dyn AsyncRead + Unpin>, Option<u64>)> { | |||||
let (reader, total_bytes, filename): (Box<dyn AsyncRead + Unpin>, Option<u64>, String) = | |||||
if ARGS.input == "-" { | |||||
info!("Reading from stdin..."); | |||||
(Box::new(tokio::io::stdin()), None, "dummy.txt".to_string()) | |||||
} else { | |||||
if ARGS.input.starts_with("http") { | |||||
open_http(&ARGS.input).await? | |||||
} else { | |||||
info!("Opening file {}...", ARGS.input); | |||||
let file = File::open(&ARGS.input).await.context("open input file")?; | |||||
let meta = file.metadata().await.context("read input file metadata")?; | |||||
(Box::new(file), Some(meta.len()), ARGS.input.clone()) | |||||
} | |||||
}; | |||||
//Hook into the progress bar. | |||||
let reader = { | |||||
Box::new( | |||||
reader.report_progress(std::time::Duration::from_millis(20), move |bytes_read| { | |||||
if update_progressbar { | |||||
pbar::update_progress(bytes_read as u64); | |||||
} | |||||
}), | |||||
) | |||||
}; | |||||
let compression_mode = if ARGS.compression == compression::CompressionMode::AUTO { | |||||
info!("Attempting to guess compression mode..."); | |||||
compression::CompressionMode::parse(&filename) | |||||
} else { | |||||
ARGS.compression | |||||
}; | |||||
info!("Using compression mode {:?}.", compression_mode); | |||||
info!("Attempting decompression..."); | |||||
let reader = compression::get_decompressed_reader(compression_mode, reader).await?; | |||||
info!("Ok!"); | |||||
Ok((reader, total_bytes)) | |||||
} |
@@ -0,0 +1,131 @@ | |||||
use anyhow::Context; | |||||
use chrono::prelude::*; | |||||
use chrono_humanize::{Accuracy, HumanTime, Tense}; | |||||
use futures::stream::StreamExt; | |||||
use tokio::io::AsyncBufReadExt; | |||||
use tokio::io::BufReader; | |||||
use tokio_stream::wrappers::LinesStream; | |||||
use tracing::{debug, info}; | |||||
use args::ARGS; | |||||
mod args; | |||||
mod compression; | |||||
mod connector; | |||||
mod pbar; | |||||
mod project_config; | |||||
#[tokio::main] | |||||
async fn main() -> anyhow::Result<()> { | |||||
pbar::setup_logging(); | |||||
let mut con = { | |||||
let redis_url = project_config::get_redis_url() | |||||
.await | |||||
.context("getting project redis url")?; | |||||
info!("Connecting to project redis..."); | |||||
let proj_client = redis::Client::open(redis_url).context("project redis client connect")?; | |||||
let c = proj_client | |||||
.get_async_connection() | |||||
.await | |||||
.context("project redis get_async_connection")?; | |||||
info!("Connected!"); | |||||
c | |||||
}; | |||||
let (reader, total_bytes) = connector::open(true).await.context("opening input")?; | |||||
//Turn bytes into lines. | |||||
let reader = BufReader::new(reader); | |||||
let reader = reader.lines(); | |||||
let reader = LinesStream::new(reader); | |||||
//Gather the lines into chunks of given size. | |||||
let reader = reader.chunks(ARGS.chunk_size); | |||||
//Gather the chunks into the pipelines. | |||||
let mut reader = reader.chunks(ARGS.pipeline_size); | |||||
//Keep track of some statistics. | |||||
let mut counter = 0usize; | |||||
let mut last_stamp: DateTime<Utc> = Utc::now(); | |||||
let start_stamp = last_stamp.clone(); | |||||
//Initialize the progress bar. | |||||
if !ARGS.no_progressbar { | |||||
if let Some(total_size) = total_bytes { | |||||
pbar::create_progress_bar(total_size); | |||||
} | |||||
} | |||||
let redis_key = format!("{}:{}", ARGS.project, ARGS.queue); | |||||
info!( | |||||
"Writing into key {:?} using {} items per SADD and {} SADDS per pipeline...", | |||||
redis_key, ARGS.chunk_size, ARGS.pipeline_size | |||||
); | |||||
//Main processing loop | |||||
while let Some(chunks) = reader.next().await { | |||||
let mut pipeline = &mut redis::pipe(); | |||||
let mut pipeline_counter = 0usize; | |||||
//Create the pipeline commands. | |||||
for chunk in chunks.into_iter() { | |||||
let items: Vec<String> = chunk | |||||
.into_iter() | |||||
.collect::<std::io::Result<Vec<String>>>() | |||||
.context("reading items")?; | |||||
debug!("Queueing chunk of {} items into pipeline...", items.len()); | |||||
pipeline_counter += items.len(); | |||||
pipeline = pipeline.sadd(&redis_key, items).ignore(); | |||||
} | |||||
//Submit pipeline | |||||
debug!("Submitting pipeline with {} items...", pipeline_counter); | |||||
pipeline | |||||
.query_async(&mut con) | |||||
.await | |||||
.context("performing pipeline query")?; | |||||
counter += pipeline_counter; | |||||
//Compute statistics. | |||||
let curr_stamp: DateTime<Utc> = Utc::now(); | |||||
let elapsed = curr_stamp.signed_duration_since(last_stamp); | |||||
let elapsed_millis = elapsed.num_milliseconds(); | |||||
let elapsed_secs = (elapsed_millis as f64) / 1000.0f64; | |||||
last_stamp = curr_stamp; | |||||
let ips = (pipeline_counter as f64) / elapsed_secs; | |||||
//Update the progressbar if it exists or print a message instead. | |||||
{ | |||||
let pb = pbar::PROGRESS_BAR.lock().unwrap(); | |||||
if let Some(pb) = pb.as_ref() { | |||||
pb.set_message(format!("{} items ({:.02} items/s)", counter, ips)); | |||||
} else { | |||||
let ht = HumanTime::from(curr_stamp.signed_duration_since(start_stamp)); | |||||
info!("Items queued! Inserted {} items so far in {}. Inserted {} items this round in {:.03} seconds. ({:.03} items/s)", | |||||
counter, | |||||
ht.to_text_en(Accuracy::Rough, Tense::Present), | |||||
pipeline_counter, | |||||
elapsed_secs, | |||||
ips | |||||
); | |||||
} | |||||
} | |||||
} | |||||
//We're done, close off the progress bar. | |||||
pbar::finish_progress_bar(); | |||||
//Print some last information. | |||||
{ | |||||
let end_stamp: DateTime<Utc> = Utc::now(); | |||||
let ht = HumanTime::from(end_stamp.signed_duration_since(start_stamp)); | |||||
info!( | |||||
"Finished queueing in {}.", | |||||
ht.to_text_en(Accuracy::Precise, Tense::Present) | |||||
); | |||||
} | |||||
Ok(()) | |||||
} |
@@ -0,0 +1,67 @@ | |||||
use indicatif::{ProgressBar, ProgressStyle}; | |||||
use once_cell::sync::Lazy; | |||||
use std::io::LineWriter; | |||||
use std::sync::Mutex; | |||||
pub static PROGRESS_BAR: Lazy<Mutex<Option<ProgressBar>>> = Lazy::new(|| Mutex::new(None)); | |||||
pub fn update_progress(position: u64) { | |||||
let pb = PROGRESS_BAR.lock().unwrap(); | |||||
if let Some(pb) = pb.as_ref() { | |||||
pb.set_position(position); | |||||
} | |||||
} | |||||
pub fn create_progress_bar(total_size: u64) { | |||||
let pb = ProgressBar::new(total_size); | |||||
pb.enable_steady_tick(100); | |||||
pb.set_style(ProgressStyle::default_bar() | |||||
.template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} {msg} ({eta} remaining)") | |||||
.progress_chars("=>-")); | |||||
*PROGRESS_BAR.lock().unwrap() = Some(pb); | |||||
} | |||||
pub fn finish_progress_bar() { | |||||
let mut pb = PROGRESS_BAR.lock().unwrap(); | |||||
if let Some(pb) = pb.as_ref() { | |||||
pb.finish(); | |||||
} | |||||
*pb = None; | |||||
} | |||||
// std::io::Write adapter that routes output through the progress bar
// (when one is active) so log lines don't clobber the bar's rendering.
pub struct PBWriter {}
impl PBWriter {
    // Construct a writer; it is stateless — all state lives in PROGRESS_BAR.
    pub fn new() -> Self {
        PBWriter {}
    }
}
impl std::io::Write for PBWriter { | |||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { | |||||
let pb = PROGRESS_BAR.lock().unwrap(); | |||||
if let Some(pb) = pb.as_ref() { | |||||
pb.println(std::str::from_utf8(buf).unwrap()); | |||||
Ok(buf.len()) | |||||
} else { | |||||
std::io::stderr().write(buf) | |||||
} | |||||
} | |||||
fn flush(&mut self) -> std::io::Result<()> { | |||||
std::io::stderr().flush() | |||||
} | |||||
} | |||||
pub fn setup_logging() { | |||||
std::env::set_var( | |||||
"RUST_LOG", | |||||
std::env::var("RUST_LOG").unwrap_or("info".to_string()), | |||||
); | |||||
tracing_subscriber::fmt::fmt() | |||||
.with_writer(move || -> Box<dyn std::io::Write> { | |||||
Box::new(LineWriter::new(PBWriter::new())) | |||||
}) | |||||
.init(); | |||||
} |
@@ -0,0 +1,23 @@ | |||||
use serde::{Deserialize, Serialize}; | |||||
// Tracker project configuration as stored (JSON) in the "trackers" redis
// hash. Only `redis` is consumed directly by this tool (see
// project_config::get_redis_url); the remaining fields mirror the
// tracker service's schema — semantics not verifiable from this crate.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Root {
    pub max_claims_soft: Option<i64>,
    pub max_claims_hard: Option<i64>,
    pub moving_average_interval: Option<i64>,
    pub min_script_version: Option<String>,
    pub title: String,
    pub ignore_global_blocked: bool,
    pub item_type: String,
    pub history_length: i64,
    // Schema unknown here; kept as raw JSON.
    pub domains: Option<serde_json::Value>,
    pub valid_item_regexp: String,
    // Project-specific redis endpoint; absent means "stay on base redis".
    pub redis: Option<Redis>,
}
// Project-specific redis endpoint; assembled into a redis:// URL with
// username "default" by project_config::get_redis_url.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Redis {
    pub host: String,
    pub port: u16,
    pub pass: String,
}
@@ -0,0 +1,47 @@ | |||||
use crate::args::ARGS; | |||||
use anyhow::Context; | |||||
use redis::AsyncCommands; | |||||
use tracing::info; | |||||
use url::Url; | |||||
pub mod config_json; | |||||
pub async fn get_redis_url() -> anyhow::Result<Url> { | |||||
let base_redis_url = Url::parse(&ARGS.redis).context("parsing redis url")?; | |||||
info!( | |||||
"Connecting to redis {} to find project config...", | |||||
base_redis_url | |||||
); | |||||
let client = redis::Client::open(base_redis_url.clone()).context("connecting to redis")?; | |||||
let mut base_con = client | |||||
.get_async_connection() | |||||
.await | |||||
.context("get_async_connection")?; | |||||
info!("Connected!"); | |||||
info!("Attempting to retrieve project configuration..."); | |||||
let config: Option<String> = base_con | |||||
.hget("trackers", &ARGS.project) | |||||
.await | |||||
.context("hget trackers")?; | |||||
if config.is_none() { | |||||
panic!("Unable to get project config!"); | |||||
} | |||||
let config: config_json::Root = | |||||
serde_json::from_str(&config.unwrap()).context("parsing project config")?; | |||||
info!("Read config:\n{:#?}", config); | |||||
if let Some(r) = config.redis { | |||||
let mut u = Url::parse("redis://127.0.0.1/")?; | |||||
u.set_host(Some(&r.host)).unwrap(); | |||||
u.set_port(Some(r.port)).unwrap(); | |||||
u.set_username("default").unwrap(); | |||||
u.set_password(Some(&r.pass)).unwrap(); | |||||
info!("Found project redis server at {}!", u); | |||||
Ok(u) | |||||
} else { | |||||
info!("No project-specific redis config found; staying on this redis!"); | |||||
Ok(base_redis_url) | |||||
} | |||||
} |