Initial commit

Branch: master
Commit 7fdcab7d6c by Roelf Wichertjes, 2 years ago
Signed by: rewby (GPG key ID 4C2B6D2972EE5423)
9 changed files with 570 additions and 0 deletions
1. .gitignore (+113, -0)
2. Cargo.toml (+27, -0)
3. src/args.rs (+34, -0)
4. src/compression.rs (+53, -0)
5. src/connector.rs (+75, -0)
6. src/main.rs (+131, -0)
7. src/pbar.rs (+67, -0)
8. src/project_config/config_json.rs (+23, -0)
9. src/project_config/mod.rs (+47, -0)

.gitignore (+113, -0)

@@ -0,0 +1,113 @@

# Created by https://www.toptal.com/developers/gitignore/api/rust,intellij+all
# Edit at https://www.toptal.com/developers/gitignore?templates=rust,intellij+all

### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Intellij+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360

.idea/

# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023

*.iml
modules.xml
.idea/misc.xml
*.ipr

# Sonarlint plugin
.idea/sonarlint

### Rust ###
# Generated by Cargo
# will have compiled files and executables
debug/
target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# End of https://www.toptal.com/developers/gitignore/api/rust,intellij+all

Cargo.toml (+27, -0)

@@ -0,0 +1,27 @@
[package]
name = "queuectl"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
tokio = { version = "1.14.0", features = ["full"] }
anyhow = "1.0.51"
tracing = "0.1"
tracing-subscriber = "0.2"
clap = { version = "3.0.0-rc.0", features = ["derive"] }
redis = { version = "0.21.4", features = ["tokio-comp"] }
futures = "0.3"
tokio-stream = { version = "0.1.8", features = ["io-util"] }
async-compression = { version = "0.3.8", features = ["tokio", "all-algorithms"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
url = "2.2.2"
reqwest = { version = "0.11.7", features = ["rustls-tls-webpki-roots", "stream"], default-features = false }
tokio-util = { version = "0.6.9", features = ["io"] }
chrono = "0.4.19"
chrono-humanize = "0.2.1"
async-read-progress = "0.2.0"
indicatif = { version = "0.16.2", features = ["improved_unicode"] }
once_cell = "1.8.0"

src/args.rs (+34, -0)

@@ -0,0 +1,34 @@
use clap::Parser;
use once_cell::sync::Lazy;

#[derive(Parser)]
#[clap(name = "queuectl")]
#[clap(version = "1.0")]
pub struct Cli {
    /// Input path. Can be a file or a URL that starts with http:// or https://.
    #[clap(short, long)]
    pub input: String,
    /// Tracker slug for the project.
    #[clap(short, long)]
    pub project: String,
    /// The queue into which items will be queued.
    #[clap(short, long, default_value = "todo")]
    pub queue: String,
    /// How many items to send per SADD command.
    #[clap(long, default_value = "8192")]
    pub chunk_size: usize,
    /// URL of the redis server to connect to.
    #[clap(long, default_value = "redis://127.0.0.1/")]
    pub redis: String,
    /// Compression of the input file. By default, autodetected from the file extension.
    #[clap(long, short, default_value = "auto", arg_enum)]
    pub compression: crate::compression::CompressionMode,
    /// How many commands to pipeline into redis.
    #[clap(long, default_value = "32")]
    pub pipeline_size: usize,
    /// Do not show a progress bar.
    #[clap(long)]
    pub no_progressbar: bool,
}

pub static ARGS: Lazy<Cli> = Lazy::new(Cli::parse);
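
For reference, a hypothetical invocation built from the flags above (all values are examples, not from the commit; clap derives kebab-case flag names like --chunk-size from the field names):

    queuectl --input items.txt.zst --project example-project --queue todo --redis redis://127.0.0.1/ --chunk-size 8192 --pipeline-size 32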

src/compression.rs (+53, -0)

@@ -0,0 +1,53 @@
use async_compression::tokio::bufread::{GzipDecoder, XzDecoder, ZstdDecoder};
use clap::ArgEnum;
use tokio::io::{AsyncRead, BufReader};

#[derive(Copy, Clone, PartialEq, Ord, PartialOrd, Eq, ArgEnum, Debug)]
pub enum CompressionMode {
AUTO,
NONE,
ZSTD,
GZIP,
XZ,
}

impl CompressionMode {
pub fn parse(filename: &str) -> Self {
if filename.to_ascii_lowercase().ends_with("zst") {
return Self::ZSTD;
}
if filename.to_ascii_lowercase().ends_with("zstd") {
return Self::ZSTD;
}
if filename.to_ascii_lowercase().ends_with("xz") {
return Self::XZ;
}
if filename.to_ascii_lowercase().ends_with("gz") {
return Self::GZIP;
}

return Self::NONE;
}
}

pub async fn get_decompressed_reader(
compression_mode: CompressionMode,
reader: Box<dyn AsyncRead + Unpin>,
) -> anyhow::Result<Box<dyn AsyncRead + Unpin>> {
match compression_mode {
CompressionMode::AUTO => unreachable!(),
CompressionMode::NONE => Ok(Box::new(reader)),
CompressionMode::ZSTD => {
let reader = BufReader::new(reader);
Ok(Box::new(ZstdDecoder::new(reader)))
}
CompressionMode::GZIP => {
let reader = BufReader::new(reader);
Ok(Box::new(GzipDecoder::new(reader)))
}
CompressionMode::XZ => {
let reader = BufReader::new(reader);
Ok(Box::new(XzDecoder::new(reader)))
}
}
}
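
Illustrative only — a small unit test (not part of the commit) pinning down the suffix matching above:

    #[cfg(test)]
    mod tests {
        use super::CompressionMode;

        #[test]
        fn detects_compression_from_extension() {
            // Matching is case-insensitive and driven purely by the filename suffix.
            assert_eq!(CompressionMode::parse("items.txt.zst"), CompressionMode::ZSTD);
            assert_eq!(CompressionMode::parse("ITEMS.TXT.GZ"), CompressionMode::GZIP);
            assert_eq!(CompressionMode::parse("items.txt.xz"), CompressionMode::XZ);
            assert_eq!(CompressionMode::parse("items.txt"), CompressionMode::NONE);
        }
    }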

src/connector.rs (+75, -0)

@@ -0,0 +1,75 @@
use anyhow::{anyhow, Context};
use async_read_progress::*;
use futures::stream::StreamExt;
use tokio::fs::File;
use tokio::io::AsyncRead;
use tokio_util::io::StreamReader;
use tracing::info;

use crate::args::ARGS;
use crate::compression;
use crate::pbar;

pub async fn open_http(
    url: &str,
) -> anyhow::Result<(Box<dyn AsyncRead + Unpin>, Option<u64>, String)> {
    info!("Requesting url {:?}...", url);
    let client = reqwest::Client::new();
    let resp = client
        .get(url)
        .send()
        .await
        .context("http client get request")?;
    let filename = String::from(resp.url().path_segments().unwrap().last().unwrap());
    info!("Completed request with code {}!", resp.status());
    if !resp.status().is_success() {
        return Err(anyhow!("incorrect status code attempting to retrieve file"));
    }
    let size = resp.content_length();
    // Map reqwest errors into std::io errors so StreamReader can wrap the byte stream.
    let stream = resp.bytes_stream();
    let stream = stream.map(|v| v.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)));
    Ok((Box::new(StreamReader::new(stream)), size, filename))
}

pub async fn open(
    update_progressbar: bool,
) -> anyhow::Result<(Box<dyn AsyncRead + Unpin>, Option<u64>)> {
    let (reader, total_bytes, filename): (Box<dyn AsyncRead + Unpin>, Option<u64>, String) =
        if ARGS.input == "-" {
            info!("Reading from stdin...");
            (Box::new(tokio::io::stdin()), None, "dummy.txt".to_string())
        } else if ARGS.input.starts_with("http") {
            open_http(&ARGS.input).await?
        } else {
            info!("Opening file {}...", ARGS.input);
            let file = File::open(&ARGS.input).await.context("open input file")?;
            let meta = file.metadata().await.context("read input file metadata")?;
            (Box::new(file), Some(meta.len()), ARGS.input.clone())
        };

    // Hook into the progress bar.
    let reader = Box::new(
        reader.report_progress(std::time::Duration::from_millis(20), move |bytes_read| {
            if update_progressbar {
                pbar::update_progress(bytes_read as u64);
            }
        }),
    );

    let compression_mode = if ARGS.compression == compression::CompressionMode::AUTO {
        info!("Attempting to guess compression mode...");
        compression::CompressionMode::parse(&filename)
    } else {
        ARGS.compression
    };
    info!("Using compression mode {:?}.", compression_mode);

    info!("Attempting decompression...");
    let reader = compression::get_decompressed_reader(compression_mode, reader).await?;
    info!("Ok!");
    Ok((reader, total_bytes))
}

src/main.rs (+131, -0)

@@ -0,0 +1,131 @@
use anyhow::Context;
use chrono::prelude::*;
use chrono_humanize::{Accuracy, HumanTime, Tense};
use futures::stream::StreamExt;
use tokio::io::AsyncBufReadExt;
use tokio::io::BufReader;
use tokio_stream::wrappers::LinesStream;
use tracing::{debug, info};

use args::ARGS;

mod args;
mod compression;
mod connector;
mod pbar;
mod project_config;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    pbar::setup_logging();

    let mut con = {
        let redis_url = project_config::get_redis_url()
            .await
            .context("getting project redis url")?;
        info!("Connecting to project redis...");
        let proj_client = redis::Client::open(redis_url).context("project redis client connect")?;
        let c = proj_client
            .get_async_connection()
            .await
            .context("project redis get_async_connection")?;
        info!("Connected!");
        c
    };

    let (reader, total_bytes) = connector::open(true).await.context("opening input")?;

    // Turn bytes into lines.
    let reader = BufReader::new(reader);
    let reader = reader.lines();
    let reader = LinesStream::new(reader);

    // Gather the lines into chunks of the given size.
    let reader = reader.chunks(ARGS.chunk_size);

    // Gather the chunks into pipelines.
    let mut reader = reader.chunks(ARGS.pipeline_size);

    // Keep track of some statistics.
    let mut counter = 0usize;
    let mut last_stamp: DateTime<Utc> = Utc::now();
    let start_stamp = last_stamp;

    // Initialize the progress bar.
    if !ARGS.no_progressbar {
        if let Some(total_size) = total_bytes {
            pbar::create_progress_bar(total_size);
        }
    }

    let redis_key = format!("{}:{}", ARGS.project, ARGS.queue);
    info!(
        "Writing into key {:?} using {} items per SADD and {} SADDs per pipeline...",
        redis_key, ARGS.chunk_size, ARGS.pipeline_size
    );

    // Main processing loop.
    while let Some(chunks) = reader.next().await {
        let mut pipeline = redis::pipe();
        let mut pipeline_counter = 0usize;

        // Create the pipeline commands.
        for chunk in chunks.into_iter() {
            let items: Vec<String> = chunk
                .into_iter()
                .collect::<std::io::Result<Vec<String>>>()
                .context("reading items")?;

            debug!("Queueing chunk of {} items into pipeline...", items.len());
            pipeline_counter += items.len();
            pipeline.sadd(&redis_key, items).ignore();
        }

        // Submit the pipeline. The explicit `()` keeps the result type
        // unambiguous, since every command in the pipeline is ignored.
        debug!("Submitting pipeline with {} items...", pipeline_counter);
        pipeline
            .query_async::<_, ()>(&mut con)
            .await
            .context("performing pipeline query")?;
        counter += pipeline_counter;

        // Compute statistics.
        let curr_stamp: DateTime<Utc> = Utc::now();
        let elapsed = curr_stamp.signed_duration_since(last_stamp);
        let elapsed_millis = elapsed.num_milliseconds();
        let elapsed_secs = (elapsed_millis as f64) / 1000.0f64;
        last_stamp = curr_stamp;
        let ips = (pipeline_counter as f64) / elapsed_secs;

        // Update the progress bar if it exists, or print a message instead.
        {
            let pb = pbar::PROGRESS_BAR.lock().unwrap();
            if let Some(pb) = pb.as_ref() {
                pb.set_message(format!("{} items ({:.02} items/s)", counter, ips));
            } else {
                let ht = HumanTime::from(curr_stamp.signed_duration_since(start_stamp));
                info!("Items queued! Inserted {} items so far in {}. Inserted {} items this round in {:.03} seconds. ({:.03} items/s)",
                    counter,
                    ht.to_text_en(Accuracy::Rough, Tense::Present),
                    pipeline_counter,
                    elapsed_secs,
                    ips
                );
            }
        }
    }

    // We're done; close off the progress bar.
    pbar::finish_progress_bar();

    // Print some final information.
    {
        let end_stamp: DateTime<Utc> = Utc::now();
        let ht = HumanTime::from(end_stamp.signed_duration_since(start_stamp));
        info!(
            "Finished queueing in {}.",
            ht.to_text_en(Accuracy::Precise, Tense::Present)
        );
    }
    Ok(())
}
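
With the defaults above (--chunk-size 8192, --pipeline-size 32), a single pipeline submission covers up to 8192 × 32 = 262,144 items: each SADD carries one chunk of lines, and the pipeline batches 32 such commands into one round trip to redis.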

src/pbar.rs (+67, -0)

@@ -0,0 +1,67 @@
use indicatif::{ProgressBar, ProgressStyle};
use once_cell::sync::Lazy;
use std::io::LineWriter;
use std::io::Write;
use std::sync::Mutex;

pub static PROGRESS_BAR: Lazy<Mutex<Option<ProgressBar>>> = Lazy::new(|| Mutex::new(None));

pub fn update_progress(position: u64) {
    let pb = PROGRESS_BAR.lock().unwrap();
    if let Some(pb) = pb.as_ref() {
        pb.set_position(position);
    }
}

pub fn create_progress_bar(total_size: u64) {
    let pb = ProgressBar::new(total_size);
    pb.enable_steady_tick(100);
    pb.set_style(ProgressStyle::default_bar()
        .template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} {msg} ({eta} remaining)")
        .progress_chars("=>-"));
    *PROGRESS_BAR.lock().unwrap() = Some(pb);
}

pub fn finish_progress_bar() {
    let mut pb = PROGRESS_BAR.lock().unwrap();
    if let Some(pb) = pb.as_ref() {
        pb.finish();
    }
    *pb = None;
}

/// A writer that sends log output through the progress bar when one is active,
/// and falls back to stderr otherwise.
pub struct PBWriter {}

impl PBWriter {
    pub fn new() -> Self {
        PBWriter {}
    }
}

impl std::io::Write for PBWriter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let pb = PROGRESS_BAR.lock().unwrap();
        if let Some(pb) = pb.as_ref() {
            // Lossy conversion avoids panicking on non-UTF-8 log output.
            pb.println(String::from_utf8_lossy(buf));
            Ok(buf.len())
        } else {
            std::io::stderr().write(buf)
        }
    }

    fn flush(&mut self) -> std::io::Result<()> {
        std::io::stderr().flush()
    }
}

pub fn setup_logging() {
    std::env::set_var(
        "RUST_LOG",
        std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string()),
    );

    tracing_subscriber::fmt::fmt()
        .with_writer(move || -> Box<dyn std::io::Write> {
            Box::new(LineWriter::new(PBWriter::new()))
        })
        .init();
}
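
Routing the tracing subscriber's writer through PBWriter is what keeps log output and the progress bar from fighting over the terminal: while a bar is active, log lines are emitted above it via ProgressBar::println; otherwise they go straight to stderr. The LineWriter wrapper ensures PBWriter receives whole lines at a time.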

src/project_config/config_json.rs (+23, -0)

@@ -0,0 +1,23 @@
use serde::{Deserialize, Serialize};

#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Root {
    pub max_claims_soft: Option<i64>,
    pub max_claims_hard: Option<i64>,
    pub moving_average_interval: Option<i64>,
    pub min_script_version: Option<String>,
    pub title: String,
    pub ignore_global_blocked: bool,
    pub item_type: String,
    pub history_length: i64,
    pub domains: Option<serde_json::Value>,
    pub valid_item_regexp: String,
    pub redis: Option<Redis>,
}

#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Redis {
    pub host: String,
    pub port: u16,
    pub pass: String,
}
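
Illustrative only (not part of the commit) — a minimal document that deserializes into Root; serde fills the missing Option fields with None:

    #[cfg(test)]
    mod tests {
        use super::Root;

        #[test]
        fn parses_minimal_config() {
            // All values here are made up for the example.
            let json = r#"{
                "title": "Example Project",
                "ignore_global_blocked": false,
                "item_type": "url",
                "history_length": 1000,
                "valid_item_regexp": "^https?://"
            }"#;
            let root: Root = serde_json::from_str(json).unwrap();
            assert_eq!(root.title, "Example Project");
            assert!(root.redis.is_none());
        }
    }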

src/project_config/mod.rs (+47, -0)

@@ -0,0 +1,47 @@
use crate::args::ARGS;
use anyhow::Context;
use redis::AsyncCommands;
use tracing::info;
use url::Url;

pub mod config_json;

pub async fn get_redis_url() -> anyhow::Result<Url> {
    let base_redis_url = Url::parse(&ARGS.redis).context("parsing redis url")?;
    info!(
        "Connecting to redis {} to find project config...",
        base_redis_url
    );
    let client = redis::Client::open(base_redis_url.clone()).context("connecting to redis")?;
    let mut base_con = client
        .get_async_connection()
        .await
        .context("get_async_connection")?;
    info!("Connected!");

    info!("Attempting to retrieve project configuration...");
    let config: Option<String> = base_con
        .hget("trackers", &ARGS.project)
        .await
        .context("hget trackers")?;
    let config = config
        .ok_or_else(|| anyhow::anyhow!("unable to get project config for {:?}", ARGS.project))?;

    let config: config_json::Root =
        serde_json::from_str(&config).context("parsing project config")?;
    info!("Read config:\n{:#?}", config);

    if let Some(r) = config.redis {
        // Build a redis URL of the form redis://default:<pass>@<host>:<port>/.
        let mut u = Url::parse("redis://127.0.0.1/")?;
        u.set_host(Some(&r.host)).unwrap();
        u.set_port(Some(r.port)).unwrap();
        u.set_username("default").unwrap();
        u.set_password(Some(&r.pass)).unwrap();
        info!("Found project redis server at {}!", u);
        Ok(u)
    } else {
        info!("No project-specific redis config found; staying on this redis!");
        Ok(base_redis_url)
    }
}
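
The project configuration lives in the base redis under the hash `trackers`, keyed by project slug, so it can be inspected by hand with e.g. `redis-cli HGET trackers <project>` (command shown for illustration). When the config carries a redis block, the resulting connection URL has the shape redis://default:<pass>@<host>:<port>/.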
