use std::{
fmt::Display,
path::{Path, PathBuf},
str::FromStr,
};
use crate::{
networks::NetworkChain,
utils::{retry, RetryArgs},
};
use anyhow::{bail, Context as _};
use chrono::NaiveDate;
use tracing::event;
use url::Url;
use crate::cli_shared::snapshot::parse::ParsedFilename;
/// Snapshot vendors that this module knows how to fetch from (see `stable_url`).
///
/// The `strum` derives give the enum `FromStr`/`Display` implementations using
/// kebab-case names, and `clap::ValueEnum` lets it be used directly as a CLI
/// argument value. `Forest` is the `Default`.
#[derive(
Debug,
Clone,
Copy,
Hash,
PartialEq,
Eq,
Default,
strum::EnumString, strum::Display, clap::ValueEnum, )]
#[strum(serialize_all = "kebab-case")]
pub enum TrustedVendor {
// The only vendor currently defined; also the default.
// (Plain `//` comment on purpose: a `///` doc comment on a `ValueEnum`
// variant is picked up by clap as help text.)
#[default]
Forest,
}
/// Builds the canonical local filename for a snapshot.
///
/// `vendor` and `chain` are rendered through their [`Display`] impls and
/// combined with `date` and `height` using the "full" filename layout, e.g.
/// `forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst`. When
/// `forest_format` is `true`, `.forest` is inserted before `.car.zst`.
pub fn filename(
    vendor: impl Display,
    chain: impl Display,
    date: NaiveDate,
    height: i64,
    forest_format: bool,
) -> String {
    // `ParsedFilename::Full` borrows its string parts, so render them first.
    let (vendor_name, chain_name) = (vendor.to_string(), chain.to_string());
    let full_name = ParsedFilename::Full {
        vendor: &vendor_name,
        chain: &chain_name,
        date,
        height,
        forest_format,
    };
    full_name.to_string()
}
/// Downloads the snapshot that `vendor` currently serves for `chain` into
/// `directory` and returns the path of the downloaded file.
///
/// The remote is probed with [`peek`] to learn the final URL and the filename
/// advertised by the server. That filename is parsed and re-rendered through
/// [`filename`], so the local file always uses the full layout with the
/// requested vendor and chain.
///
/// # Errors
///
/// Fails if the probe fails, if the advertised filename does not match a
/// recognised layout, or if the download itself fails.
pub async fn fetch(
    directory: &Path,
    chain: &NetworkChain,
    vendor: TrustedVendor,
) -> anyhow::Result<PathBuf> {
    let (url, _content_length, remote_name) = peek(vendor, chain).await?;

    let parsed = ParsedFilename::parse_str(&remote_name).context("unexpected path format")?;
    let (date, height, forest_format) = parsed.date_and_height_and_forest();

    let local_name = filename(vendor, chain, date, height, forest_format);
    download_file_with_retry(&url, directory, &local_name).await
}
/// Downloads `url` to `directory/filename`, driving the attempt(s) through the
/// crate's `retry` helper.
///
/// The retry policy is `RetryArgs::default()` with `timeout` overridden to
/// `None` (presumably meaning "no overall time limit" — confirm against
/// `RetryArgs`). Returns the path of the downloaded file.
pub async fn download_file_with_retry(
    url: &Url,
    directory: &Path,
    filename: &str,
) -> anyhow::Result<PathBuf> {
    let retry_args = RetryArgs {
        timeout: None,
        ..Default::default()
    };
    // Each invocation of the closure starts a fresh download attempt.
    let attempt = || download_http(url, directory, filename);

    let downloaded = retry(retry_args, attempt).await?;
    Ok(downloaded)
}
/// Probes the stable snapshot URL for `vendor`/`chain` without reading the
/// response body.
///
/// Returns a tuple of:
/// 1. the URL reported by the response (`Response::url`),
/// 2. the value of the `content-length` header,
/// 3. the filename extracted from the `content-disposition` header.
///
/// # Errors
///
/// Fails if the chain is unsupported, the request fails, the server answers
/// with an error status, or either header is missing/unparseable. A missing
/// content length is reported before a missing content disposition.
pub async fn peek(
    vendor: TrustedVendor,
    chain: &NetworkChain,
) -> anyhow::Result<(Url, u64, String)> {
    let response = reqwest::get(stable_url(vendor, chain)?)
        .await?
        .error_for_status()
        .context("server returned an error response")?;

    let advertised_name = response
        .headers()
        .get(reqwest::header::CONTENT_DISPOSITION)
        .and_then(parse_content_disposition);

    // Keep this order: callers see the content-length error first when both
    // headers are absent.
    let content_length = response
        .content_length()
        .context("no content-length header")?;
    let advertised_name = advertised_name.context("no content-disposition filepath")?;

    Ok((response.url().clone(), content_length, advertised_name))
}
/// Extracts the quoted `filename="…"` value from a `content-disposition`
/// header, e.g. `attachment; filename="dir/file.ext"` => `dir/file.ext`.
///
/// Returns `None` if the header is not valid visible ASCII, or if no
/// non-empty quoted `filename=` parameter is present.
fn parse_content_disposition(value: &reqwest::header::HeaderValue) -> Option<String> {
    use regex::Regex;

    let pattern = Regex::new("filename=\"([^\"]+)\"").ok()?;
    let header_text = value.to_str().ok()?;
    pattern
        .captures(header_text)?
        .get(1)
        .map(|quoted| quoted.as_str().to_owned())
}
/// Downloads `url` into `directory/filename` and returns that final path.
///
/// The body is streamed into a sibling temporary file that carries an extra
/// `frdownload` extension, and is renamed to the final name only after the
/// copy has completed, so an incomplete download never appears under the
/// final name.
///
/// # Errors
///
/// Fails if the remote reader cannot be opened, the temporary file cannot be
/// created, the copy fails, or the final rename fails.
async fn download_http(url: &Url, directory: &Path, filename: &str) -> anyhow::Result<PathBuf> {
    let dst_path = directory.join(filename);
    let destination = dst_path.display();
    event!(target: "forest::snapshot", tracing::Level::INFO, %url, %destination, "downloading snapshot");
    let mut reader = crate::utils::net::reader(url.as_str()).await?;

    let tmp_dst_path = {
        // `foo.car.zst` => `foo.car.zst.frdownload`
        const DOWNLOAD_EXTENSION: &str = "frdownload";
        let tmp_extension = match dst_path.extension() {
            // A non-UTF-8 extension degrades to an empty prefix.
            Some(ext) => format!(
                "{}.{DOWNLOAD_EXTENSION}",
                ext.to_str().unwrap_or_default()
            ),
            None => DOWNLOAD_EXTENSION.to_owned(),
        };
        dst_path.with_extension(tmp_extension)
    };

    let mut tempfile = tokio::fs::File::create(&tmp_dst_path)
        .await
        .context("couldn't create destination file")?;
    tokio::io::copy(&mut reader, &mut tempfile)
        .await
        .context("couldn't download file")?;

    // Use the async rename so the filesystem call does not block the
    // executor thread, matching the other `tokio::fs` calls in this function.
    tokio::fs::rename(&tmp_dst_path, &dst_path)
        .await
        .context("couldn't rename file")?;
    Ok(dst_path)
}
// Declares a list of `&str` URL constants and, in test builds only, an
// `ALL_URLS` slice containing every one of them so a test can iterate over
// the full set.
//
// Input: zero or more `[vis] const NAME: &str = "literal";` items, optionally
// followed by a trailing comma.
macro_rules! define_urls {
($($vis:vis const $name:ident: &str = $value:literal;)* $(,)?) => {
// Re-emit each constant exactly as written.
$($vis const $name: &str = $value;)*
// Collect all declared constants; only compiled for tests.
#[cfg(test)]
const ALL_URLS: &[&str] = [
$($name,)*
].as_slice();
};
}
// Stable per-chain snapshot URLs for the Forest vendor; `stable_url` selects
// between them. Declared through `define_urls!` so that test builds also get
// them collected in `ALL_URLS`.
define_urls!(
const FOREST_MAINNET_COMPRESSED: &str = "https://forest-archive.chainsafe.dev/latest/mainnet/";
const FOREST_CALIBNET_COMPRESSED: &str =
"https://forest-archive.chainsafe.dev/latest/calibnet/";
);
/// Returns the stable snapshot URL for the given `vendor` and `chain`.
///
/// # Errors
///
/// Returns an error for chains without a published snapshot URL
/// (`Butterflynet` and any `Devnet`), or if the built-in URL constant fails
/// to parse (reported as an error instead of panicking).
pub fn stable_url(vendor: TrustedVendor, chain: &NetworkChain) -> anyhow::Result<Url> {
    let s = match (vendor, chain) {
        (TrustedVendor::Forest, NetworkChain::Mainnet) => FOREST_MAINNET_COMPRESSED,
        (TrustedVendor::Forest, NetworkChain::Calibnet) => FOREST_CALIBNET_COMPRESSED,
        (TrustedVendor::Forest, NetworkChain::Butterflynet | NetworkChain::Devnet(_)) => {
            bail!("unsupported chain {chain}")
        }
    };
    // The function is already fallible, so surface a malformed constant as an
    // error rather than unwinding the caller.
    Url::from_str(s).with_context(|| format!("invalid stable snapshot URL {s}"))
}
/// Every URL declared through `define_urls!` must parse as a [`Url`].
#[test]
fn parse_stable_urls() {
    ALL_URLS.iter().for_each(|url| {
        let _did_not_panic = Url::from_str(url).unwrap();
    });
}
mod parse {
use std::{fmt::Display, str::FromStr};
use anyhow::{anyhow, bail};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use nom::{
branch::alt,
bytes::complete::{tag, take_until},
character::complete::digit1,
combinator::{map_res, recognize},
error::ErrorKind,
error_position,
multi::many1,
sequence::tuple,
Err,
};
use crate::db::car::forest::FOREST_CAR_FILE_EXTENSION;
/// A snapshot filename split into its components.
///
/// Two layouts are recognised. The `Display` impl renders each variant back
/// into its textual form.
#[derive(PartialEq, Debug, Clone, Hash)]
pub(super) enum ParsedFilename<'a> {
/// Short layout: `{height}_{YYYY_MM_DD}T{HH_MM_SS}Z.car.zst`, e.g.
/// `2905920_2023_05_30T22_00_00Z.car.zst`.
Short {
/// Calendar date encoded in the name.
date: NaiveDate,
/// Time of day encoded in the name.
time: NaiveTime,
/// Height encoded at the start of the name.
height: i64,
},
/// Full layout:
/// `{vendor}_snapshot_{chain}_{YYYY-MM-DD}_height_{height}[.forest].car.zst`,
/// e.g. `forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst`.
Full {
/// Text preceding `_snapshot_`; borrowed from the parsed input.
vendor: &'a str,
/// Text between `_snapshot_` and the next `_`; borrowed from the parsed input.
chain: &'a str,
/// Calendar date encoded in the name.
date: NaiveDate,
/// Height encoded after `_height_`.
height: i64,
/// Whether the name carries the `.forest` marker before `.car.zst`.
forest_format: bool,
},
}
impl Display for ParsedFilename<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ParsedFilename::Short { date, time, height } => f.write_fmt(format_args!(
"{height}_{}.car.zst",
NaiveDateTime::new(*date, *time).format("%Y_%m_%dT%H_%M_%SZ")
)),
ParsedFilename::Full {
vendor,
chain,
date,
height,
forest_format,
} => f.write_fmt(format_args!(
"{vendor}_snapshot_{chain}_{}_height_{height}{}.car.zst",
date.format("%Y-%m-%d"),
if *forest_format { ".forest" } else { "" }
)),
}
}
}
impl<'a> ParsedFilename<'a> {
    /// Returns `(date, height, forest_format)` for this filename.
    ///
    /// The short layout carries no format marker, so it always reports
    /// `forest_format == false`.
    pub fn date_and_height_and_forest(&self) -> (NaiveDate, i64, bool) {
        match *self {
            Self::Short { date, height, .. } => (date, height, false),
            Self::Full {
                date,
                height,
                forest_format,
                ..
            } => (date, height, forest_format),
        }
    }

    /// Parses `input` as either a short or a full snapshot filename.
    ///
    /// The short layout is tried first. The whole input must be consumed;
    /// trailing characters are an error.
    pub fn parse_str(input: &'a str) -> anyhow::Result<Self> {
        enter_nom(alt((short, full)), input)
    }
}
/// Parses a run of one or more ASCII digits and converts it to `T` through
/// its [`FromStr`] impl. A conversion failure (e.g. overflow) is reported as
/// a parser error.
fn number<T>(input: &str) -> nom::IResult<&str, T>
where
    T: FromStr,
{
    let digit_run = recognize(many1(digit1));
    map_res(digit_run, |text: &str| text.parse::<T>())(input)
}
fn ymd(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveDate> + '_ {
move |input| {
let (rest, (year, _, month, _, day)) =
tuple((number, tag(separator), number, tag(separator), number))(input)?;
match NaiveDate::from_ymd_opt(year, month, day) {
Some(date) => Ok((rest, date)),
None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
}
}
}
fn hms(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveTime> + '_ {
move |input| {
let (rest, (hour, _, minute, _, second)) =
tuple((number, tag(separator), number, tag(separator), number))(input)?;
match NaiveTime::from_hms_opt(hour, minute, second) {
Some(date) => Ok((rest, date)),
None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
}
}
}
/// Parses the full layout:
/// `{vendor}_snapshot_{chain}_{YYYY-MM-DD}_height_{height}` followed by
/// either `.car.zst` or `FOREST_CAR_FILE_EXTENSION`.
///
/// `forest_format` is set when the trailing extension equals
/// `FOREST_CAR_FILE_EXTENSION`.
fn full(input: &str) -> nom::IResult<&str, ParsedFilename> {
    let mut layout = tuple((
        take_until("_snapshot_"),
        tag("_snapshot_"),
        take_until("_"),
        tag("_"),
        ymd("-"),
        tag("_height_"),
        number,
        alt((tag(".car.zst"), tag(FOREST_CAR_FILE_EXTENSION))),
    ));
    let (rest, (vendor, _, chain, _, date, _, height, extension)) = layout(input)?;

    let parsed = ParsedFilename::Full {
        vendor,
        chain,
        date,
        height,
        forest_format: extension == FOREST_CAR_FILE_EXTENSION,
    };
    Ok((rest, parsed))
}
/// Parses the short layout: `{height}_{YYYY_MM_DD}T{HH_MM_SS}Z.car.zst`.
fn short(input: &str) -> nom::IResult<&str, ParsedFilename> {
    let mut layout = tuple((
        number,
        tag("_"),
        ymd("_"),
        tag("T"),
        hms("_"),
        tag("Z.car.zst"),
    ));
    let (rest, (height, _, date, _, time, _)) = layout(input)?;
    Ok((rest, ParsedFilename::Short { date, time, height }))
}
fn enter_nom<'a, T>(
mut parser: impl nom::Parser<&'a str, T, nom::error::Error<&'a str>>,
input: &'a str,
) -> anyhow::Result<T> {
let (rest, t) = parser
.parse(input)
.map_err(|e| anyhow!("Parser error: {e}"))?;
if !rest.is_empty() {
bail!("Unexpected trailing input: {rest}")
}
Ok(t)
}
/// Round-trip check: each sample filename must parse into the expected
/// [`ParsedFilename`], and that value must render back to the same text.
#[test]
fn test_serialization() {
    // Test-only convenience constructors.
    impl ParsedFilename<'static> {
        fn short(
            height: i64,
            year: i32,
            month: u32,
            day: u32,
            hour: u32,
            min: u32,
            sec: u32,
        ) -> Self {
            let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
            let time = NaiveTime::from_hms_opt(hour, min, sec).unwrap();
            Self::Short { date, time, height }
        }
    }
    impl<'a> ParsedFilename<'a> {
        fn full(
            vendor: &'a str,
            chain: &'a str,
            year: i32,
            month: u32,
            day: u32,
            height: i64,
            forest_format: bool,
        ) -> Self {
            let date = NaiveDate::from_ymd_opt(year, month, day).unwrap();
            Self::Full {
                vendor,
                chain,
                date,
                height,
                forest_format,
            }
        }
    }

    let cases = [
        (
            "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst",
            ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, false),
        ),
        (
            "forest_snapshot_calibnet_2023-05-30_height_604419.car.zst",
            ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, false),
        ),
        (
            "forest_snapshot_mainnet_2023-05-30_height_2905376.forest.car.zst",
            ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, true),
        ),
        (
            "forest_snapshot_calibnet_2023-05-30_height_604419.forest.car.zst",
            ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, true),
        ),
        (
            "2905920_2023_05_30T22_00_00Z.car.zst",
            ParsedFilename::short(2905920, 2023, 5, 30, 22, 0, 0),
        ),
        (
            "605520_2023_05_31T00_13_00Z.car.zst",
            ParsedFilename::short(605520, 2023, 5, 31, 0, 13, 0),
        ),
        (
            "filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst",
            ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, false),
        ),
        (
            "venus_snapshot_pineconenet_2045-01-01_height_2.car.zst",
            ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, false),
        ),
        (
            "filecoin_snapshot_calibnet_2023-06-13_height_643680.forest.car.zst",
            ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, true),
        ),
        (
            "venus_snapshot_pineconenet_2045-01-01_height_2.forest.car.zst",
            ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, true),
        ),
    ];

    for (text, expected) in cases {
        let parsed = ParsedFilename::parse_str(text).unwrap();
        assert_eq!(expected, parsed, "mismatch in deserialize");
        assert_eq!(expected.to_string(), text, "mismatch in serialize");
    }
}
}
#[cfg(test)]
mod tests {
    use super::parse_content_disposition;
    use reqwest::header::HeaderValue;

    /// A header carrying both `filename*=` and a quoted `filename=` must
    /// yield the quoted value.
    #[test]
    fn content_disposition_forest() {
        let header = HeaderValue::from_static(
            "attachment; filename*=UTF-8''forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst; \
             filename=\"forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst\"",
        );
        assert_eq!(
            parse_content_disposition(&header).as_deref(),
            Some("forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst")
        );
    }
}