3184 lines
100 KiB
Rust
3184 lines
100 KiB
Rust
use std::collections::{BTreeMap, HashMap};
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
use chrono::{NaiveDate, NaiveDateTime};
|
|
use serde::{Deserialize, Serialize};
|
|
use thiserror::Error;
|
|
|
|
use crate::calendar::TradingCalendar;
|
|
use crate::futures::{FuturesCommissionType, FuturesTradingParameter};
|
|
use crate::instrument::Instrument;
|
|
|
|
mod date_format {
|
|
use chrono::NaiveDate;
|
|
use serde::{self, Deserialize, Deserializer, Serializer};
|
|
|
|
const FORMAT: &str = "%Y-%m-%d";
|
|
|
|
pub fn serialize<S>(date: &NaiveDate, serializer: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: Serializer,
|
|
{
|
|
serializer.serialize_str(&date.format(FORMAT).to_string())
|
|
}
|
|
|
|
pub fn deserialize<'de, D>(deserializer: D) -> Result<NaiveDate, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
let text = String::deserialize(deserializer)?;
|
|
NaiveDate::parse_from_str(&text, FORMAT).map_err(serde::de::Error::custom)
|
|
}
|
|
}
|
|
|
|
mod datetime_format {
|
|
use chrono::NaiveDateTime;
|
|
use serde::{self, Deserialize, Deserializer, Serializer};
|
|
|
|
const FORMAT: &str = "%Y-%m-%d %H:%M:%S";
|
|
|
|
pub fn serialize<S>(date: &NaiveDateTime, serializer: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: Serializer,
|
|
{
|
|
serializer.serialize_str(&date.format(FORMAT).to_string())
|
|
}
|
|
|
|
pub fn deserialize<'de, D>(deserializer: D) -> Result<NaiveDateTime, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
let text = String::deserialize(deserializer)?;
|
|
NaiveDateTime::parse_from_str(&text, FORMAT).map_err(serde::de::Error::custom)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum DataSetError {
|
|
#[error("failed to read file {path}: {source}")]
|
|
Io {
|
|
path: String,
|
|
#[source]
|
|
source: std::io::Error,
|
|
},
|
|
#[error("invalid csv row in {path} at line {line}: {message}")]
|
|
InvalidRow {
|
|
path: String,
|
|
line: usize,
|
|
message: String,
|
|
},
|
|
#[error("benchmark file contains multiple benchmark codes")]
|
|
MultipleBenchmarks,
|
|
#[error("missing data for {kind} on {date} / {symbol}")]
|
|
MissingSnapshot {
|
|
kind: &'static str,
|
|
date: NaiveDate,
|
|
symbol: String,
|
|
},
|
|
#[error("benchmark snapshot missing for {date}")]
|
|
MissingBenchmark { date: NaiveDate },
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub enum PriceField {
|
|
DayOpen,
|
|
Open,
|
|
Close,
|
|
Last,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct DailyMarketSnapshot {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub timestamp: Option<String>,
|
|
pub day_open: f64,
|
|
pub open: f64,
|
|
pub high: f64,
|
|
pub low: f64,
|
|
pub close: f64,
|
|
pub last_price: f64,
|
|
pub bid1: f64,
|
|
pub ask1: f64,
|
|
pub prev_close: f64,
|
|
pub volume: u64,
|
|
pub tick_volume: u64,
|
|
pub bid1_volume: u64,
|
|
pub ask1_volume: u64,
|
|
pub trading_phase: Option<String>,
|
|
pub paused: bool,
|
|
pub upper_limit: f64,
|
|
pub lower_limit: f64,
|
|
pub price_tick: f64,
|
|
}
|
|
|
|
impl DailyMarketSnapshot {
|
|
pub fn price(&self, field: PriceField) -> f64 {
|
|
match field {
|
|
PriceField::DayOpen => self.day_open,
|
|
PriceField::Open => self.open,
|
|
PriceField::Close => self.close,
|
|
PriceField::Last => self.last_price,
|
|
}
|
|
}
|
|
|
|
pub fn buy_price(&self, field: PriceField) -> f64 {
|
|
match field {
|
|
PriceField::Last if self.ask1.is_finite() && self.ask1 > 0.0 => self.ask1,
|
|
_ => self.price(field),
|
|
}
|
|
}
|
|
|
|
pub fn sell_price(&self, field: PriceField) -> f64 {
|
|
match field {
|
|
PriceField::Last if self.bid1.is_finite() && self.bid1 > 0.0 => self.bid1,
|
|
_ => self.price(field),
|
|
}
|
|
}
|
|
|
|
pub fn liquidity_for_buy(&self) -> u64 {
|
|
self.ask1_volume
|
|
}
|
|
|
|
pub fn liquidity_for_sell(&self) -> u64 {
|
|
self.bid1_volume
|
|
}
|
|
|
|
pub fn effective_price_tick(&self) -> f64 {
|
|
if self.price_tick.is_finite() && self.price_tick > 0.0 {
|
|
self.price_tick
|
|
} else {
|
|
0.01
|
|
}
|
|
}
|
|
|
|
pub fn is_at_upper_limit_price(&self, price: f64) -> bool {
|
|
if !self.upper_limit.is_finite() || self.upper_limit <= 0.0 {
|
|
return false;
|
|
}
|
|
price >= self.upper_limit - self.effective_price_tick() + 1e-6
|
|
}
|
|
|
|
pub fn is_at_lower_limit_price(&self, price: f64) -> bool {
|
|
if !self.lower_limit.is_finite() || self.lower_limit <= 0.0 {
|
|
return false;
|
|
}
|
|
price <= self.lower_limit + self.effective_price_tick() - 1e-6
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct DailyFactorSnapshot {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub market_cap_bn: f64,
|
|
pub free_float_cap_bn: f64,
|
|
pub pe_ttm: f64,
|
|
pub turnover_ratio: Option<f64>,
|
|
pub effective_turnover_ratio: Option<f64>,
|
|
#[serde(default)]
|
|
pub extra_factors: BTreeMap<String, f64>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct BenchmarkSnapshot {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub benchmark: String,
|
|
pub open: f64,
|
|
pub close: f64,
|
|
pub prev_close: f64,
|
|
pub volume: u64,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct CandidateEligibility {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub is_st: bool,
|
|
pub is_new_listing: bool,
|
|
pub is_paused: bool,
|
|
pub allow_buy: bool,
|
|
pub allow_sell: bool,
|
|
pub is_kcb: bool,
|
|
pub is_one_yuan: bool,
|
|
}
|
|
|
|
impl CandidateEligibility {
|
|
pub fn eligible_for_selection(&self) -> bool {
|
|
!self.is_st
|
|
&& !self.is_new_listing
|
|
&& !self.is_paused
|
|
&& !self.is_kcb
|
|
&& !self.is_one_yuan
|
|
&& self.allow_buy
|
|
&& self.allow_sell
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct CorporateAction {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
#[serde(default, with = "optional_date_format")]
|
|
pub payable_date: Option<NaiveDate>,
|
|
pub share_cash: f64,
|
|
pub share_bonus: f64,
|
|
pub share_gift: f64,
|
|
pub issue_quantity: f64,
|
|
pub issue_price: f64,
|
|
pub reform: bool,
|
|
pub adjust_factor: Option<f64>,
|
|
#[serde(default)]
|
|
pub successor_symbol: Option<String>,
|
|
#[serde(default)]
|
|
pub successor_ratio: Option<f64>,
|
|
#[serde(default)]
|
|
pub successor_cash: Option<f64>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct IntradayExecutionQuote {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
#[serde(with = "datetime_format")]
|
|
pub timestamp: NaiveDateTime,
|
|
pub last_price: f64,
|
|
pub bid1: f64,
|
|
pub ask1: f64,
|
|
pub bid1_volume: u64,
|
|
pub ask1_volume: u64,
|
|
#[serde(default)]
|
|
pub volume_delta: u64,
|
|
#[serde(default)]
|
|
pub amount_delta: f64,
|
|
pub trading_phase: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct IntradayOrderBookDepthLevel {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
#[serde(with = "datetime_format")]
|
|
pub timestamp: NaiveDateTime,
|
|
pub level: u8,
|
|
pub bid_price: f64,
|
|
pub bid_volume: u64,
|
|
pub ask_price: f64,
|
|
pub ask_volume: u64,
|
|
}
|
|
|
|
impl IntradayOrderBookDepthLevel {
|
|
pub fn executable_price(&self, side: crate::events::OrderSide) -> Option<f64> {
|
|
match side {
|
|
crate::events::OrderSide::Buy if self.ask_price.is_finite() && self.ask_price > 0.0 => {
|
|
Some(self.ask_price)
|
|
}
|
|
crate::events::OrderSide::Sell
|
|
if self.bid_price.is_finite() && self.bid_price > 0.0 =>
|
|
{
|
|
Some(self.bid_price)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
pub fn executable_volume(&self, side: crate::events::OrderSide) -> u64 {
|
|
match side {
|
|
crate::events::OrderSide::Buy => self.ask_volume,
|
|
crate::events::OrderSide::Sell => self.bid_volume,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl IntradayExecutionQuote {
|
|
pub fn buy_price(&self) -> Option<f64> {
|
|
if self.ask1.is_finite() && self.ask1 > 0.0 {
|
|
Some(self.ask1)
|
|
} else if self.last_price.is_finite() && self.last_price > 0.0 {
|
|
Some(self.last_price)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
pub fn sell_price(&self) -> Option<f64> {
|
|
if self.bid1.is_finite() && self.bid1 > 0.0 {
|
|
Some(self.bid1)
|
|
} else if self.last_price.is_finite() && self.last_price > 0.0 {
|
|
Some(self.last_price)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
impl CorporateAction {
|
|
pub fn split_ratio(&self) -> f64 {
|
|
1.0 + self.share_bonus.max(0.0) + self.share_gift.max(0.0)
|
|
}
|
|
|
|
pub fn has_effect(&self) -> bool {
|
|
self.share_cash.abs() > f64::EPSILON
|
|
|| (self.split_ratio() - 1.0).abs() > f64::EPSILON
|
|
|| self.issue_quantity.abs() > f64::EPSILON
|
|
|| self.reform
|
|
|| self.has_successor_conversion()
|
|
}
|
|
|
|
pub fn has_successor_conversion(&self) -> bool {
|
|
self.successor_symbol
|
|
.as_ref()
|
|
.is_some_and(|symbol| !symbol.trim().is_empty())
|
|
&& self.successor_ratio_value() > 0.0
|
|
}
|
|
|
|
pub fn successor_ratio_value(&self) -> f64 {
|
|
self.successor_ratio
|
|
.filter(|ratio| ratio.is_finite() && *ratio > 0.0)
|
|
.unwrap_or(1.0)
|
|
}
|
|
|
|
pub fn successor_cash_value(&self) -> f64 {
|
|
self.successor_cash
|
|
.filter(|cash| cash.is_finite())
|
|
.unwrap_or(0.0)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct DailySnapshotBundle {
|
|
pub date: NaiveDate,
|
|
pub benchmark: BenchmarkSnapshot,
|
|
pub market: Vec<DailyMarketSnapshot>,
|
|
pub factors: Vec<DailyFactorSnapshot>,
|
|
pub candidates: Vec<CandidateEligibility>,
|
|
pub corporate_actions: Vec<CorporateAction>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct PriceBar {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub timestamp: Option<String>,
|
|
pub symbol: String,
|
|
pub frequency: String,
|
|
pub open: f64,
|
|
pub high: f64,
|
|
pub low: f64,
|
|
pub close: f64,
|
|
pub last_price: f64,
|
|
pub volume: u64,
|
|
pub amount: f64,
|
|
pub bid1: f64,
|
|
pub ask1: f64,
|
|
pub bid1_volume: u64,
|
|
pub ask1_volume: u64,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct DividendRecord {
|
|
#[serde(with = "date_format")]
|
|
pub ex_dividend_date: NaiveDate,
|
|
#[serde(with = "date_format")]
|
|
pub payable_date: NaiveDate,
|
|
pub symbol: String,
|
|
pub dividend_cash_before_tax: f64,
|
|
pub round_lot: u32,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct SplitRecord {
|
|
#[serde(with = "date_format")]
|
|
pub ex_dividend_date: NaiveDate,
|
|
pub symbol: String,
|
|
pub split_ratio: f64,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct FactorValue {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub field: String,
|
|
pub value: f64,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct FactorTextValue {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub field: String,
|
|
pub value: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct SecuritiesMarginRecord {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub symbol: String,
|
|
pub field: String,
|
|
pub value: f64,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct YieldCurvePoint {
|
|
#[serde(with = "date_format")]
|
|
pub date: NaiveDate,
|
|
pub tenor: String,
|
|
pub value: f64,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct EligibleUniverseSnapshot {
|
|
pub symbol: String,
|
|
pub market_cap_bn: f64,
|
|
pub free_float_cap_bn: f64,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct SymbolPriceSeries {
|
|
snapshots: Vec<DailyMarketSnapshot>,
|
|
dates: Vec<NaiveDate>,
|
|
opens: Vec<f64>,
|
|
closes: Vec<f64>,
|
|
prev_closes: Vec<f64>,
|
|
last_prices: Vec<f64>,
|
|
open_prefix: Vec<f64>,
|
|
close_prefix: Vec<f64>,
|
|
prev_close_prefix: Vec<f64>,
|
|
last_prefix: Vec<f64>,
|
|
volume_prefix: Vec<f64>,
|
|
}
|
|
|
|
impl SymbolPriceSeries {
|
|
fn new(rows: &[DailyMarketSnapshot]) -> Self {
|
|
let mut sorted = rows.to_vec();
|
|
sorted.sort_by_key(|row| row.date);
|
|
|
|
let dates = sorted.iter().map(|row| row.date).collect::<Vec<_>>();
|
|
let opens = sorted.iter().map(|row| row.open).collect::<Vec<_>>();
|
|
let closes = sorted.iter().map(|row| row.close).collect::<Vec<_>>();
|
|
let prev_closes = sorted.iter().map(|row| row.prev_close).collect::<Vec<_>>();
|
|
let last_prices = sorted.iter().map(|row| row.last_price).collect::<Vec<_>>();
|
|
let volumes = sorted
|
|
.iter()
|
|
.map(|row| row.volume as f64)
|
|
.collect::<Vec<_>>();
|
|
let open_prefix = prefix_sums(&opens);
|
|
let close_prefix = prefix_sums(&closes);
|
|
let prev_close_prefix = prefix_sums(&prev_closes);
|
|
let last_prefix = prefix_sums(&last_prices);
|
|
let volume_prefix = prefix_sums(&volumes);
|
|
|
|
Self {
|
|
snapshots: sorted,
|
|
dates,
|
|
opens,
|
|
closes,
|
|
prev_closes,
|
|
last_prices,
|
|
open_prefix,
|
|
close_prefix,
|
|
prev_close_prefix,
|
|
last_prefix,
|
|
volume_prefix,
|
|
}
|
|
}
|
|
|
|
fn moving_average(&self, date: NaiveDate, lookback: usize, field: PriceField) -> Option<f64> {
|
|
if lookback == 0 {
|
|
return None;
|
|
}
|
|
let end = self.end_index(date)?;
|
|
if end < lookback {
|
|
return None;
|
|
}
|
|
let start = end - lookback;
|
|
let prefix = self.prefix_for(field);
|
|
let sum = prefix[end] - prefix[start];
|
|
Some(sum / lookback as f64)
|
|
}
|
|
|
|
fn trailing_values(&self, date: NaiveDate, lookback: usize, field: PriceField) -> Vec<f64> {
|
|
let Some(end) = self.end_index(date) else {
|
|
return Vec::new();
|
|
};
|
|
let start = end.saturating_sub(lookback);
|
|
self.values_for(field)[start..end].to_vec()
|
|
}
|
|
|
|
fn trailing_snapshots(
|
|
&self,
|
|
date: NaiveDate,
|
|
lookback: usize,
|
|
include_now: bool,
|
|
) -> Vec<DailyMarketSnapshot> {
|
|
if lookback == 0 {
|
|
return Vec::new();
|
|
}
|
|
let end = if include_now {
|
|
self.end_index(date)
|
|
} else {
|
|
self.previous_completed_end_index(date)
|
|
};
|
|
let Some(end) = end else {
|
|
return Vec::new();
|
|
};
|
|
let start = end.saturating_sub(lookback);
|
|
self.snapshots[start..end].to_vec()
|
|
}
|
|
|
|
fn decision_price_on_or_before(&self, date: NaiveDate) -> Option<f64> {
|
|
let end = self.decision_end_index(date)?;
|
|
if end == 0 {
|
|
return None;
|
|
}
|
|
self.prev_closes.get(end - 1).copied()
|
|
}
|
|
|
|
fn decision_end_index(&self, date: NaiveDate) -> Option<usize> {
|
|
match self.dates.binary_search(&date) {
|
|
Ok(idx) => Some(idx + 1),
|
|
Err(0) => None,
|
|
Err(idx) => Some(idx),
|
|
}
|
|
}
|
|
|
|
fn previous_completed_end_index(&self, date: NaiveDate) -> Option<usize> {
|
|
match self.dates.binary_search(&date) {
|
|
Ok(idx) => Some(idx),
|
|
Err(0) => None,
|
|
Err(idx) => Some(idx),
|
|
}
|
|
}
|
|
|
|
fn decision_close_moving_average(&self, date: NaiveDate, lookback: usize) -> Option<f64> {
|
|
if lookback == 0 {
|
|
return None;
|
|
}
|
|
let end = self.decision_end_index(date)?;
|
|
if end < lookback {
|
|
return None;
|
|
}
|
|
let start = end - lookback;
|
|
let sum = self.prev_close_prefix[end] - self.prev_close_prefix[start];
|
|
Some(sum / lookback as f64)
|
|
}
|
|
|
|
fn decision_volume_moving_average(&self, date: NaiveDate, lookback: usize) -> Option<f64> {
|
|
if lookback == 0 {
|
|
return None;
|
|
}
|
|
let end = self.previous_completed_end_index(date)?;
|
|
if end < lookback {
|
|
return None;
|
|
}
|
|
let start = end - lookback;
|
|
let sum = self.volume_prefix[end] - self.volume_prefix[start];
|
|
Some(sum / lookback as f64)
|
|
}
|
|
|
|
fn end_index(&self, date: NaiveDate) -> Option<usize> {
|
|
match self.dates.binary_search(&date) {
|
|
Ok(idx) => Some(idx + 1),
|
|
Err(0) => None,
|
|
Err(idx) => Some(idx),
|
|
}
|
|
}
|
|
|
|
fn values_for(&self, field: PriceField) -> &[f64] {
|
|
match field {
|
|
PriceField::DayOpen => &self.opens,
|
|
PriceField::Open => &self.opens,
|
|
PriceField::Close => &self.closes,
|
|
PriceField::Last => &self.last_prices,
|
|
}
|
|
}
|
|
|
|
fn price_on_or_before(&self, date: NaiveDate, field: PriceField) -> Option<f64> {
|
|
let end = self.end_index(date)?;
|
|
if end == 0 {
|
|
return None;
|
|
}
|
|
self.values_for(field).get(end - 1).copied()
|
|
}
|
|
|
|
fn prefix_for(&self, field: PriceField) -> &[f64] {
|
|
match field {
|
|
PriceField::DayOpen => &self.open_prefix,
|
|
PriceField::Open => &self.open_prefix,
|
|
PriceField::Close => &self.close_prefix,
|
|
PriceField::Last => &self.last_prefix,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct BenchmarkPriceSeries {
|
|
dates: Vec<NaiveDate>,
|
|
closes: Vec<f64>,
|
|
open_prefix: Vec<f64>,
|
|
close_prefix: Vec<f64>,
|
|
}
|
|
|
|
impl BenchmarkPriceSeries {
|
|
fn new(rows: &[BenchmarkSnapshot]) -> Self {
|
|
let mut sorted = rows.to_vec();
|
|
sorted.sort_by_key(|row| row.date);
|
|
let dates = sorted.iter().map(|row| row.date).collect::<Vec<_>>();
|
|
let opens = sorted.iter().map(|row| row.open).collect::<Vec<_>>();
|
|
let closes = sorted.iter().map(|row| row.close).collect::<Vec<_>>();
|
|
let open_prefix = prefix_sums(&opens);
|
|
let close_prefix = prefix_sums(&closes);
|
|
Self {
|
|
dates,
|
|
closes,
|
|
open_prefix,
|
|
close_prefix,
|
|
}
|
|
}
|
|
|
|
fn moving_average(&self, date: NaiveDate, lookback: usize) -> Option<f64> {
|
|
self.moving_average_for(date, lookback, PriceField::Close)
|
|
}
|
|
|
|
fn moving_average_for(
|
|
&self,
|
|
date: NaiveDate,
|
|
lookback: usize,
|
|
field: PriceField,
|
|
) -> Option<f64> {
|
|
if lookback == 0 {
|
|
return None;
|
|
}
|
|
let end = match self.dates.binary_search(&date) {
|
|
Ok(idx) => idx + 1,
|
|
Err(0) => return None,
|
|
Err(idx) => idx,
|
|
};
|
|
if end < lookback {
|
|
return None;
|
|
}
|
|
let start = end - lookback;
|
|
let prefix = match field {
|
|
PriceField::DayOpen | PriceField::Open => &self.open_prefix,
|
|
PriceField::Close | PriceField::Last => &self.close_prefix,
|
|
};
|
|
let sum = prefix[end] - prefix[start];
|
|
Some(sum / lookback as f64)
|
|
}
|
|
|
|
fn trailing_values(&self, date: NaiveDate, lookback: usize) -> Vec<f64> {
|
|
let end = match self.dates.binary_search(&date) {
|
|
Ok(idx) => idx + 1,
|
|
Err(0) => return Vec::new(),
|
|
Err(idx) => idx,
|
|
};
|
|
let start = end.saturating_sub(lookback);
|
|
self.closes[start..end].to_vec()
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct DataSet {
|
|
instruments: HashMap<String, Instrument>,
|
|
calendar: TradingCalendar,
|
|
market_by_date: BTreeMap<NaiveDate, Vec<DailyMarketSnapshot>>,
|
|
market_index: HashMap<(NaiveDate, String), DailyMarketSnapshot>,
|
|
factor_by_date: BTreeMap<NaiveDate, Vec<DailyFactorSnapshot>>,
|
|
factor_index: HashMap<(NaiveDate, String), DailyFactorSnapshot>,
|
|
factor_text_by_date: BTreeMap<NaiveDate, Vec<FactorTextValue>>,
|
|
factor_text_index: HashMap<(NaiveDate, String, String), FactorTextValue>,
|
|
candidate_by_date: BTreeMap<NaiveDate, Vec<CandidateEligibility>>,
|
|
candidate_index: HashMap<(NaiveDate, String), CandidateEligibility>,
|
|
corporate_actions_by_date: BTreeMap<NaiveDate, Vec<CorporateAction>>,
|
|
execution_quotes_index: HashMap<(NaiveDate, String), Vec<IntradayExecutionQuote>>,
|
|
order_book_depth_index: HashMap<(NaiveDate, String), Vec<IntradayOrderBookDepthLevel>>,
|
|
benchmark_by_date: BTreeMap<NaiveDate, BenchmarkSnapshot>,
|
|
market_series_by_symbol: HashMap<String, SymbolPriceSeries>,
|
|
benchmark_series_cache: BenchmarkPriceSeries,
|
|
eligible_universe_by_date: BTreeMap<NaiveDate, Vec<EligibleUniverseSnapshot>>,
|
|
benchmark_code: String,
|
|
futures_params_by_symbol: HashMap<String, Vec<FuturesTradingParameter>>,
|
|
}
|
|
|
|
impl DataSet {
|
|
pub fn from_csv_dir(path: &Path) -> Result<Self, DataSetError> {
|
|
let instruments = read_instruments(&path.join("instruments.csv"))?;
|
|
let market = read_market(&path.join("market.csv"))?;
|
|
let factors = read_factors(&path.join("factors.csv"))?;
|
|
let factor_texts = read_factor_texts(&path.join("factors.csv"))?;
|
|
let candidates = read_candidates(&path.join("candidate_flags.csv"))?;
|
|
let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?;
|
|
let corporate_actions_path = path.join("corporate_actions.csv");
|
|
let corporate_actions = if corporate_actions_path.exists() {
|
|
read_corporate_actions(&corporate_actions_path)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let execution_quotes_path = path.join("execution_quotes.csv");
|
|
let execution_quotes = if execution_quotes_path.exists() {
|
|
read_execution_quotes(&execution_quotes_path)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let futures_params_path = path.join("futures_trading_parameters.csv");
|
|
let futures_params = if futures_params_path.exists() {
|
|
read_futures_trading_parameters(&futures_params_path)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let order_book_depth_path = path.join("order_book_depth.csv");
|
|
let order_book_depth = if order_book_depth_path.exists() {
|
|
read_order_book_depth(&order_book_depth_path)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
Self::from_components_with_actions_quotes_futures_depth_and_factor_texts(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
execution_quotes,
|
|
futures_params,
|
|
order_book_depth,
|
|
factor_texts,
|
|
)
|
|
}
|
|
|
|
pub fn from_partitioned_dir(path: &Path) -> Result<Self, DataSetError> {
|
|
let instruments = read_instruments(&path.join("instruments.csv"))?;
|
|
let benchmarks = read_partitioned_dir(&path.join("benchmark"), read_benchmarks)?;
|
|
let market = read_partitioned_dir(&path.join("market"), read_market)?;
|
|
let factors = read_partitioned_dir(&path.join("factors"), read_factors)?;
|
|
let factor_texts = read_partitioned_dir(&path.join("factors"), read_factor_texts)?;
|
|
let candidates = read_partitioned_dir(&path.join("candidates"), read_candidates)?;
|
|
let corporate_actions_dir = path.join("corporate_actions");
|
|
let corporate_actions = if corporate_actions_dir.exists() {
|
|
read_partitioned_dir(&corporate_actions_dir, read_corporate_actions)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let execution_quotes_dir = path.join("execution_quotes");
|
|
let execution_quotes = if execution_quotes_dir.exists() {
|
|
read_partitioned_dir(&execution_quotes_dir, read_execution_quotes)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let futures_params_dir = path.join("futures_trading_parameters");
|
|
let futures_params = if futures_params_dir.exists() {
|
|
read_partitioned_dir(&futures_params_dir, read_futures_trading_parameters)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
let order_book_depth_dir = path.join("order_book_depth");
|
|
let order_book_depth = if order_book_depth_dir.exists() {
|
|
read_partitioned_dir(&order_book_depth_dir, read_order_book_depth)?
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
Self::from_components_with_actions_quotes_futures_depth_and_factor_texts(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
execution_quotes,
|
|
futures_params,
|
|
order_book_depth,
|
|
factor_texts,
|
|
)
|
|
}
|
|
|
|
pub fn from_components(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_and_quotes(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
Vec::new(),
|
|
Vec::new(),
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_actions(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
corporate_actions: Vec<CorporateAction>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_and_quotes(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
Vec::new(),
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_actions_and_quotes(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
corporate_actions: Vec<CorporateAction>,
|
|
execution_quotes: Vec<IntradayExecutionQuote>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_quotes_and_futures(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
execution_quotes,
|
|
Vec::new(),
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_actions_quotes_and_futures(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
corporate_actions: Vec<CorporateAction>,
|
|
execution_quotes: Vec<IntradayExecutionQuote>,
|
|
futures_params: Vec<FuturesTradingParameter>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_quotes_futures_and_depth(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
execution_quotes,
|
|
futures_params,
|
|
Vec::new(),
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_actions_quotes_futures_and_depth(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
corporate_actions: Vec<CorporateAction>,
|
|
execution_quotes: Vec<IntradayExecutionQuote>,
|
|
futures_params: Vec<FuturesTradingParameter>,
|
|
order_book_depth: Vec<IntradayOrderBookDepthLevel>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_quotes_futures_depth_and_factor_texts(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
corporate_actions,
|
|
execution_quotes,
|
|
futures_params,
|
|
order_book_depth,
|
|
Vec::new(),
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_factor_texts(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
factor_texts: Vec<FactorTextValue>,
|
|
) -> Result<Self, DataSetError> {
|
|
Self::from_components_with_actions_quotes_futures_depth_and_factor_texts(
|
|
instruments,
|
|
market,
|
|
factors,
|
|
candidates,
|
|
benchmarks,
|
|
Vec::new(),
|
|
Vec::new(),
|
|
Vec::new(),
|
|
Vec::new(),
|
|
factor_texts,
|
|
)
|
|
}
|
|
|
|
pub fn from_components_with_actions_quotes_futures_depth_and_factor_texts(
|
|
instruments: Vec<Instrument>,
|
|
market: Vec<DailyMarketSnapshot>,
|
|
factors: Vec<DailyFactorSnapshot>,
|
|
candidates: Vec<CandidateEligibility>,
|
|
benchmarks: Vec<BenchmarkSnapshot>,
|
|
corporate_actions: Vec<CorporateAction>,
|
|
execution_quotes: Vec<IntradayExecutionQuote>,
|
|
futures_params: Vec<FuturesTradingParameter>,
|
|
order_book_depth: Vec<IntradayOrderBookDepthLevel>,
|
|
factor_texts: Vec<FactorTextValue>,
|
|
) -> Result<Self, DataSetError> {
|
|
let benchmark_code = collect_benchmark_code(&benchmarks)?;
|
|
let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect());
|
|
|
|
let instruments = instruments
|
|
.into_iter()
|
|
.map(|instrument| (instrument.symbol.clone(), instrument))
|
|
.collect::<HashMap<_, _>>();
|
|
|
|
let market_by_date = group_by_date(market.clone(), |item| item.date);
|
|
let market_index = market
|
|
.into_iter()
|
|
.map(|item| ((item.date, item.symbol.clone()), item))
|
|
.collect::<HashMap<_, _>>();
|
|
|
|
let factor_by_date = group_by_date(factors.clone(), |item| item.date);
|
|
let factor_index = factors
|
|
.into_iter()
|
|
.map(|item| ((item.date, item.symbol.clone()), item))
|
|
.collect::<HashMap<_, _>>();
|
|
let factor_texts = factor_texts
|
|
.into_iter()
|
|
.filter_map(|mut item| {
|
|
item.field = normalize_field(&item.field);
|
|
if item.field.is_empty() {
|
|
None
|
|
} else {
|
|
Some(item)
|
|
}
|
|
})
|
|
.collect::<Vec<_>>();
|
|
let factor_text_by_date = group_by_date(factor_texts.clone(), |item| item.date);
|
|
let factor_text_index = factor_texts
|
|
.into_iter()
|
|
.map(|item| ((item.date, item.symbol.clone(), item.field.clone()), item))
|
|
.collect::<HashMap<_, _>>();
|
|
|
|
let candidate_by_date = group_by_date(candidates.clone(), |item| item.date);
|
|
let candidate_index = candidates
|
|
.into_iter()
|
|
.map(|item| ((item.date, item.symbol.clone()), item))
|
|
.collect::<HashMap<_, _>>();
|
|
let corporate_actions_by_date = group_by_date(corporate_actions, |item| item.date);
|
|
let execution_quotes_index = build_execution_quote_index(execution_quotes);
|
|
let order_book_depth_index = build_order_book_depth_index(order_book_depth);
|
|
|
|
let benchmark_by_date = benchmarks
|
|
.into_iter()
|
|
.map(|item| (item.date, item))
|
|
.collect::<BTreeMap<_, _>>();
|
|
let market_series_by_symbol = build_market_series(&market_by_date);
|
|
let benchmark_series_cache =
|
|
BenchmarkPriceSeries::new(&benchmark_by_date.values().cloned().collect::<Vec<_>>());
|
|
let eligible_universe_by_date =
|
|
build_eligible_universe(&factor_by_date, &candidate_index, &market_index);
|
|
let futures_params_by_symbol = build_futures_params_index(futures_params);
|
|
|
|
Ok(Self {
|
|
instruments,
|
|
calendar,
|
|
market_by_date,
|
|
market_index,
|
|
factor_by_date,
|
|
factor_index,
|
|
factor_text_by_date,
|
|
factor_text_index,
|
|
candidate_by_date,
|
|
candidate_index,
|
|
corporate_actions_by_date,
|
|
execution_quotes_index,
|
|
order_book_depth_index,
|
|
benchmark_by_date,
|
|
market_series_by_symbol,
|
|
benchmark_series_cache,
|
|
eligible_universe_by_date,
|
|
benchmark_code,
|
|
futures_params_by_symbol,
|
|
})
|
|
}
|
|
|
|
pub fn calendar(&self) -> &TradingCalendar {
|
|
&self.calendar
|
|
}
|
|
|
|
pub fn benchmark_code(&self) -> &str {
|
|
&self.benchmark_code
|
|
}
|
|
|
|
pub fn instruments(&self) -> &HashMap<String, Instrument> {
|
|
&self.instruments
|
|
}
|
|
|
|
pub fn all_instruments(&self) -> Vec<&Instrument> {
|
|
let mut instruments = self.instruments.values().collect::<Vec<_>>();
|
|
instruments.sort_by(|left, right| left.symbol.cmp(&right.symbol));
|
|
instruments
|
|
}
|
|
|
|
pub fn instruments_history(&self, symbols: &[&str]) -> Vec<&Instrument> {
|
|
symbols
|
|
.iter()
|
|
.filter_map(|symbol| self.instruments.get(*symbol))
|
|
.collect()
|
|
}
|
|
|
|
pub fn active_instruments(&self, date: NaiveDate, symbols: &[&str]) -> Vec<&Instrument> {
|
|
symbols
|
|
.iter()
|
|
.filter_map(|symbol| self.instruments.get(*symbol))
|
|
.filter(|instrument| instrument.is_active_on(date))
|
|
.collect()
|
|
}
|
|
|
|
pub fn instrument(&self, symbol: &str) -> Option<&Instrument> {
|
|
self.instruments.get(symbol)
|
|
}
|
|
|
|
pub fn market(&self, date: NaiveDate, symbol: &str) -> Option<&DailyMarketSnapshot> {
|
|
self.market_index.get(&(date, symbol.to_string()))
|
|
}
|
|
|
|
pub fn factor(&self, date: NaiveDate, symbol: &str) -> Option<&DailyFactorSnapshot> {
|
|
self.factor_index.get(&(date, symbol.to_string()))
|
|
}
|
|
|
|
pub fn candidate(&self, date: NaiveDate, symbol: &str) -> Option<&CandidateEligibility> {
|
|
self.candidate_index.get(&(date, symbol.to_string()))
|
|
}
|
|
|
|
pub fn benchmark(&self, date: NaiveDate) -> Option<&BenchmarkSnapshot> {
|
|
self.benchmark_by_date.get(&date)
|
|
}
|
|
|
|
pub fn corporate_actions_on(&self, date: NaiveDate) -> &[CorporateAction] {
|
|
self.corporate_actions_by_date
|
|
.get(&date)
|
|
.map(Vec::as_slice)
|
|
.unwrap_or(&[])
|
|
}
|
|
|
|
pub fn execution_quotes_on(&self, date: NaiveDate, symbol: &str) -> &[IntradayExecutionQuote] {
|
|
self.execution_quotes_index
|
|
.get(&(date, symbol.to_string()))
|
|
.map(Vec::as_slice)
|
|
.unwrap_or(&[])
|
|
}
|
|
|
|
pub fn order_book_depth_on(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
) -> &[IntradayOrderBookDepthLevel] {
|
|
self.order_book_depth_index
|
|
.get(&(date, symbol.to_string()))
|
|
.map(Vec::as_slice)
|
|
.unwrap_or(&[])
|
|
}
|
|
|
|
pub fn execution_quotes_on_date(&self, date: NaiveDate) -> Vec<IntradayExecutionQuote> {
|
|
let mut quotes = self
|
|
.execution_quotes_index
|
|
.iter()
|
|
.filter(|((quote_date, _), _)| *quote_date == date)
|
|
.flat_map(|(_, rows)| rows.iter().cloned())
|
|
.collect::<Vec<_>>();
|
|
quotes.sort_by(|left, right| {
|
|
left.timestamp
|
|
.cmp(&right.timestamp)
|
|
.then_with(|| left.symbol.cmp(&right.symbol))
|
|
});
|
|
quotes
|
|
}
|
|
|
|
pub fn benchmark_series(&self) -> Vec<BenchmarkSnapshot> {
|
|
self.benchmark_by_date.values().cloned().collect()
|
|
}
|
|
|
|
pub fn futures_trading_parameter(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
) -> Option<&FuturesTradingParameter> {
|
|
self.futures_params_by_symbol.get(symbol).and_then(|rows| {
|
|
rows.iter()
|
|
.rev()
|
|
.find(|row| row.effective_date.is_none_or(|effective| effective <= date))
|
|
})
|
|
}
|
|
|
|
pub fn futures_settlement_price(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
mode: &str,
|
|
) -> Option<f64> {
|
|
let snapshot = self.market(date, symbol)?;
|
|
match normalize_field(mode).as_str() {
|
|
"settlement" | "settle" => self
|
|
.factor_numeric_value(date, symbol, "settlement")
|
|
.or_else(|| self.factor_numeric_value(date, symbol, "settle"))
|
|
.or(Some(snapshot.close)),
|
|
"prev_settlement" | "pre_settlement" => self
|
|
.factor_numeric_value(date, symbol, "prev_settlement")
|
|
.or_else(|| self.factor_numeric_value(date, symbol, "pre_settlement"))
|
|
.or(Some(snapshot.prev_close)),
|
|
_ => Some(snapshot.close),
|
|
}
|
|
}
|
|
|
|
pub fn history_bars(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
frequency: &str,
|
|
field: &str,
|
|
include_now: bool,
|
|
) -> Vec<f64> {
|
|
self.history_bars_at(date, None, symbol, bar_count, frequency, field, include_now)
|
|
}
|
|
|
|
pub fn history_bars_at(
|
|
&self,
|
|
date: NaiveDate,
|
|
active_datetime: Option<NaiveDateTime>,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
frequency: &str,
|
|
field: &str,
|
|
include_now: bool,
|
|
) -> Vec<f64> {
|
|
if bar_count == 0 {
|
|
return Vec::new();
|
|
}
|
|
match normalize_history_frequency(frequency).as_deref() {
|
|
Some("1d") => self.history_daily_values(date, symbol, bar_count, field, include_now),
|
|
Some("1m") | Some("tick") => self.history_intraday_values(
|
|
date,
|
|
active_datetime,
|
|
symbol,
|
|
bar_count,
|
|
field,
|
|
include_now,
|
|
),
|
|
_ => Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn history_daily_snapshots(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
include_now: bool,
|
|
) -> Vec<DailyMarketSnapshot> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.map(|series| series.trailing_snapshots(date, bar_count, include_now))
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
pub fn history_intraday_quotes(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
include_now: bool,
|
|
) -> Vec<IntradayExecutionQuote> {
|
|
self.history_intraday_quotes_at(date, None, symbol, bar_count, include_now)
|
|
}
|
|
|
|
pub fn history_intraday_quotes_at(
|
|
&self,
|
|
date: NaiveDate,
|
|
active_datetime: Option<NaiveDateTime>,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
include_now: bool,
|
|
) -> Vec<IntradayExecutionQuote> {
|
|
if bar_count == 0 {
|
|
return Vec::new();
|
|
}
|
|
let mut quotes = self
|
|
.execution_quotes_index
|
|
.iter()
|
|
.filter(|((_, quote_symbol), _)| quote_symbol == symbol)
|
|
.flat_map(|(_, rows)| rows.iter())
|
|
.filter(|quote| intraday_quote_visible(quote, date, active_datetime, include_now))
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
quotes.sort_by_key(|quote| quote.timestamp);
|
|
take_last(quotes, bar_count)
|
|
}
|
|
|
|
pub fn trading_dates(&self, start: NaiveDate, end: NaiveDate) -> Vec<NaiveDate> {
|
|
self.calendar.trading_dates(start, end)
|
|
}
|
|
|
|
pub fn previous_trading_date(&self, date: NaiveDate, n: usize) -> Option<NaiveDate> {
|
|
self.calendar.previous_trading_date(date, n)
|
|
}
|
|
|
|
pub fn next_trading_date(&self, date: NaiveDate, n: usize) -> Option<NaiveDate> {
|
|
self.calendar.next_trading_date(date, n)
|
|
}
|
|
|
|
pub fn is_suspended_flags(&self, date: NaiveDate, symbol: &str, count: usize) -> Vec<bool> {
|
|
self.historical_daily_flags(date, symbol, count, |candidate, market| {
|
|
candidate.is_some_and(|row| row.is_paused) || market.is_some_and(|row| row.paused)
|
|
})
|
|
}
|
|
|
|
pub fn is_st_stock_flags(&self, date: NaiveDate, symbol: &str, count: usize) -> Vec<bool> {
|
|
self.historical_daily_flags(date, symbol, count, |candidate, _| {
|
|
candidate.is_some_and(|row| row.is_st)
|
|
})
|
|
}
|
|
|
|
pub fn get_dividend(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
) -> Vec<DividendRecord> {
|
|
let mut rows = self
|
|
.corporate_actions_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, actions)| actions.iter())
|
|
.filter(|action| action.symbol == symbol && action.share_cash.abs() > f64::EPSILON)
|
|
.map(|action| DividendRecord {
|
|
ex_dividend_date: action.date,
|
|
payable_date: action.payable_date.unwrap_or(action.date),
|
|
symbol: action.symbol.clone(),
|
|
dividend_cash_before_tax: action.share_cash,
|
|
round_lot: self
|
|
.instrument(symbol)
|
|
.map(Instrument::effective_round_lot)
|
|
.unwrap_or(100),
|
|
})
|
|
.collect::<Vec<_>>();
|
|
rows.sort_by_key(|row| row.ex_dividend_date);
|
|
rows
|
|
}
|
|
|
|
pub fn get_split(&self, symbol: &str, start: NaiveDate, end: NaiveDate) -> Vec<SplitRecord> {
|
|
let mut rows = self
|
|
.corporate_actions_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, actions)| actions.iter())
|
|
.filter(|action| action.symbol == symbol && (action.split_ratio() - 1.0).abs() > 1e-12)
|
|
.map(|action| SplitRecord {
|
|
ex_dividend_date: action.date,
|
|
symbol: action.symbol.clone(),
|
|
split_ratio: action.split_ratio(),
|
|
})
|
|
.collect::<Vec<_>>();
|
|
rows.sort_by_key(|row| row.ex_dividend_date);
|
|
rows
|
|
}
|
|
|
|
pub fn get_factor(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
let field = normalize_field(field);
|
|
let mut rows = self
|
|
.factor_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, snapshots)| snapshots.iter())
|
|
.filter(|snapshot| snapshot.symbol == symbol)
|
|
.filter_map(|snapshot| {
|
|
factor_numeric_value(snapshot, &field).map(|value| FactorValue {
|
|
date: snapshot.date,
|
|
symbol: snapshot.symbol.clone(),
|
|
field: field.clone(),
|
|
value,
|
|
})
|
|
})
|
|
.collect::<Vec<_>>();
|
|
rows.sort_by_key(|row| row.date);
|
|
rows
|
|
}
|
|
|
|
pub fn get_factor_text(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorTextValue> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
let field = normalize_field(field);
|
|
let mut rows = self
|
|
.factor_text_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, snapshots)| snapshots.iter())
|
|
.filter(|snapshot| {
|
|
snapshot.symbol == symbol && normalize_field(&snapshot.field) == field
|
|
})
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
rows.sort_by_key(|row| row.date);
|
|
rows
|
|
}
|
|
|
|
pub fn get_yield_curve(
|
|
&self,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
tenor: Option<&str>,
|
|
) -> Vec<YieldCurvePoint> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
let tenor_filter = tenor.map(normalize_field);
|
|
let mut rows = Vec::new();
|
|
for (date, snapshots) in self.factor_by_date.range(start..=end) {
|
|
for snapshot in snapshots {
|
|
for (field, value) in &snapshot.extra_factors {
|
|
let normalized = normalize_field(field);
|
|
let Some(raw_tenor) = normalized
|
|
.strip_prefix("yield_curve_")
|
|
.or_else(|| normalized.strip_prefix("yc_"))
|
|
else {
|
|
continue;
|
|
};
|
|
if tenor_filter
|
|
.as_ref()
|
|
.is_some_and(|expected| expected != raw_tenor)
|
|
{
|
|
continue;
|
|
}
|
|
rows.push(YieldCurvePoint {
|
|
date: *date,
|
|
tenor: raw_tenor.to_string(),
|
|
value: *value,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
rows.sort_by(|left, right| {
|
|
left.date
|
|
.cmp(&right.date)
|
|
.then(left.tenor.cmp(&right.tenor))
|
|
});
|
|
rows
|
|
}
|
|
|
|
pub fn get_margin_stocks(&self, date: NaiveDate, margin_type: &str) -> Vec<String> {
|
|
let field = match normalize_field(margin_type).as_str() {
|
|
"stock" => "margin_stock",
|
|
"cash" => "margin_cash",
|
|
_ => "margin_all",
|
|
};
|
|
let mut symbols = self
|
|
.factor_by_date
|
|
.get(&date)
|
|
.map(|rows| {
|
|
rows.iter()
|
|
.filter(|row| {
|
|
row.extra_factors
|
|
.get(field)
|
|
.or_else(|| row.extra_factors.get("margin_all"))
|
|
.is_some_and(|value| *value > 0.0)
|
|
})
|
|
.map(|row| row.symbol.clone())
|
|
.collect::<Vec<_>>()
|
|
})
|
|
.unwrap_or_default();
|
|
if symbols.is_empty() {
|
|
symbols = self
|
|
.active_instruments(
|
|
date,
|
|
&self
|
|
.instruments
|
|
.keys()
|
|
.map(String::as_str)
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
.into_iter()
|
|
.filter(|instrument| !instrument.board.eq_ignore_ascii_case("FUTURE"))
|
|
.map(|instrument| instrument.symbol.clone())
|
|
.collect();
|
|
}
|
|
symbols.sort();
|
|
symbols.dedup();
|
|
symbols
|
|
}
|
|
|
|
pub fn get_securities_margin(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<SecuritiesMarginRecord> {
|
|
self.get_factor(symbol, start, end, field)
|
|
.into_iter()
|
|
.map(|row| SecuritiesMarginRecord {
|
|
date: row.date,
|
|
symbol: row.symbol,
|
|
field: row.field,
|
|
value: row.value,
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
pub fn get_shares(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
share_type: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&shares_factor_aliases(share_type),
|
|
&format!("shares_{}", normalize_field(share_type)),
|
|
)
|
|
}
|
|
|
|
pub fn get_turnover_rate(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&turnover_rate_factor_aliases(field),
|
|
&format!("turnover_rate_{}", normalize_field(field)),
|
|
)
|
|
}
|
|
|
|
pub fn get_price_change_rate(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
) -> Vec<FactorValue> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
let mut rows = self
|
|
.market_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, snapshots)| snapshots.iter())
|
|
.filter(|snapshot| snapshot.symbol == symbol)
|
|
.filter_map(|snapshot| {
|
|
if snapshot.prev_close.is_finite() && snapshot.prev_close > 0.0 {
|
|
Some(FactorValue {
|
|
date: snapshot.date,
|
|
symbol: snapshot.symbol.clone(),
|
|
field: "price_change_rate".to_string(),
|
|
value: snapshot.close / snapshot.prev_close - 1.0,
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect::<Vec<_>>();
|
|
if rows.is_empty() {
|
|
rows = self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&[
|
|
"price_change_rate".to_string(),
|
|
"change_rate".to_string(),
|
|
"pct_change".to_string(),
|
|
],
|
|
"price_change_rate",
|
|
);
|
|
}
|
|
rows.sort_by_key(|row| row.date);
|
|
rows
|
|
}
|
|
|
|
pub fn get_stock_connect(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&stock_connect_factor_aliases(field),
|
|
&format!("stock_connect_{}", normalize_field(field)),
|
|
)
|
|
}
|
|
|
|
pub fn current_performance(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&prefixed_factor_aliases("current_performance", field),
|
|
field,
|
|
)
|
|
}
|
|
|
|
pub fn get_fundamentals(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&prefixed_factor_aliases("fundamental", field),
|
|
field,
|
|
)
|
|
}
|
|
|
|
pub fn get_financials(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&prefixed_factor_aliases("financial", field),
|
|
field,
|
|
)
|
|
}
|
|
|
|
pub fn get_pit_financials(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
field: &str,
|
|
) -> Vec<FactorValue> {
|
|
self.get_first_available_factor_series(
|
|
symbol,
|
|
start,
|
|
end,
|
|
&prefixed_factor_aliases("pit_financial", field),
|
|
field,
|
|
)
|
|
}
|
|
|
|
pub fn get_industry(
|
|
&self,
|
|
symbol: &str,
|
|
date: NaiveDate,
|
|
source: &str,
|
|
level: usize,
|
|
) -> Option<FactorValue> {
|
|
let fields = industry_factor_aliases(source, level);
|
|
for (factor_date, snapshots) in self.factor_by_date.range(..=date).rev() {
|
|
let Some(snapshot) = snapshots.iter().find(|row| row.symbol == symbol) else {
|
|
continue;
|
|
};
|
|
for field in &fields {
|
|
if let Some(value) = factor_numeric_value(snapshot, field) {
|
|
return Some(FactorValue {
|
|
date: *factor_date,
|
|
symbol: snapshot.symbol.clone(),
|
|
field: field.clone(),
|
|
value,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
pub fn get_industry_name(
|
|
&self,
|
|
symbol: &str,
|
|
date: NaiveDate,
|
|
source: &str,
|
|
level: usize,
|
|
) -> Option<FactorTextValue> {
|
|
let fields = industry_name_factor_aliases(source, level);
|
|
for (factor_date, snapshots) in self.factor_text_by_date.range(..=date).rev() {
|
|
for snapshot in snapshots {
|
|
if snapshot.symbol != symbol {
|
|
continue;
|
|
}
|
|
let normalized = normalize_field(&snapshot.field);
|
|
if fields.iter().any(|field| field == &normalized) {
|
|
return Some(FactorTextValue {
|
|
date: *factor_date,
|
|
symbol: snapshot.symbol.clone(),
|
|
field: snapshot.field.clone(),
|
|
value: snapshot.value.clone(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
pub fn get_dominant_future(&self, underlying_symbol: &str, date: NaiveDate) -> Option<String> {
|
|
let underlying = normalize_field(underlying_symbol);
|
|
let mut candidates = self
|
|
.futures_params_by_symbol
|
|
.keys()
|
|
.filter(|symbol| normalize_field(symbol).starts_with(&underlying))
|
|
.filter(|symbol| {
|
|
self.futures_trading_parameter(date, symbol.as_str())
|
|
.is_some()
|
|
})
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
if candidates.is_empty() {
|
|
candidates = self
|
|
.instruments
|
|
.values()
|
|
.filter(|instrument| instrument.board.eq_ignore_ascii_case("FUTURE"))
|
|
.filter(|instrument| normalize_field(&instrument.symbol).starts_with(&underlying))
|
|
.filter(|instrument| instrument.is_active_on(date))
|
|
.map(|instrument| instrument.symbol.clone())
|
|
.collect();
|
|
}
|
|
candidates.sort();
|
|
candidates.into_iter().next()
|
|
}
|
|
|
|
pub fn get_dominant_future_price(
|
|
&self,
|
|
underlying_symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
frequency: &str,
|
|
) -> Vec<PriceBar> {
|
|
let Some(symbol) = self.get_dominant_future(underlying_symbol, end) else {
|
|
return Vec::new();
|
|
};
|
|
self.get_price(&symbol, start, end, frequency)
|
|
}
|
|
|
|
pub fn get_price(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
frequency: &str,
|
|
) -> Vec<PriceBar> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
match normalize_history_frequency(frequency).as_deref() {
|
|
Some("1d") => self
|
|
.market_by_date
|
|
.range(start..=end)
|
|
.flat_map(|(_, rows)| rows.iter())
|
|
.filter(|row| row.symbol == symbol)
|
|
.map(daily_market_price_bar)
|
|
.collect(),
|
|
Some("1m") | Some("tick") => {
|
|
let mut bars = self
|
|
.execution_quotes_index
|
|
.iter()
|
|
.filter(|((date, quote_symbol), _)| {
|
|
quote_symbol == symbol && *date >= start && *date <= end
|
|
})
|
|
.flat_map(|(_, rows)| rows.iter())
|
|
.map(intraday_quote_price_bar)
|
|
.collect::<Vec<_>>();
|
|
bars.sort_by(|left, right| {
|
|
left.date
|
|
.cmp(&right.date)
|
|
.then_with(|| left.timestamp.cmp(&right.timestamp))
|
|
});
|
|
bars
|
|
}
|
|
_ => Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn price(&self, date: NaiveDate, symbol: &str, field: PriceField) -> Option<f64> {
|
|
let snapshot = self.market(date, symbol)?;
|
|
Some(snapshot.price(field))
|
|
}
|
|
|
|
pub fn price_on_or_before(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
field: PriceField,
|
|
) -> Option<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.price_on_or_before(date, field))
|
|
}
|
|
|
|
pub fn factor_snapshots_on(&self, date: NaiveDate) -> Vec<&DailyFactorSnapshot> {
|
|
self.factor_by_date
|
|
.get(&date)
|
|
.map(|rows| rows.iter().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
pub fn factor_text_snapshots_on(&self, date: NaiveDate) -> Vec<&FactorTextValue> {
|
|
self.factor_text_by_date
|
|
.get(&date)
|
|
.map(|rows| rows.iter().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
pub fn market_snapshots_on(&self, date: NaiveDate) -> Vec<&DailyMarketSnapshot> {
|
|
self.market_by_date
|
|
.get(&date)
|
|
.map(|rows| rows.iter().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
pub fn candidate_snapshots_on(&self, date: NaiveDate) -> Vec<&CandidateEligibility> {
|
|
self.candidate_by_date
|
|
.get(&date)
|
|
.map(|rows| rows.iter().collect())
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
pub fn bundle_on(&self, date: NaiveDate) -> Result<DailySnapshotBundle, DataSetError> {
|
|
let benchmark = self
|
|
.benchmark(date)
|
|
.cloned()
|
|
.ok_or(DataSetError::MissingBenchmark { date })?;
|
|
Ok(DailySnapshotBundle {
|
|
date,
|
|
benchmark,
|
|
market: self.market_by_date.get(&date).cloned().unwrap_or_default(),
|
|
factors: self.factor_by_date.get(&date).cloned().unwrap_or_default(),
|
|
candidates: self
|
|
.candidate_by_date
|
|
.get(&date)
|
|
.cloned()
|
|
.unwrap_or_default(),
|
|
corporate_actions: self
|
|
.corporate_actions_by_date
|
|
.get(&date)
|
|
.cloned()
|
|
.unwrap_or_default(),
|
|
})
|
|
}
|
|
|
|
pub fn benchmark_closes_up_to(&self, date: NaiveDate, lookback: usize) -> Vec<f64> {
|
|
self.benchmark_series_cache.trailing_values(date, lookback)
|
|
}
|
|
|
|
pub fn market_closes_up_to(&self, date: NaiveDate, symbol: &str, lookback: usize) -> Vec<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.map(|series| series.trailing_values(date, lookback, PriceField::Close))
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
fn history_daily_values(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
field: &str,
|
|
include_now: bool,
|
|
) -> Vec<f64> {
|
|
self.history_daily_snapshots(date, symbol, bar_count, include_now)
|
|
.into_iter()
|
|
.filter_map(|row| daily_market_numeric_value(&row, field))
|
|
.collect()
|
|
}
|
|
|
|
fn history_intraday_values(
|
|
&self,
|
|
date: NaiveDate,
|
|
active_datetime: Option<NaiveDateTime>,
|
|
symbol: &str,
|
|
bar_count: usize,
|
|
field: &str,
|
|
include_now: bool,
|
|
) -> Vec<f64> {
|
|
self.history_intraday_quotes_at(date, active_datetime, symbol, bar_count, include_now)
|
|
.into_iter()
|
|
.filter_map(|row| intraday_quote_numeric_value(&row, field))
|
|
.collect()
|
|
}
|
|
|
|
fn historical_daily_flags<F>(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
count: usize,
|
|
evaluator: F,
|
|
) -> Vec<bool>
|
|
where
|
|
F: Fn(Option<&CandidateEligibility>, Option<&DailyMarketSnapshot>) -> bool,
|
|
{
|
|
if count == 0 {
|
|
return Vec::new();
|
|
}
|
|
let days = self
|
|
.calendar
|
|
.iter()
|
|
.filter(|day| *day <= date)
|
|
.collect::<Vec<_>>();
|
|
let start = days.len().saturating_sub(count);
|
|
days[start..]
|
|
.iter()
|
|
.map(|day| {
|
|
evaluator(
|
|
self.candidate_index.get(&(*day, symbol.to_string())),
|
|
self.market_index.get(&(*day, symbol.to_string())),
|
|
)
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
pub fn market_decision_close(&self, date: NaiveDate, symbol: &str) -> Option<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.decision_price_on_or_before(date))
|
|
}
|
|
|
|
pub fn market_decision_close_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
lookback: usize,
|
|
) -> Option<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.decision_close_moving_average(date, lookback))
|
|
}
|
|
|
|
pub fn market_decision_volume_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
lookback: usize,
|
|
) -> Option<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.decision_volume_moving_average(date, lookback))
|
|
}
|
|
|
|
pub fn factor_numeric_value(&self, date: NaiveDate, symbol: &str, field: &str) -> Option<f64> {
|
|
self.factor(date, symbol)
|
|
.and_then(|snapshot| factor_numeric_value(snapshot, field))
|
|
}
|
|
|
|
pub fn factor_text_value(&self, date: NaiveDate, symbol: &str, field: &str) -> Option<String> {
|
|
self.factor_text_index
|
|
.get(&(date, symbol.to_string(), normalize_field(field)))
|
|
.map(|row| row.value.clone())
|
|
}
|
|
|
|
fn get_first_available_factor_series(
|
|
&self,
|
|
symbol: &str,
|
|
start: NaiveDate,
|
|
end: NaiveDate,
|
|
fields: &[String],
|
|
output_field: &str,
|
|
) -> Vec<FactorValue> {
|
|
if start > end {
|
|
return Vec::new();
|
|
}
|
|
let output_field = normalize_field(output_field);
|
|
let mut rows = Vec::new();
|
|
for (_, snapshots) in self.factor_by_date.range(start..=end) {
|
|
let Some(snapshot) = snapshots.iter().find(|row| row.symbol == symbol) else {
|
|
continue;
|
|
};
|
|
for field in fields {
|
|
if let Some(value) = factor_numeric_value(snapshot, field) {
|
|
rows.push(FactorValue {
|
|
date: snapshot.date,
|
|
symbol: snapshot.symbol.clone(),
|
|
field: output_field.clone(),
|
|
value,
|
|
});
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
rows.sort_by_key(|row| row.date);
|
|
rows
|
|
}
|
|
|
|
pub fn factor_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
field: &str,
|
|
lookback: usize,
|
|
) -> Option<f64> {
|
|
if lookback == 0 {
|
|
return None;
|
|
}
|
|
let dates = self.calendar.trailing_days(date, lookback);
|
|
if dates.is_empty() {
|
|
return None;
|
|
}
|
|
let mut sum = 0.0_f64;
|
|
let mut count = 0usize;
|
|
for trading_day in dates {
|
|
let snapshot = self.factor(trading_day, symbol)?;
|
|
let value = factor_numeric_value(snapshot, field)?;
|
|
sum += value;
|
|
count += 1;
|
|
}
|
|
if count == 0 {
|
|
None
|
|
} else {
|
|
Some(sum / count as f64)
|
|
}
|
|
}
|
|
|
|
pub fn market_decision_numeric_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
field: &str,
|
|
lookback: usize,
|
|
) -> Option<f64> {
|
|
match field {
|
|
"close" | "prev_close" | "stock_close" | "price" => self
|
|
.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.decision_close_moving_average(date, lookback)),
|
|
"volume" | "stock_volume" => self
|
|
.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.decision_volume_moving_average(date, lookback)),
|
|
"day_open" | "dayopen" => {
|
|
self.market_moving_average(date, symbol, lookback, PriceField::DayOpen)
|
|
}
|
|
"open" => self.market_moving_average(date, symbol, lookback, PriceField::Open),
|
|
"last" | "last_price" => {
|
|
self.market_moving_average(date, symbol, lookback, PriceField::Last)
|
|
}
|
|
other => self.factor_moving_average(date, symbol, other, lookback),
|
|
}
|
|
}
|
|
|
|
pub fn market_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
lookback: usize,
|
|
field: PriceField,
|
|
) -> Option<f64> {
|
|
self.market_series_by_symbol
|
|
.get(symbol)
|
|
.and_then(|series| series.moving_average(date, lookback, field))
|
|
}
|
|
|
|
pub fn benchmark_moving_average(&self, date: NaiveDate, lookback: usize) -> Option<f64> {
|
|
self.benchmark_series_cache.moving_average(date, lookback)
|
|
}
|
|
|
|
pub fn benchmark_open_moving_average(&self, date: NaiveDate, lookback: usize) -> Option<f64> {
|
|
self.benchmark_series_cache
|
|
.moving_average_for(date, lookback, PriceField::Open)
|
|
}
|
|
|
|
pub fn market_open_moving_average(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
lookback: usize,
|
|
) -> Option<f64> {
|
|
self.market_moving_average(date, symbol, lookback, PriceField::Open)
|
|
}
|
|
|
|
pub fn eligible_universe_on(&self, date: NaiveDate) -> &[EligibleUniverseSnapshot] {
|
|
self.eligible_universe_by_date
|
|
.get(&date)
|
|
.map(Vec::as_slice)
|
|
.unwrap_or(&[])
|
|
}
|
|
|
|
pub fn require_market(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
) -> Result<&DailyMarketSnapshot, DataSetError> {
|
|
self.market(date, symbol)
|
|
.ok_or_else(|| DataSetError::MissingSnapshot {
|
|
kind: "market",
|
|
date,
|
|
symbol: symbol.to_string(),
|
|
})
|
|
}
|
|
|
|
pub fn require_candidate(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
) -> Result<&CandidateEligibility, DataSetError> {
|
|
self.candidate(date, symbol)
|
|
.ok_or_else(|| DataSetError::MissingSnapshot {
|
|
kind: "candidate",
|
|
date,
|
|
symbol: symbol.to_string(),
|
|
})
|
|
}
|
|
|
|
pub fn require_factor(
|
|
&self,
|
|
date: NaiveDate,
|
|
symbol: &str,
|
|
) -> Result<&DailyFactorSnapshot, DataSetError> {
|
|
self.factor(date, symbol)
|
|
.ok_or_else(|| DataSetError::MissingSnapshot {
|
|
kind: "factor",
|
|
date,
|
|
symbol: symbol.to_string(),
|
|
})
|
|
}
|
|
}
|
|
|
|
fn read_instruments(path: &Path) -> Result<Vec<Instrument>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut instruments = Vec::new();
|
|
for row in rows {
|
|
instruments.push(Instrument {
|
|
symbol: row.get(0)?.to_string(),
|
|
name: row.get(1)?.to_string(),
|
|
board: row.get(2)?.to_string(),
|
|
round_lot: row.parse_optional_u32(3).unwrap_or(100),
|
|
listed_at: row.parse_optional_date(4)?,
|
|
delisted_at: row.parse_optional_date(5)?,
|
|
status: row
|
|
.fields
|
|
.get(6)
|
|
.map(|value| value.trim())
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or("active")
|
|
.to_string(),
|
|
});
|
|
}
|
|
Ok(instruments)
|
|
}
|
|
|
|
fn read_market(path: &Path) -> Result<Vec<DailyMarketSnapshot>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut snapshots = Vec::new();
|
|
for row in rows {
|
|
let open = row.parse_f64(2)?;
|
|
let close = row.parse_f64(5)?;
|
|
let prev_close = row.parse_f64(6)?;
|
|
let price_tick = row.parse_optional_f64(15).unwrap_or(0.01);
|
|
let derived_upper_limit = round_price_to_tick(prev_close * 1.10, price_tick);
|
|
let derived_lower_limit = round_price_to_tick(prev_close * 0.90, price_tick);
|
|
snapshots.push(DailyMarketSnapshot {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
timestamp: row
|
|
.fields
|
|
.get(16)
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty()),
|
|
day_open: row.parse_optional_f64(11).unwrap_or(open),
|
|
open,
|
|
high: row.parse_f64(3)?,
|
|
low: row.parse_f64(4)?,
|
|
close,
|
|
last_price: row.parse_optional_f64(12).unwrap_or(close),
|
|
bid1: row.parse_optional_f64(13).unwrap_or(close),
|
|
ask1: row.parse_optional_f64(14).unwrap_or(close),
|
|
prev_close,
|
|
volume: row.parse_u64(7)?,
|
|
tick_volume: row.parse_optional_u64(17).unwrap_or_default(),
|
|
bid1_volume: row.parse_optional_u64(18).unwrap_or_default(),
|
|
ask1_volume: row.parse_optional_u64(19).unwrap_or_default(),
|
|
trading_phase: row
|
|
.fields
|
|
.get(20)
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty()),
|
|
paused: row.parse_bool(8)?,
|
|
upper_limit: row.parse_optional_f64(9).unwrap_or(derived_upper_limit),
|
|
lower_limit: row.parse_optional_f64(10).unwrap_or(derived_lower_limit),
|
|
price_tick,
|
|
});
|
|
}
|
|
Ok(snapshots)
|
|
}
|
|
|
|
fn read_factors(path: &Path) -> Result<Vec<DailyFactorSnapshot>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut snapshots = Vec::new();
|
|
for row in rows {
|
|
let (extra_factors, _) = parse_extra_factor_maps(&row);
|
|
snapshots.push(DailyFactorSnapshot {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
market_cap_bn: row.parse_f64(2)?,
|
|
free_float_cap_bn: row.parse_f64(3)?,
|
|
pe_ttm: row.parse_f64(4)?,
|
|
turnover_ratio: row.parse_optional_f64(5),
|
|
effective_turnover_ratio: row.parse_optional_f64(6),
|
|
extra_factors,
|
|
});
|
|
}
|
|
Ok(snapshots)
|
|
}
|
|
|
|
fn read_factor_texts(path: &Path) -> Result<Vec<FactorTextValue>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut text_values = Vec::new();
|
|
for row in rows {
|
|
let date = row.parse_date(0)?;
|
|
let symbol = row.get(1)?.to_string();
|
|
let (_, extra_text_factors) = parse_extra_factor_maps(&row);
|
|
for (field, value) in extra_text_factors {
|
|
text_values.push(FactorTextValue {
|
|
date,
|
|
symbol: symbol.clone(),
|
|
field,
|
|
value,
|
|
});
|
|
}
|
|
}
|
|
Ok(text_values)
|
|
}
|
|
|
|
fn parse_extra_factor_maps(row: &CsvRow) -> (BTreeMap<String, f64>, BTreeMap<String, String>) {
|
|
let mut numeric = BTreeMap::new();
|
|
let mut text = BTreeMap::new();
|
|
for value in row.fields.get(7).into_iter().chain(row.fields.get(8)) {
|
|
merge_extra_factor_json(value, &mut numeric, &mut text);
|
|
}
|
|
(numeric, text)
|
|
}
|
|
|
|
fn merge_extra_factor_json(
|
|
raw: &str,
|
|
numeric: &mut BTreeMap<String, f64>,
|
|
text: &mut BTreeMap<String, String>,
|
|
) {
|
|
let trimmed = raw.trim();
|
|
if trimmed.is_empty() {
|
|
return;
|
|
}
|
|
let Ok(serde_json::Value::Object(map)) = serde_json::from_str::<serde_json::Value>(trimmed)
|
|
else {
|
|
return;
|
|
};
|
|
for (key, value) in map {
|
|
let key = normalize_field(&key);
|
|
if key.is_empty() {
|
|
continue;
|
|
}
|
|
match value {
|
|
serde_json::Value::Number(number) => {
|
|
if let Some(value) = number.as_f64().filter(|value| value.is_finite()) {
|
|
numeric.insert(key, value);
|
|
}
|
|
}
|
|
serde_json::Value::String(value) => {
|
|
text.insert(key, value);
|
|
}
|
|
serde_json::Value::Bool(value) => {
|
|
numeric.insert(key.clone(), if value { 1.0 } else { 0.0 });
|
|
text.insert(key, value.to_string());
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn normalized_aliases(values: &[String]) -> Vec<String> {
|
|
let mut aliases = Vec::new();
|
|
for value in values {
|
|
let normalized = normalize_field(value);
|
|
if !aliases.contains(&normalized) {
|
|
aliases.push(normalized);
|
|
}
|
|
}
|
|
aliases
|
|
}
|
|
|
|
fn shares_factor_aliases(share_type: &str) -> Vec<String> {
|
|
let field = normalize_field(share_type);
|
|
let values = match field.as_str() {
|
|
"" | "all" | "total" => vec![
|
|
"total_shares",
|
|
"shares_total",
|
|
"total_share",
|
|
"total_share_capital",
|
|
"capitalization",
|
|
"shares",
|
|
],
|
|
"float" | "free_float" | "circulating" | "circulation" => vec![
|
|
"free_float_shares",
|
|
"float_shares",
|
|
"circulating_shares",
|
|
"circulation_shares",
|
|
"float_a_shares",
|
|
],
|
|
"a" | "a_share" | "a_shares" => vec!["a_shares", "shares_a", "a_share_capital"],
|
|
other => {
|
|
return normalized_aliases(&[
|
|
other.to_string(),
|
|
format!("shares_{other}"),
|
|
format!("{other}_shares"),
|
|
]);
|
|
}
|
|
};
|
|
normalized_aliases(
|
|
&values
|
|
.iter()
|
|
.map(|value| value.to_string())
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
}
|
|
|
|
fn turnover_rate_factor_aliases(field: &str) -> Vec<String> {
|
|
let field = normalize_field(field);
|
|
let values = match field.as_str() {
|
|
"" | "all" | "rate" | "turnover" | "turnover_rate" | "turnover_ratio" => {
|
|
vec!["turnover_rate", "turnover_ratio"]
|
|
}
|
|
"effective" | "effective_turnover" | "effective_turnover_rate" => {
|
|
vec!["effective_turnover_rate", "effective_turnover_ratio"]
|
|
}
|
|
other => {
|
|
return normalized_aliases(&[
|
|
other.to_string(),
|
|
format!("turnover_rate_{other}"),
|
|
format!("{other}_turnover_rate"),
|
|
format!("turnover_ratio_{other}"),
|
|
format!("{other}_turnover_ratio"),
|
|
]);
|
|
}
|
|
};
|
|
normalized_aliases(
|
|
&values
|
|
.iter()
|
|
.map(|value| value.to_string())
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
}
|
|
|
|
fn stock_connect_factor_aliases(field: &str) -> Vec<String> {
|
|
let field = normalize_field(field);
|
|
let values = match field.as_str() {
|
|
"" | "all" | "connect" | "stock_connect" => {
|
|
vec![
|
|
"stock_connect",
|
|
"stock_connect_all",
|
|
"connect_all",
|
|
"north_bound",
|
|
]
|
|
}
|
|
"north" | "north_bound" | "northbound" => vec![
|
|
"stock_connect_north_bound",
|
|
"stock_connect_northbound",
|
|
"connect_north_bound",
|
|
"north_bound",
|
|
"northbound",
|
|
],
|
|
"south" | "south_bound" | "southbound" => vec![
|
|
"stock_connect_south_bound",
|
|
"stock_connect_southbound",
|
|
"connect_south_bound",
|
|
"south_bound",
|
|
"southbound",
|
|
],
|
|
other => {
|
|
return normalized_aliases(&[
|
|
other.to_string(),
|
|
format!("stock_connect_{other}"),
|
|
format!("connect_{other}"),
|
|
]);
|
|
}
|
|
};
|
|
normalized_aliases(
|
|
&values
|
|
.iter()
|
|
.map(|value| value.to_string())
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
}
|
|
|
|
fn prefixed_factor_aliases(prefix: &str, field: &str) -> Vec<String> {
|
|
let prefix = normalize_field(prefix);
|
|
let field = normalize_field(field);
|
|
let plural_prefix = format!("{prefix}s");
|
|
normalized_aliases(&[
|
|
format!("{prefix}_{field}"),
|
|
format!("{plural_prefix}_{field}"),
|
|
field.clone(),
|
|
])
|
|
}
|
|
|
|
fn industry_factor_aliases(source: &str, level: usize) -> Vec<String> {
|
|
let source = normalize_field(source);
|
|
normalized_aliases(&[
|
|
format!("industry_{source}_l{level}"),
|
|
format!("industry_{source}_{level}"),
|
|
format!("{source}_industry_l{level}"),
|
|
format!("{source}_industry_{level}"),
|
|
format!("industry_l{level}"),
|
|
format!("industry_{level}"),
|
|
"industry_code".to_string(),
|
|
])
|
|
}
|
|
|
|
fn industry_name_factor_aliases(source: &str, level: usize) -> Vec<String> {
|
|
let source = normalize_field(source);
|
|
normalized_aliases(&[
|
|
format!("industry_{source}_l{level}_name"),
|
|
format!("industry_{source}_{level}_name"),
|
|
format!("industry_{source}_name_l{level}"),
|
|
format!("{source}_industry_l{level}_name"),
|
|
format!("{source}_industry_{level}_name"),
|
|
format!("{source}_industry_name_l{level}"),
|
|
format!("industry_l{level}_name"),
|
|
format!("industry_{level}_name"),
|
|
"industry_name".to_string(),
|
|
])
|
|
}
|
|
|
|
fn factor_numeric_value(snapshot: &DailyFactorSnapshot, field: &str) -> Option<f64> {
|
|
match field {
|
|
"market_cap" | "market_cap_bn" => Some(snapshot.market_cap_bn),
|
|
"free_float_cap" | "free_float_market_cap" | "free_float_cap_bn" => {
|
|
Some(snapshot.free_float_cap_bn)
|
|
}
|
|
"pe_ttm" => Some(snapshot.pe_ttm),
|
|
"turnover_ratio" => snapshot.turnover_ratio,
|
|
"effective_turnover_ratio" => snapshot.effective_turnover_ratio,
|
|
other => snapshot.extra_factors.get(other).copied(),
|
|
}
|
|
}
|
|
|
|
fn daily_market_numeric_value(snapshot: &DailyMarketSnapshot, field: &str) -> Option<f64> {
|
|
match normalize_field(field).as_str() {
|
|
"day_open" | "dayopen" => Some(snapshot.day_open),
|
|
"open" => Some(snapshot.open),
|
|
"high" => Some(snapshot.high),
|
|
"low" => Some(snapshot.low),
|
|
"close" | "price" => Some(snapshot.close),
|
|
"last" | "last_price" => Some(snapshot.last_price),
|
|
"prev_close" | "pre_close" => Some(snapshot.prev_close),
|
|
"volume" => Some(snapshot.volume as f64),
|
|
"tick_volume" => Some(snapshot.tick_volume as f64),
|
|
"bid1" => Some(snapshot.bid1),
|
|
"ask1" => Some(snapshot.ask1),
|
|
"bid1_volume" => Some(snapshot.bid1_volume as f64),
|
|
"ask1_volume" => Some(snapshot.ask1_volume as f64),
|
|
"upper_limit" => Some(snapshot.upper_limit),
|
|
"lower_limit" => Some(snapshot.lower_limit),
|
|
"price_tick" => Some(snapshot.price_tick),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn intraday_quote_numeric_value(snapshot: &IntradayExecutionQuote, field: &str) -> Option<f64> {
|
|
match normalize_field(field).as_str() {
|
|
"last" | "last_price" | "close" | "price" => Some(snapshot.last_price),
|
|
"bid1" => Some(snapshot.bid1),
|
|
"ask1" => Some(snapshot.ask1),
|
|
"bid1_volume" => Some(snapshot.bid1_volume as f64),
|
|
"ask1_volume" => Some(snapshot.ask1_volume as f64),
|
|
"volume" | "volume_delta" => Some(snapshot.volume_delta as f64),
|
|
"amount" | "amount_delta" | "total_turnover" => Some(snapshot.amount_delta),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn intraday_quote_visible(
|
|
quote: &IntradayExecutionQuote,
|
|
date: NaiveDate,
|
|
active_datetime: Option<NaiveDateTime>,
|
|
include_now: bool,
|
|
) -> bool {
|
|
if quote.date < date {
|
|
return true;
|
|
}
|
|
if quote.date > date {
|
|
return false;
|
|
}
|
|
let Some(active_datetime) = active_datetime.filter(|value| value.date() == date) else {
|
|
return include_now;
|
|
};
|
|
if include_now {
|
|
quote.timestamp <= active_datetime
|
|
} else {
|
|
quote.timestamp < active_datetime
|
|
}
|
|
}
|
|
|
|
fn daily_market_price_bar(snapshot: &DailyMarketSnapshot) -> PriceBar {
|
|
PriceBar {
|
|
date: snapshot.date,
|
|
timestamp: snapshot.timestamp.clone(),
|
|
symbol: snapshot.symbol.clone(),
|
|
frequency: "1d".to_string(),
|
|
open: snapshot.open,
|
|
high: snapshot.high,
|
|
low: snapshot.low,
|
|
close: snapshot.close,
|
|
last_price: snapshot.last_price,
|
|
volume: snapshot.volume,
|
|
amount: 0.0,
|
|
bid1: snapshot.bid1,
|
|
ask1: snapshot.ask1,
|
|
bid1_volume: snapshot.bid1_volume,
|
|
ask1_volume: snapshot.ask1_volume,
|
|
}
|
|
}
|
|
|
|
fn intraday_quote_price_bar(snapshot: &IntradayExecutionQuote) -> PriceBar {
|
|
PriceBar {
|
|
date: snapshot.date,
|
|
timestamp: Some(snapshot.timestamp.format("%Y-%m-%d %H:%M:%S").to_string()),
|
|
symbol: snapshot.symbol.clone(),
|
|
frequency: "tick".to_string(),
|
|
open: snapshot.last_price,
|
|
high: snapshot.last_price,
|
|
low: snapshot.last_price,
|
|
close: snapshot.last_price,
|
|
last_price: snapshot.last_price,
|
|
volume: snapshot.volume_delta,
|
|
amount: snapshot.amount_delta,
|
|
bid1: snapshot.bid1,
|
|
ask1: snapshot.ask1,
|
|
bid1_volume: snapshot.bid1_volume,
|
|
ask1_volume: snapshot.ask1_volume,
|
|
}
|
|
}
|
|
|
|
fn normalize_field(field: &str) -> String {
|
|
field
|
|
.trim()
|
|
.trim_matches('"')
|
|
.trim_matches('\'')
|
|
.to_ascii_lowercase()
|
|
}
|
|
|
|
fn normalize_history_frequency(frequency: &str) -> Option<String> {
|
|
let normalized = normalize_field(frequency);
|
|
match normalized.as_str() {
|
|
"1d" | "d" | "day" | "daily" => Some("1d".to_string()),
|
|
"1m" | "m" | "minute" | "min" => Some("1m".to_string()),
|
|
"tick" | "t" => Some("tick".to_string()),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn take_last<T>(mut rows: Vec<T>, count: usize) -> Vec<T> {
|
|
if rows.len() <= count {
|
|
return rows;
|
|
}
|
|
rows.split_off(rows.len() - count)
|
|
}
|
|
|
|
fn read_candidates(path: &Path) -> Result<Vec<CandidateEligibility>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut snapshots = Vec::new();
|
|
for row in rows {
|
|
snapshots.push(CandidateEligibility {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
is_st: row.parse_bool(2)?,
|
|
is_new_listing: row.parse_bool(3)?,
|
|
is_paused: row.parse_bool(4)?,
|
|
allow_buy: row.parse_bool(5)?,
|
|
allow_sell: row.parse_bool(6)?,
|
|
is_kcb: row.parse_optional_bool(7).unwrap_or(false),
|
|
is_one_yuan: row.parse_optional_bool(8).unwrap_or(false),
|
|
});
|
|
}
|
|
Ok(snapshots)
|
|
}
|
|
|
|
fn read_benchmarks(path: &Path) -> Result<Vec<BenchmarkSnapshot>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut snapshots = Vec::new();
|
|
for row in rows {
|
|
snapshots.push(BenchmarkSnapshot {
|
|
date: row.parse_date(0)?,
|
|
benchmark: row.get(1)?.to_string(),
|
|
open: row.parse_f64(2)?,
|
|
close: row.parse_f64(3)?,
|
|
prev_close: row.parse_f64(4)?,
|
|
volume: row.parse_u64(5)?,
|
|
});
|
|
}
|
|
Ok(snapshots)
|
|
}
|
|
|
|
fn read_corporate_actions(path: &Path) -> Result<Vec<CorporateAction>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut snapshots = Vec::new();
|
|
for row in rows {
|
|
let has_payable_date = row.fields.len() >= 10;
|
|
let payable_date = if has_payable_date {
|
|
row.parse_optional_date(2)?
|
|
} else {
|
|
None
|
|
};
|
|
let offset = if has_payable_date { 1 } else { 0 };
|
|
snapshots.push(CorporateAction {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
payable_date,
|
|
share_cash: row.parse_optional_f64(2 + offset).unwrap_or(0.0),
|
|
share_bonus: row.parse_optional_f64(3 + offset).unwrap_or(0.0),
|
|
share_gift: row.parse_optional_f64(4 + offset).unwrap_or(0.0),
|
|
issue_quantity: row.parse_optional_f64(5 + offset).unwrap_or(0.0),
|
|
issue_price: row.parse_optional_f64(6 + offset).unwrap_or(0.0),
|
|
reform: row.parse_optional_bool(7 + offset).unwrap_or(false),
|
|
adjust_factor: row.parse_optional_f64(8 + offset),
|
|
successor_symbol: row
|
|
.fields
|
|
.get(9 + offset)
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty()),
|
|
successor_ratio: row.parse_optional_f64(10 + offset),
|
|
successor_cash: row.parse_optional_f64(11 + offset),
|
|
});
|
|
}
|
|
Ok(snapshots)
|
|
}
|
|
|
|
fn read_execution_quotes(path: &Path) -> Result<Vec<IntradayExecutionQuote>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut quotes = Vec::new();
|
|
for row in rows {
|
|
quotes.push(IntradayExecutionQuote {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
timestamp: row.parse_datetime(2)?,
|
|
last_price: row.parse_optional_f64(3).unwrap_or_default(),
|
|
bid1: row.parse_optional_f64(4).unwrap_or_default(),
|
|
ask1: row.parse_optional_f64(5).unwrap_or_default(),
|
|
bid1_volume: row.parse_optional_u64(6).unwrap_or_default(),
|
|
ask1_volume: row.parse_optional_u64(7).unwrap_or_default(),
|
|
volume_delta: row.parse_optional_u64(8).unwrap_or_default(),
|
|
amount_delta: row.parse_optional_f64(9).unwrap_or_default(),
|
|
trading_phase: row
|
|
.fields
|
|
.get(10)
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty()),
|
|
});
|
|
}
|
|
Ok(quotes)
|
|
}
|
|
|
|
fn read_order_book_depth(path: &Path) -> Result<Vec<IntradayOrderBookDepthLevel>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut levels = Vec::new();
|
|
for row in rows {
|
|
levels.push(IntradayOrderBookDepthLevel {
|
|
date: row.parse_date(0)?,
|
|
symbol: row.get(1)?.to_string(),
|
|
timestamp: row.parse_datetime(2)?,
|
|
level: row
|
|
.parse_optional_u32(3)
|
|
.unwrap_or(1)
|
|
.clamp(1, u8::MAX as u32) as u8,
|
|
bid_price: row.parse_optional_f64(4).unwrap_or_default(),
|
|
bid_volume: row.parse_optional_u64(5).unwrap_or_default(),
|
|
ask_price: row.parse_optional_f64(6).unwrap_or_default(),
|
|
ask_volume: row.parse_optional_u64(7).unwrap_or_default(),
|
|
});
|
|
}
|
|
Ok(levels)
|
|
}
|
|
|
|
fn read_futures_trading_parameters(
|
|
path: &Path,
|
|
) -> Result<Vec<FuturesTradingParameter>, DataSetError> {
|
|
let rows = read_rows(path)?;
|
|
let mut params = Vec::new();
|
|
for row in rows {
|
|
let first = row.get(0)?.trim();
|
|
let (effective_date, symbol_index) = if NaiveDate::parse_from_str(first, "%Y-%m-%d").is_ok()
|
|
{
|
|
(row.parse_optional_date(0)?, 1)
|
|
} else {
|
|
(None, 0)
|
|
};
|
|
params.push(FuturesTradingParameter {
|
|
effective_date,
|
|
symbol: row.get(symbol_index)?.to_string(),
|
|
contract_multiplier: row.parse_optional_f64(symbol_index + 1).unwrap_or(1.0),
|
|
long_margin_rate: row.parse_optional_f64(symbol_index + 2).unwrap_or(0.0),
|
|
short_margin_rate: row.parse_optional_f64(symbol_index + 3).unwrap_or(0.0),
|
|
commission_type: row
|
|
.fields
|
|
.get(symbol_index + 4)
|
|
.map(|value| FuturesCommissionType::parse(value))
|
|
.unwrap_or(FuturesCommissionType::ByMoney),
|
|
open_commission_ratio: row.parse_optional_f64(symbol_index + 5).unwrap_or(0.0),
|
|
close_commission_ratio: row.parse_optional_f64(symbol_index + 6).unwrap_or(0.0),
|
|
close_today_commission_ratio: row
|
|
.parse_optional_f64(symbol_index + 7)
|
|
.unwrap_or_else(|| row.parse_optional_f64(symbol_index + 6).unwrap_or(0.0)),
|
|
price_tick: row.parse_optional_f64(symbol_index + 8).unwrap_or(1.0),
|
|
});
|
|
}
|
|
Ok(params)
|
|
}
|
|
|
|
struct CsvRow {
|
|
path: String,
|
|
line: usize,
|
|
fields: Vec<String>,
|
|
}
|
|
|
|
impl CsvRow {
|
|
fn get(&self, index: usize) -> Result<&str, DataSetError> {
|
|
self.fields
|
|
.get(index)
|
|
.map(String::as_str)
|
|
.ok_or_else(|| DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("missing column {index}"),
|
|
})
|
|
}
|
|
|
|
fn parse_date(&self, index: usize) -> Result<NaiveDate, DataSetError> {
|
|
NaiveDate::parse_from_str(self.get(index)?, "%Y-%m-%d").map_err(|err| {
|
|
DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid date: {err}"),
|
|
}
|
|
})
|
|
}
|
|
|
|
fn parse_f64(&self, index: usize) -> Result<f64, DataSetError> {
|
|
self.get(index)?
|
|
.parse::<f64>()
|
|
.map_err(|err| DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid f64: {err}"),
|
|
})
|
|
}
|
|
|
|
fn parse_u64(&self, index: usize) -> Result<u64, DataSetError> {
|
|
self.get(index)?
|
|
.parse::<u64>()
|
|
.map_err(|err| DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid u64: {err}"),
|
|
})
|
|
}
|
|
|
|
fn parse_optional_f64(&self, index: usize) -> Option<f64> {
|
|
self.fields.get(index).and_then(|value| {
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() {
|
|
None
|
|
} else {
|
|
trimmed.parse::<f64>().ok()
|
|
}
|
|
})
|
|
}
|
|
|
|
fn parse_bool(&self, index: usize) -> Result<bool, DataSetError> {
|
|
self.get(index)?
|
|
.parse::<bool>()
|
|
.map_err(|err| DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid bool: {err}"),
|
|
})
|
|
}
|
|
|
|
fn parse_optional_bool(&self, index: usize) -> Option<bool> {
|
|
self.fields
|
|
.get(index)
|
|
.and_then(|value| value.parse::<bool>().ok())
|
|
}
|
|
|
|
fn parse_optional_date(&self, index: usize) -> Result<Option<NaiveDate>, DataSetError> {
|
|
let Some(value) = self.fields.get(index) else {
|
|
return Ok(None);
|
|
};
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() {
|
|
return Ok(None);
|
|
}
|
|
NaiveDate::parse_from_str(trimmed, "%Y-%m-%d")
|
|
.map(Some)
|
|
.map_err(|err| DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid optional date: {err}"),
|
|
})
|
|
}
|
|
|
|
fn parse_datetime(&self, index: usize) -> Result<NaiveDateTime, DataSetError> {
|
|
NaiveDateTime::parse_from_str(self.get(index)?, "%Y-%m-%d %H:%M:%S").map_err(|err| {
|
|
DataSetError::InvalidRow {
|
|
path: self.path.clone(),
|
|
line: self.line,
|
|
message: format!("invalid datetime: {err}"),
|
|
}
|
|
})
|
|
}
|
|
|
|
fn parse_optional_u32(&self, index: usize) -> Option<u32> {
|
|
self.fields.get(index).and_then(|value| {
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() {
|
|
None
|
|
} else {
|
|
trimmed.parse::<u32>().ok()
|
|
}
|
|
})
|
|
}
|
|
|
|
fn parse_optional_u64(&self, index: usize) -> Option<u64> {
|
|
self.fields.get(index).and_then(|value| {
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() {
|
|
None
|
|
} else {
|
|
trimmed.parse::<u64>().ok()
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
fn read_partitioned_dir<T, F>(dir: &Path, mut loader: F) -> Result<Vec<T>, DataSetError>
|
|
where
|
|
F: FnMut(&Path) -> Result<Vec<T>, DataSetError>,
|
|
{
|
|
let mut rows = Vec::new();
|
|
let mut stack = vec![dir.to_path_buf()];
|
|
|
|
while let Some(current_dir) = stack.pop() {
|
|
let mut entries = fs::read_dir(¤t_dir)
|
|
.map_err(|source| DataSetError::Io {
|
|
path: current_dir.display().to_string(),
|
|
source,
|
|
})?
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.map_err(|source| DataSetError::Io {
|
|
path: current_dir.display().to_string(),
|
|
source,
|
|
})?;
|
|
entries.sort_by_key(|entry| entry.path());
|
|
|
|
for entry in entries.into_iter().rev() {
|
|
let path = entry.path();
|
|
if path.is_dir() {
|
|
stack.push(path);
|
|
continue;
|
|
}
|
|
if path.extension().and_then(|x| x.to_str()) != Some("csv") {
|
|
continue;
|
|
}
|
|
rows.extend(loader(&path)?);
|
|
}
|
|
}
|
|
|
|
Ok(rows)
|
|
}
|
|
|
|
fn read_rows(path: &Path) -> Result<Vec<CsvRow>, DataSetError> {
|
|
let content = fs::read_to_string(path).map_err(|source| DataSetError::Io {
|
|
path: path.display().to_string(),
|
|
source,
|
|
})?;
|
|
|
|
let mut rows = Vec::new();
|
|
for (line_idx, line) in content.lines().enumerate() {
|
|
let line_no = line_idx + 1;
|
|
if line_no == 1 || line.trim().is_empty() {
|
|
continue;
|
|
}
|
|
|
|
rows.push(CsvRow {
|
|
path: path.display().to_string(),
|
|
line: line_no,
|
|
fields: split_csv_line(line),
|
|
});
|
|
}
|
|
|
|
Ok(rows)
|
|
}
|
|
|
|
fn split_csv_line(line: &str) -> Vec<String> {
|
|
let mut fields = Vec::new();
|
|
let mut field = String::new();
|
|
let mut chars = line.trim_start_matches('\u{feff}').chars().peekable();
|
|
let mut in_quotes = false;
|
|
|
|
while let Some(ch) = chars.next() {
|
|
match ch {
|
|
'"' if in_quotes && chars.peek() == Some(&'"') => {
|
|
field.push('"');
|
|
chars.next();
|
|
}
|
|
'"' => {
|
|
in_quotes = !in_quotes;
|
|
}
|
|
',' if !in_quotes => {
|
|
fields.push(field.trim().to_string());
|
|
field.clear();
|
|
}
|
|
_ => field.push(ch),
|
|
}
|
|
}
|
|
fields.push(field.trim().to_string());
|
|
fields
|
|
}
|
|
|
|
fn group_by_date<T, F>(rows: Vec<T>, mut date_of: F) -> BTreeMap<NaiveDate, Vec<T>>
|
|
where
|
|
F: FnMut(&T) -> NaiveDate,
|
|
{
|
|
let mut grouped = BTreeMap::<NaiveDate, Vec<T>>::new();
|
|
for row in rows {
|
|
grouped.entry(date_of(&row)).or_default().push(row);
|
|
}
|
|
grouped
|
|
}
|
|
|
|
fn collect_benchmark_code(benchmarks: &[BenchmarkSnapshot]) -> Result<String, DataSetError> {
|
|
let mut codes = benchmarks
|
|
.iter()
|
|
.map(|row| row.benchmark.clone())
|
|
.collect::<Vec<_>>();
|
|
codes.sort_unstable();
|
|
codes.dedup();
|
|
|
|
if codes.len() == 1 {
|
|
Ok(codes.remove(0))
|
|
} else {
|
|
Err(DataSetError::MultipleBenchmarks)
|
|
}
|
|
}
|
|
|
|
fn round_price_to_tick(value: f64, tick: f64) -> f64 {
|
|
let effective_tick = if tick.is_finite() && tick > 0.0 {
|
|
tick
|
|
} else {
|
|
0.01
|
|
};
|
|
((value / effective_tick).round() * effective_tick * 10000.0).round() / 10000.0
|
|
}
|
|
|
|
fn prefix_sums(values: &[f64]) -> Vec<f64> {
|
|
let mut prefix = Vec::with_capacity(values.len() + 1);
|
|
prefix.push(0.0);
|
|
for value in values {
|
|
let next = prefix.last().copied().unwrap_or_default() + *value;
|
|
prefix.push(next);
|
|
}
|
|
prefix
|
|
}
|
|
|
|
mod optional_date_format {
|
|
use chrono::NaiveDate;
|
|
use serde::{self, Deserialize, Deserializer, Serializer};
|
|
|
|
const FORMAT: &str = "%Y-%m-%d";
|
|
|
|
pub fn serialize<S>(date: &Option<NaiveDate>, serializer: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: Serializer,
|
|
{
|
|
match date {
|
|
Some(date) => serializer.serialize_some(&date.format(FORMAT).to_string()),
|
|
None => serializer.serialize_none(),
|
|
}
|
|
}
|
|
|
|
pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<NaiveDate>, D::Error>
|
|
where
|
|
D: Deserializer<'de>,
|
|
{
|
|
let text = Option::<String>::deserialize(deserializer)?;
|
|
match text
|
|
.as_deref()
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
{
|
|
Some(text) => NaiveDate::parse_from_str(text, FORMAT)
|
|
.map(Some)
|
|
.map_err(serde::de::Error::custom),
|
|
None => Ok(None),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn build_market_series(
|
|
market_by_date: &BTreeMap<NaiveDate, Vec<DailyMarketSnapshot>>,
|
|
) -> HashMap<String, SymbolPriceSeries> {
|
|
let mut grouped = HashMap::<String, Vec<DailyMarketSnapshot>>::new();
|
|
for rows in market_by_date.values() {
|
|
for row in rows {
|
|
grouped
|
|
.entry(row.symbol.clone())
|
|
.or_default()
|
|
.push(row.clone());
|
|
}
|
|
}
|
|
|
|
grouped
|
|
.into_iter()
|
|
.map(|(symbol, rows)| (symbol, SymbolPriceSeries::new(&rows)))
|
|
.collect()
|
|
}
|
|
|
|
fn build_futures_params_index(
|
|
rows: Vec<FuturesTradingParameter>,
|
|
) -> HashMap<String, Vec<FuturesTradingParameter>> {
|
|
let mut grouped = HashMap::<String, Vec<FuturesTradingParameter>>::new();
|
|
for row in rows {
|
|
grouped.entry(row.symbol.clone()).or_default().push(row);
|
|
}
|
|
for rows in grouped.values_mut() {
|
|
rows.sort_by_key(|row| row.effective_date);
|
|
}
|
|
grouped
|
|
}
|
|
|
|
fn build_execution_quote_index(
|
|
execution_quotes: Vec<IntradayExecutionQuote>,
|
|
) -> HashMap<(NaiveDate, String), Vec<IntradayExecutionQuote>> {
|
|
let mut grouped = HashMap::<(NaiveDate, String), Vec<IntradayExecutionQuote>>::new();
|
|
for quote in execution_quotes {
|
|
grouped
|
|
.entry((quote.date, quote.symbol.clone()))
|
|
.or_default()
|
|
.push(quote);
|
|
}
|
|
|
|
for quotes in grouped.values_mut() {
|
|
quotes.sort_by_key(|quote| quote.timestamp);
|
|
}
|
|
|
|
grouped
|
|
}
|
|
|
|
fn build_order_book_depth_index(
|
|
order_book_depth: Vec<IntradayOrderBookDepthLevel>,
|
|
) -> HashMap<(NaiveDate, String), Vec<IntradayOrderBookDepthLevel>> {
|
|
let mut grouped = HashMap::<(NaiveDate, String), Vec<IntradayOrderBookDepthLevel>>::new();
|
|
for level in order_book_depth {
|
|
grouped
|
|
.entry((level.date, level.symbol.clone()))
|
|
.or_default()
|
|
.push(level);
|
|
}
|
|
|
|
for levels in grouped.values_mut() {
|
|
levels.sort_by(|left, right| {
|
|
left.timestamp
|
|
.cmp(&right.timestamp)
|
|
.then(left.level.cmp(&right.level))
|
|
});
|
|
}
|
|
|
|
grouped
|
|
}
|
|
|
|
fn build_eligible_universe(
|
|
factor_by_date: &BTreeMap<NaiveDate, Vec<DailyFactorSnapshot>>,
|
|
candidate_index: &HashMap<(NaiveDate, String), CandidateEligibility>,
|
|
market_index: &HashMap<(NaiveDate, String), DailyMarketSnapshot>,
|
|
) -> BTreeMap<NaiveDate, Vec<EligibleUniverseSnapshot>> {
|
|
let mut per_date = BTreeMap::<NaiveDate, Vec<EligibleUniverseSnapshot>>::new();
|
|
|
|
for (date, factors) in factor_by_date {
|
|
let mut rows = Vec::new();
|
|
for factor in factors {
|
|
if factor.market_cap_bn <= 0.0 || !factor.market_cap_bn.is_finite() {
|
|
continue;
|
|
}
|
|
let key = (*date, factor.symbol.clone());
|
|
let Some(candidate) = candidate_index.get(&key) else {
|
|
continue;
|
|
};
|
|
let Some(market) = market_index.get(&key) else {
|
|
continue;
|
|
};
|
|
if !candidate.eligible_for_selection() || market.paused {
|
|
continue;
|
|
}
|
|
rows.push(EligibleUniverseSnapshot {
|
|
symbol: factor.symbol.clone(),
|
|
market_cap_bn: factor.market_cap_bn,
|
|
free_float_cap_bn: factor.free_float_cap_bn,
|
|
});
|
|
}
|
|
rows.sort_by(|left, right| {
|
|
left.market_cap_bn
|
|
.partial_cmp(&right.market_cap_bn)
|
|
.unwrap_or(std::cmp::Ordering::Equal)
|
|
.then_with(|| left.symbol.cmp(&right.symbol))
|
|
});
|
|
per_date.insert(*date, rows);
|
|
}
|
|
|
|
per_date
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
|
|
|
fn temp_csv_path(name: &str) -> std::path::PathBuf {
|
|
let nanos = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.unwrap_or_default()
|
|
.as_nanos();
|
|
std::env::temp_dir().join(format!("{}_{}_{}.csv", name, std::process::id(), nanos))
|
|
}
|
|
|
|
fn market_row(date: &str, prev_close: f64, volume: u64) -> DailyMarketSnapshot {
|
|
DailyMarketSnapshot {
|
|
date: NaiveDate::parse_from_str(date, "%Y-%m-%d").unwrap(),
|
|
symbol: "000001.SZ".to_string(),
|
|
timestamp: None,
|
|
day_open: prev_close,
|
|
open: prev_close,
|
|
high: prev_close,
|
|
low: prev_close,
|
|
close: prev_close,
|
|
last_price: prev_close,
|
|
bid1: prev_close,
|
|
ask1: prev_close,
|
|
prev_close,
|
|
volume,
|
|
tick_volume: 0,
|
|
bid1_volume: 0,
|
|
ask1_volume: 0,
|
|
trading_phase: None,
|
|
paused: false,
|
|
upper_limit: prev_close * 1.1,
|
|
lower_limit: prev_close * 0.9,
|
|
price_tick: 0.01,
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn decision_volume_average_uses_previous_completed_days_only() {
|
|
let series = SymbolPriceSeries::new(&[
|
|
market_row("2025-01-02", 10.0, 100),
|
|
market_row("2025-01-03", 11.0, 200),
|
|
market_row("2025-01-06", 12.0, 10_000),
|
|
]);
|
|
|
|
assert_eq!(
|
|
series.decision_close_moving_average(
|
|
NaiveDate::parse_from_str("2025-01-06", "%Y-%m-%d").unwrap(),
|
|
2
|
|
),
|
|
Some(11.5)
|
|
);
|
|
assert_eq!(
|
|
series.decision_volume_moving_average(
|
|
NaiveDate::parse_from_str("2025-01-06", "%Y-%m-%d").unwrap(),
|
|
2
|
|
),
|
|
Some(150.0)
|
|
);
|
|
assert_eq!(
|
|
series.decision_volume_moving_average(
|
|
NaiveDate::parse_from_str("2025-01-06", "%Y-%m-%d").unwrap(),
|
|
3
|
|
),
|
|
None
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn reads_mixed_numeric_and_text_extra_factors_from_quoted_csv_json() {
|
|
let path = temp_csv_path("mixed_factor_maps");
|
|
fs::write(
|
|
&path,
|
|
concat!(
|
|
"date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm,turnover_ratio,effective_turnover_ratio,extra_factors\n",
|
|
"2025-01-02,000001.SZ,12,10,8,1,1,\"{\"\"custom_alpha\"\":7,\"\"industry_name\"\":\"\"electronics,hardware\"\",\"\"flag\"\":true}\"\n"
|
|
),
|
|
)
|
|
.unwrap();
|
|
|
|
let factors = read_factors(&path).unwrap();
|
|
let text_factors = read_factor_texts(&path).unwrap();
|
|
fs::remove_file(&path).ok();
|
|
|
|
assert_eq!(factors.len(), 1);
|
|
assert_eq!(
|
|
factors[0].extra_factors.get("custom_alpha").copied(),
|
|
Some(7.0)
|
|
);
|
|
assert_eq!(factors[0].extra_factors.get("flag").copied(), Some(1.0));
|
|
assert_eq!(text_factors.len(), 2);
|
|
assert!(
|
|
text_factors
|
|
.iter()
|
|
.any(|row| row.field == "industry_name" && row.value == "electronics,hardware")
|
|
);
|
|
assert!(
|
|
text_factors
|
|
.iter()
|
|
.any(|row| row.field == "flag" && row.value == "true")
|
|
);
|
|
}
|
|
}
|