增加分区快照数据入口
This commit is contained in:
32
README.md
32
README.md
@@ -103,6 +103,7 @@
|
|||||||
## Phase 2 新增内容
|
## Phase 2 新增内容
|
||||||
|
|
||||||
- `DataSet::bundle_on(date)`:引入按日 snapshot bundle 视图,方便未来直接对接 FiDataCenter / FiDataScraper 预计算快照
|
- `DataSet::bundle_on(date)`:引入按日 snapshot bundle 视图,方便未来直接对接 FiDataCenter / FiDataScraper 预计算快照
|
||||||
|
- `DataSet::from_partitioned_dir(path)`:新增按日分区 snapshot 目录读取入口,为真实回测数据源接入打基础
|
||||||
- 策略诊断输出:equity curve 里新增 `diagnostics` 字段,记录市值带、候选样本、退出原因等信息
|
- 策略诊断输出:equity curve 里新增 `diagnostics` 字段,记录市值带、候选样本、退出原因等信息
|
||||||
- 候选资格快照扩展:补入 `is_kcb`、`is_one_yuan`
|
- 候选资格快照扩展:补入 `is_kcb`、`is_one_yuan`
|
||||||
- 增加策略选择行为测试
|
- 增加策略选择行为测试
|
||||||
@@ -122,10 +123,41 @@
|
|||||||
|
|
||||||
## 运行方式
|
## 运行方式
|
||||||
|
|
||||||
|
默认跑仓库内置 flat demo CSV:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run --bin bt-demo
|
cargo run --bin bt-demo
|
||||||
```
|
```
|
||||||
|
|
||||||
|
如果要接更接近真实数据面的按日分区 snapshot 目录:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
FIDC_BT_DATA_LAYOUT=partitioned \
|
||||||
|
FIDC_BT_DATA_DIR=/path/to/snapshots \
|
||||||
|
cargo run --bin bt-demo
|
||||||
|
```
|
||||||
|
|
||||||
|
约定目录结构:
|
||||||
|
|
||||||
|
```text
|
||||||
|
snapshots/
|
||||||
|
├── instruments.csv
|
||||||
|
├── benchmark/
|
||||||
|
│ ├── 2024-01-02.csv
|
||||||
|
│ └── ...
|
||||||
|
├── market/
|
||||||
|
├── factors/
|
||||||
|
└── candidates/
|
||||||
|
```
|
||||||
|
|
||||||
|
其中:
|
||||||
|
- `market/`:日级行情快照
|
||||||
|
- `factors/`:估值/因子快照
|
||||||
|
- `candidates/`:候选资格/过滤标记快照
|
||||||
|
- `benchmark/`:指数快照
|
||||||
|
|
||||||
|
这层接口是为后续对接 `FiDataCenter / FiDataScraper` 的预计算 snapshot 数据准备的。
|
||||||
|
|
||||||
运行后会生成:
|
运行后会生成:
|
||||||
|
|
||||||
- `output/demo/equity_curve.csv`
|
- `output/demo/equity_curve.csv`
|
||||||
|
|||||||
@@ -20,12 +20,19 @@ use fidc_core::{
|
|||||||
|
|
||||||
fn main() -> Result<(), Box<dyn Error>> {
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
let root = workspace_root();
|
let root = workspace_root();
|
||||||
let data_dir = root.join("data/demo");
|
let data_dir = std::env::var("FIDC_BT_DATA_DIR")
|
||||||
|
.map(PathBuf::from)
|
||||||
|
.unwrap_or_else(|_| root.join("data/demo"));
|
||||||
|
let data_layout = std::env::var("FIDC_BT_DATA_LAYOUT").unwrap_or_else(|_| "flat".to_string());
|
||||||
let output_dir = root.join("output/demo");
|
let output_dir = root.join("output/demo");
|
||||||
|
|
||||||
fs::create_dir_all(&output_dir)?;
|
fs::create_dir_all(&output_dir)?;
|
||||||
|
|
||||||
let data = DataSet::from_csv_dir(&data_dir)?;
|
let data = if data_layout == "partitioned" {
|
||||||
|
DataSet::from_partitioned_dir(&data_dir)?
|
||||||
|
} else {
|
||||||
|
DataSet::from_csv_dir(&data_dir)?
|
||||||
|
};
|
||||||
let mut strategy_cfg = CnSmallCapRotationConfig::demo();
|
let mut strategy_cfg = CnSmallCapRotationConfig::demo();
|
||||||
strategy_cfg.base_index_level = 3000.0;
|
strategy_cfg.base_index_level = 3000.0;
|
||||||
strategy_cfg.base_cap_floor = 38.0;
|
strategy_cfg.base_cap_floor = 38.0;
|
||||||
|
|||||||
@@ -156,7 +156,25 @@ impl DataSet {
|
|||||||
let factors = read_factors(&path.join("factors.csv"))?;
|
let factors = read_factors(&path.join("factors.csv"))?;
|
||||||
let candidates = read_candidates(&path.join("candidate_flags.csv"))?;
|
let candidates = read_candidates(&path.join("candidate_flags.csv"))?;
|
||||||
let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?;
|
let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?;
|
||||||
|
Self::from_components(instruments, market, factors, candidates, benchmarks)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_partitioned_dir(path: &Path) -> Result<Self, DataSetError> {
|
||||||
|
let instruments = read_instruments(&path.join("instruments.csv"))?;
|
||||||
|
let benchmarks = read_partitioned_dir(&path.join("benchmark"), read_benchmarks)?;
|
||||||
|
let market = read_partitioned_dir(&path.join("market"), read_market)?;
|
||||||
|
let factors = read_partitioned_dir(&path.join("factors"), read_factors)?;
|
||||||
|
let candidates = read_partitioned_dir(&path.join("candidates"), read_candidates)?;
|
||||||
|
Self::from_components(instruments, market, factors, candidates, benchmarks)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_components(
|
||||||
|
instruments: Vec<Instrument>,
|
||||||
|
market: Vec<DailyMarketSnapshot>,
|
||||||
|
factors: Vec<DailyFactorSnapshot>,
|
||||||
|
candidates: Vec<CandidateEligibility>,
|
||||||
|
benchmarks: Vec<BenchmarkSnapshot>,
|
||||||
|
) -> Result<Self, DataSetError> {
|
||||||
let benchmark_code = collect_benchmark_code(&benchmarks)?;
|
let benchmark_code = collect_benchmark_code(&benchmarks)?;
|
||||||
let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect());
|
let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect());
|
||||||
|
|
||||||
@@ -456,6 +474,33 @@ impl CsvRow {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_partitioned_dir<T, F>(dir: &Path, mut loader: F) -> Result<Vec<T>, DataSetError>
|
||||||
|
where
|
||||||
|
F: FnMut(&Path) -> Result<Vec<T>, DataSetError>,
|
||||||
|
{
|
||||||
|
let mut files = fs::read_dir(dir)
|
||||||
|
.map_err(|source| DataSetError::Io {
|
||||||
|
path: dir.display().to_string(),
|
||||||
|
source,
|
||||||
|
})?
|
||||||
|
.collect::<Result<Vec<_>, _>>()
|
||||||
|
.map_err(|source| DataSetError::Io {
|
||||||
|
path: dir.display().to_string(),
|
||||||
|
source,
|
||||||
|
})?;
|
||||||
|
files.sort_by_key(|entry| entry.path());
|
||||||
|
|
||||||
|
let mut rows = Vec::new();
|
||||||
|
for entry in files {
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension().and_then(|x| x.to_str()) != Some("csv") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rows.extend(loader(&path)?);
|
||||||
|
}
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
|
||||||
fn read_rows(path: &Path) -> Result<Vec<CsvRow>, DataSetError> {
|
fn read_rows(path: &Path) -> Result<Vec<CsvRow>, DataSetError> {
|
||||||
let content = fs::read_to_string(path).map_err(|source| DataSetError::Io {
|
let content = fs::read_to_string(path).map_err(|source| DataSetError::Io {
|
||||||
path: path.display().to_string(),
|
path: path.display().to_string(),
|
||||||
|
|||||||
55
crates/fidc-core/tests/partitioned_loader.rs
Normal file
55
crates/fidc-core/tests/partitioned_loader.rs
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
use fidc_core::DataSet;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
fn temp_dir() -> PathBuf {
|
||||||
|
let uniq = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.expect("clock")
|
||||||
|
.as_nanos();
|
||||||
|
let dir = std::env::temp_dir().join(format!("fidc-bt-partitioned-{uniq}"));
|
||||||
|
fs::create_dir_all(&dir).expect("mkdir temp");
|
||||||
|
dir
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn can_load_partitioned_snapshot_dir() {
|
||||||
|
let dir = temp_dir();
|
||||||
|
fs::create_dir_all(dir.join("benchmark")).unwrap();
|
||||||
|
fs::create_dir_all(dir.join("market")).unwrap();
|
||||||
|
fs::create_dir_all(dir.join("factors")).unwrap();
|
||||||
|
fs::create_dir_all(dir.join("candidates")).unwrap();
|
||||||
|
|
||||||
|
fs::write(
|
||||||
|
dir.join("instruments.csv"),
|
||||||
|
"symbol,name,exchange,lot_size\n000001.SZ,PingAn,SZ,100\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
fs::write(
|
||||||
|
dir.join("benchmark/2024-01-02.csv"),
|
||||||
|
"date,benchmark,open,close,prev_close,volume\n2024-01-02,CSI300.DEMO,2990,3000,2980,100000000\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
fs::write(
|
||||||
|
dir.join("market/2024-01-02.csv"),
|
||||||
|
"date,symbol,open,high,low,close,prev_close,volume,paused,upper_limit,lower_limit\n2024-01-02,000001.SZ,10,10.5,9.9,10.2,10,100000,false,11,9\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
fs::write(
|
||||||
|
dir.join("factors/2024-01-02.csv"),
|
||||||
|
"date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm\n2024-01-02,000001.SZ,40,35,12\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
fs::write(
|
||||||
|
dir.join("candidates/2024-01-02.csv"),
|
||||||
|
"date,symbol,is_st,is_new_listing,is_paused,allow_buy,allow_sell,is_kcb,is_one_yuan\n2024-01-02,000001.SZ,false,false,false,true,true,false,false\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let data = DataSet::from_partitioned_dir(&dir).expect("partitioned dataset");
|
||||||
|
assert_eq!(data.benchmark_code(), "CSI300.DEMO");
|
||||||
|
assert!(data.market_snapshots_on(chrono::NaiveDate::from_ymd_opt(2024, 1, 2).unwrap()).len() == 1);
|
||||||
|
|
||||||
|
let _ = fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user