Spaces:

42Cummer
/

QuantumMarketMonitor

Paused

App Files Files Community

QuantumMarketMonitor / utils /process.py

42Cummer

Upload 15 files

2f560eb verified 4 months ago

raw

history blame contribute delete

2.55 kB

	import pandas as pd # type: ignore
	import numpy as np # type: ignore
	import torch # type: ignore

	import os
	# The project root is the parent of the directory that contains this file, so
	# the data directory does not depend on the current working directory.
	project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	data_dir = os.path.join(project_root, "data")
	# exist_ok=True makes the creation idempotent and removes the check-then-create
	# race when another process creates the directory between the test and the call.
	os.makedirs(data_dir, exist_ok=True)
	# CSV cache read by get_processed_tensors(); expected to be produced by the
	# fetcher script.
	CACHE_FILE = os.path.join(data_dir, "vfv_market_data.csv")

	# Number of consecutive log returns in each window.
	WINDOW_SIZE = 15

	def _normalized_windows(log_returns, window_size):
	    """Return every sliding window of `log_returns`, z-scored per window."""
	    # Strided view: row i is log_returns[i : i + window_size], built without
	    # a Python-level loop. Requires len(log_returns) >= window_size.
	    windows = np.lib.stride_tricks.sliding_window_view(log_returns, window_size)
	    mu = windows.mean(axis=1, keepdims=True)
	    std = windows.std(axis=1, keepdims=True)
	    # (x - mean) / std_dev. Windows with (near-)zero variance are only
	    # mean-centred: dividing by 1.0 leaves them unchanged and avoids a
	    # division by zero.
	    safe_std = np.where(std > 1e-9, std, 1.0)
	    return (windows - mu) / safe_std


	def get_processed_tensors(*, cache_file=None, window_size=None):
	    """
	    Read the yfinance CSV, clean its MultiIndex headers, and convert the
	    Close prices into z-score-normalized windows of log returns.

	    Args:
	        cache_file: Path of the CSV to read. Defaults to the module-level
	            CACHE_FILE.
	        window_size: Number of log returns per window. Defaults to the
	            module-level WINDOW_SIZE.

	    Returns:
	        A float32 tensor of shape [N, window_size] with one normalized
	        window per row, or None (after printing an error) when the file is
	        missing, has no 'Close' column, or holds fewer than
	        window_size + 1 usable prices.

	    Raises:
	        ValueError: If window_size is smaller than 1.
	    """
	    path = CACHE_FILE if cache_file is None else cache_file
	    size = WINDOW_SIZE if window_size is None else window_size
	    if size < 1:
	        raise ValueError(f"window_size must be at least 1, got {size}")

	    if not os.path.exists(path):
	        print(f"Error: {path} not found. Run your fetcher script first.")
	        return None

	    # Load the CSV with its two-level column header (Price / Ticker);
	    # header=[0, 1] captures both rows.
	    df = pd.read_csv(path, header=[0, 1], index_col=0, parse_dates=True)

	    # Flatten the MultiIndex: (Close, <ticker>) -> Close, so df["Close"] works.
	    df.columns = df.columns.get_level_values(0)

	    if "Close" not in df.columns:
	        print(f"Error: no 'Close' column found in {path}.")
	        return None

	    close = df["Close"]
	    if isinstance(close, pd.DataFrame):
	        # Several tickers flatten to duplicate 'Close' labels, which selects a
	        # DataFrame; pd.to_numeric needs a single column, so use the first one.
	        close = close.iloc[:, 0]

	    # errors="coerce" turns any non-numeric cell into NaN so dropna() removes it.
	    prices = pd.to_numeric(close, errors="coerce").dropna().to_numpy(dtype=float)
	    # ln(P_t / P_{t-1}) is undefined for non-positive prices and would put
	    # NaN/inf into the windows, so such rows are discarded like other bad cells.
	    prices = prices[np.isfinite(prices) & (prices > 0)]

	    if len(prices) < size + 1:
	        print(f"Error: Not enough data. Need at least {size + 1} points.")
	        return None

	    # Log returns r_t = ln(P_t / P_{t-1}); the vector has len(prices) - 1 entries.
	    log_returns = np.log(prices[1:] / prices[:-1])

	    return torch.tensor(_normalized_windows(log_returns, size), dtype=torch.float32)

	if __name__ == "__main__":
	    # Manual smoke run: build the tensors and show a short summary.
	    tensors = get_processed_tensors()
	    if tensors is not None:
	        # Each entry is printed on its own line, in order.
	        summary = (
	            "--- Processing Complete ---",
	            f"Tensor Shape: {tensors.shape}",  # one row per window
	            "\nFirst Window Example:",
	            tensors[0],
	            "\nLatest Window Example (Last 15 minutes):",
	            tensors[-1],
	        )
	        for entry in summary:
	            print(entry)