Spaces:

ArchCoder
/

llm-excel-plotter-agent

Running

llm-excel-plotter-agent / data_processor.py

Priyansh Saxena

fix: add xlrd for .xls support, use explicit engine in read_excel

e6c30af about 17 hours ago

2.41 kB

	import pandas as pd
	import os
	import logging

	class DataProcessor:
	    """Load a tabular data file into a DataFrame and expose simple inspection helpers.

	    Attributes are documented here rather than inline:
	      data_path -- path of the file that was actually loaded.
	      data      -- the loaded DataFrame (empty if loading failed).
	    """

	    def __init__(self, data_path=None):
	        """Pick the data file to use and load it.

	        Args:
	            data_path: Optional path to a data file (e.g. a user upload). When
	                it is falsy or does not exist, the sample CSV under ``data/``
	                next to this module is used instead.
	        """
	        logging.info("Initializing DataProcessor")
	        # Allow dynamic data path (for user uploads), fallback to default
	        if data_path and os.path.exists(data_path):
	            self.data_path = data_path
	        else:
	            if data_path:
	                # A path was requested but is missing: make the substitution
	                # visible instead of silently serving the sample data.
	                logging.warning(
	                    f"Data path not found: {data_path}; falling back to sample data"
	                )
	            self.data_path = os.path.join(
	                os.path.dirname(__file__), 'data', 'sample_data.csv'
	            )
	        self.data = self.load_data(self.data_path)

	    def load_data(self, path) -> pd.DataFrame:
	        """Read ``path`` into a DataFrame based on its file extension.

	        Supported extensions (case-insensitive): ``.csv``, ``.xls`` (read with
	        the ``xlrd`` engine) and ``.xlsx`` (read with the ``openpyxl`` engine).

	        Returns:
	            The loaded DataFrame. On any failure, including an unsupported
	            extension, the error is logged and an empty DataFrame is returned;
	            no exception propagates to the caller.
	        """
	        ext = os.path.splitext(path)[1].lower()
	        try:
	            if ext == '.csv':
	                data = pd.read_csv(path)
	            elif ext == '.xls':
	                data = pd.read_excel(path, engine='xlrd')
	            elif ext == '.xlsx':
	                data = pd.read_excel(path, engine='openpyxl')
	            else:
	                # Raised inside the try on purpose: it is handled by the
	                # best-effort except below like any other load failure.
	                raise ValueError(f"Unsupported file type: {ext}")
	            logging.info(f"Loaded data from {path} with shape {data.shape}")
	            return data
	        except Exception as e:
	            # Deliberate best-effort: callers always get a DataFrame back.
	            logging.error(f"Failed to load data: {e}")
	            return pd.DataFrame()

	    def validate_columns(self, required_columns) -> tuple:
	        """Check that every name in ``required_columns`` is a column of the data.

	        Returns:
	            ``(True, [])`` when all are present, otherwise ``(False, missing)``
	            where ``missing`` lists the absent names in the order given.
	        """
	        missing = [col for col in required_columns if col not in self.data.columns]
	        if missing:
	            logging.warning(f"Missing columns: {missing}")
	            return False, missing
	        return True, []

	    def get_columns(self) -> list:
	        """Return the column labels of the loaded data as a list."""
	        return list(self.data.columns)

	    def preview(self, n=5) -> list:
	        """Return the first ``n`` rows as a list of ``{column: value}`` dicts."""
	        return self.data.head(n).to_dict(orient='records')

	    def get_dtypes(self) -> dict:
	        """Map each column to a coarse type label.

	        Labels are ``"integer"``, ``"float"``, ``"datetime"`` or ``"boolean"``;
	        every other dtype is reported as ``"string"``.
	        """
	        result = {}
	        for col, dtype in self.data.dtypes.items():
	            if pd.api.types.is_integer_dtype(dtype):
	                result[col] = "integer"
	            elif pd.api.types.is_float_dtype(dtype):
	                result[col] = "float"
	            elif pd.api.types.is_datetime64_any_dtype(dtype):
	                result[col] = "datetime"
	            elif pd.api.types.is_bool_dtype(dtype):
	                result[col] = "boolean"
	            else:
	                result[col] = "string"
	        return result

	    def get_stats(self) -> dict:
	        """Return ``describe()`` statistics for the numeric columns.

	        Returns:
	            ``{column: {statistic: value}}`` with each value rounded to 4
	            decimal places, or ``{}`` when the numeric selection is empty.
	        """
	        numeric = self.data.select_dtypes(include='number')
	        if numeric.empty:
	            return {}
	        desc = numeric.describe().to_dict()
	        return {
	            col: {k: round(v, 4) for k, v in stats.items()}
	            for col, stats in desc.items()
	        }