Nuzz23 committed on
Commit
486578c
·
1 Parent(s): a5368e1

better controls

Browse files
Files changed (2) hide show
  1. app.py +7 -5
  2. utils.py +9 -4
app.py CHANGED
@@ -15,11 +15,7 @@ OUT_PATH ="./savedPredictions/results.csv"
15
  def dataProcessing(file, timestamp_column:str=None):
16
  if os.path.exists(OUT_PATH):
17
  os.remove(OUT_PATH)
18
- global plot, download, error, chronos2
19
-
20
- plot = gr.update(visible=False)
21
- download = gr.update(visible=False)
22
- error = gr.update(visible=False)
23
 
24
  try:
25
  validateData(file, timestamp_column)
@@ -108,6 +104,12 @@ with gr.Blocks(title="Time series anomaly detection with Chronos2") as demo:
108
  outputs=processing_msg
109
  )
110
 
 
 
 
 
 
 
111
  detect_button.click(
112
  lambda file, timestamp_question, timestamp_column:
113
  dataProcessing(
 
15
  def dataProcessing(file, timestamp_column:str=None):
16
  if os.path.exists(OUT_PATH):
17
  os.remove(OUT_PATH)
18
+ global chronos2
 
 
 
 
19
 
20
  try:
21
  validateData(file, timestamp_column)
 
104
  outputs=processing_msg
105
  )
106
 
107
+ detect_button.click(
108
+ lambda : gr.update(visible=False, value=""),
109
+ inputs=None,
110
+ outputs=[plot_output, download_output, errorHandler]
111
+ )
112
+
113
  detect_button.click(
114
  lambda file, timestamp_question, timestamp_column:
115
  dataProcessing(
utils.py CHANGED
@@ -8,6 +8,8 @@ from chronos import Chronos2Pipeline
8
  MIN_LENGTH_CONTEXT = 64
9
  PREDICTION_LENGTH = 32
10
  BATCH_SIZE = 256
 
 
11
  MAX_NUMBER_OF_PLOTTABLE_SERIES = 5 # To avoid plotting too many series in the same plot, which can be unreadable. If there are more than this number of target columns, we will only plot the first MAX_NUMBER_OF_PLOTTABLE_SERIES columns.
12
  MAX_NUMBER_OF_POINTS_PLOTTABLE = 32_000 # To avoid plotting too many points in the same plot, which can be unreadable. If there are more than this number of points, we will only plot the first MAX_NUMBER_OF_POINTS_PLOTTABLE points.
13
 
@@ -19,9 +21,10 @@ def validateData(file, timestamp_column:str=None):
19
  2. If a timestamp column is provided, it must exist in the data and must not contain any missing values (NaNs).
20
  3. The time series data must contain at least a minimum number of data points (defined by MIN_LENGTH_CONTEXT) for accurate anomaly detection.
21
  4. The data must contain at least one column of values for anomaly detection.
22
- 5. The data must not contain any missing values (NaNs) for accurate anomaly detection.
23
- 6. The data must not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection.
24
- 7. All value columns must contain numeric data for accurate anomaly detection.
 
25
 
26
  Args:
27
  file (str): The path to the uploaded CSV file containing the time series data.
@@ -30,7 +33,8 @@ def validateData(file, timestamp_column:str=None):
30
  Raises:
31
  AssertionError: If any of the validation conditions are not met, an AssertionError will be raised with a descriptive error message.
32
  """
33
- assert os.path.getsize(file) < 256 * 1024 * 1024, "File size exceeds the maximum limit of 256MB. Please upload a smaller file."
 
34
  assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
35
  assert file.endswith('.csv') and os.path.basename(file).count(".") == 1, "Invalid file format. Please upload a CSV file."
36
  df = pd.read_csv(file, index_col=None, header=0)
@@ -41,6 +45,7 @@ def validateData(file, timestamp_column:str=None):
41
 
42
  assert len(df) >= MIN_LENGTH_CONTEXT, f"Insufficient data length. The uploaded time series must contain at least {MIN_LENGTH_CONTEXT} data points for accurate anomaly detection."
43
  assert len(df.columns) >= 1, "No value columns found. Please ensure your CSV file contains at least one column of values for anomaly detection."
 
44
 
45
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
46
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
 
8
  MIN_LENGTH_CONTEXT = 64
9
  PREDICTION_LENGTH = 32
10
  BATCH_SIZE = 256
11
+ MAX_FILE_SIZE_MB = 256 # Maximum file size for upload (in megabytes)
12
+ MAX_NUMBER_OF_COLUMNS = 200 # To avoid performance issues and ensure readability of results, we will limit the number of columns that can be processed to 200. If the uploaded data contains more than this number of columns, an error will be raised.
13
  MAX_NUMBER_OF_PLOTTABLE_SERIES = 5 # To avoid plotting too many series in the same plot, which can be unreadable. If there are more than this number of target columns, we will only plot the first MAX_NUMBER_OF_PLOTTABLE_SERIES columns.
14
  MAX_NUMBER_OF_POINTS_PLOTTABLE = 32_000 # To avoid plotting too many points in the same plot, which can be unreadable. If there are more than this number of points, we will only plot the first MAX_NUMBER_OF_POINTS_PLOTTABLE points.
15
 
 
21
  2. If a timestamp column is provided, it must exist in the data and must not contain any missing values (NaNs).
22
  3. The time series data must contain at least a minimum number of data points (defined by MIN_LENGTH_CONTEXT) for accurate anomaly detection.
23
  4. The data must contain at least one column of values for anomaly detection.
24
+ 5. The data must contain at most 200 columns to avoid performance issues and ensure readability of results.
25
+ 6. The data must not contain any missing values (NaNs) for accurate anomaly detection.
26
+ 7. The data must not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection.
27
+ 8. All value columns must contain numeric data for accurate anomaly detection.
28
 
29
  Args:
30
  file (str): The path to the uploaded CSV file containing the time series data.
 
33
  Raises:
34
  AssertionError: If any of the validation conditions are not met, an AssertionError will be raised with a descriptive error message.
35
  """
36
+ assert file is not None and isinstance(file, str), "No file uploaded. Please upload a CSV file containing your time series data."
37
+ assert os.path.getsize(file) < MAX_FILE_SIZE_MB * 1024 * 1024, f"File size exceeds the maximum limit of {MAX_FILE_SIZE_MB}MB. Please upload a smaller file."
38
  assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
39
  assert file.endswith('.csv') and os.path.basename(file).count(".") == 1, "Invalid file format. Please upload a CSV file."
40
  df = pd.read_csv(file, index_col=None, header=0)
 
45
 
46
  assert len(df) >= MIN_LENGTH_CONTEXT, f"Insufficient data length. The uploaded time series must contain at least {MIN_LENGTH_CONTEXT} data points for accurate anomaly detection."
47
  assert len(df.columns) >= 1, "No value columns found. Please ensure your CSV file contains at least one column of values for anomaly detection."
48
+ assert len(df.columns) <= MAX_NUMBER_OF_COLUMNS, f"Too many columns. The uploaded time series must contain less than {MAX_NUMBER_OF_COLUMNS} columns to avoid performance issues and ensure readability of results."
49
 
50
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
51
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."