Nuzz23 committed on
Commit
486578c
·
1 Parent(s): a5368e1

better controls

Browse files
Files changed (2) hide show
  1. app.py +7 -5
  2. utils.py +9 -4
app.py CHANGED
@@ -15,11 +15,7 @@ OUT_PATH ="./savedPredictions/results.csv"
15
  def dataProcessing(file, timestamp_column:str=None):
16
  if os.path.exists(OUT_PATH):
17
  os.remove(OUT_PATH)
18
- global plot, download, error, chronos2
19
-
20
- plot = gr.update(visible=False)
21
- download = gr.update(visible=False)
22
- error = gr.update(visible=False)
23
 
24
  try:
25
  validateData(file, timestamp_column)
@@ -108,6 +104,12 @@ with gr.Blocks(title="Time series anomaly detection with Chronos2") as demo:
108
  outputs=processing_msg
109
  )
110
 
 
 
 
 
 
 
111
  detect_button.click(
112
  lambda file, timestamp_question, timestamp_column:
113
  dataProcessing(
 
15
  def dataProcessing(file, timestamp_column:str=None):
16
  if os.path.exists(OUT_PATH):
17
  os.remove(OUT_PATH)
18
+ global chronos2
 
 
 
 
19
 
20
  try:
21
  validateData(file, timestamp_column)
 
104
  outputs=processing_msg
105
  )
106
 
107
+ detect_button.click(
108
+ lambda : gr.update(visible=False, value=""),
109
+ inputs=None,
110
+ outputs=[plot_output, download_output, errorHandler]
111
+ )
112
+
113
  detect_button.click(
114
  lambda file, timestamp_question, timestamp_column:
115
  dataProcessing(
utils.py CHANGED
@@ -8,6 +8,8 @@ from chronos import Chronos2Pipeline
8
  MIN_LENGTH_CONTEXT = 64
9
  PREDICTION_LENGTH = 32
10
  BATCH_SIZE = 256
 
 
11
  MAX_NUMBER_OF_PLOTTABLE_SERIES = 5 # To avoid plotting too many series in the same plot, which can be unreadable. If there are more than this number of target columns, we will only plot the first MAX_NUMBER_OF_PLOTTABLE_SERIES columns.
12
  MAX_NUMBER_OF_POINTS_PLOTTABLE = 32_000 # To avoid plotting too many points in the same plot, which can be unreadable. If there are more than this number of points, we will only plot the first MAX_NUMBER_OF_POINTS_PLOTTABLE points.
13
 
@@ -19,9 +21,10 @@ def validateData(file, timestamp_column:str=None):
19
  2. If a timestamp column is provided, it must exist in the data and must not contain any missing values (NaNs).
20
  3. The time series data must contain at least a minimum number of data points (defined by MIN_LENGTH_CONTEXT) for accurate anomaly detection.
21
  4. The data must contain at least one column of values for anomaly detection.
22
- 5. The data must not contain any missing values (NaNs) for accurate anomaly detection.
23
- 6. The data must not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection.
24
- 7. All value columns must contain numeric data for accurate anomaly detection.
 
25
 
26
  Args:
27
  file (str): The path to the uploaded CSV file containing the time series data.
@@ -30,7 +33,8 @@ def validateData(file, timestamp_column:str=None):
30
  Raises:
31
  AssertionError: If any of the validation conditions are not met, an AssertionError will be raised with a descriptive error message.
32
  """
33
- assert os.path.getsize(file) < 256 * 1024 * 1024, "File size exceeds the maximum limit of 256MB. Please upload a smaller file."
 
34
  assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
35
  assert file.endswith('.csv') and os.path.basename(file).count(".") == 1, "Invalid file format. Please upload a CSV file."
36
  df = pd.read_csv(file, index_col=None, header=0)
@@ -41,6 +45,7 @@ def validateData(file, timestamp_column:str=None):
41
 
42
  assert len(df) >= MIN_LENGTH_CONTEXT, f"Insufficient data length. The uploaded time series must contain at least {MIN_LENGTH_CONTEXT} data points for accurate anomaly detection."
43
  assert len(df.columns) >= 1, "No value columns found. Please ensure your CSV file contains at least one column of values for anomaly detection."
 
44
 
45
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
46
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."
 
8
  MIN_LENGTH_CONTEXT = 64
9
  PREDICTION_LENGTH = 32
10
  BATCH_SIZE = 256
11
+ MAX_FILE_SIZE_MB = 256 # Maximum file size for upload (in megabytes)
12
+ MAX_NUMBER_OF_COLUMNS = 200 # To avoid performance issues and ensure readability of results, we will limit the number of columns that can be processed to 200. If the uploaded data contains more than this number of columns, an error will be raised.
13
  MAX_NUMBER_OF_PLOTTABLE_SERIES = 5 # To avoid plotting too many series in the same plot, which can be unreadable. If there are more than this number of target columns, we will only plot the first MAX_NUMBER_OF_PLOTTABLE_SERIES columns.
14
  MAX_NUMBER_OF_POINTS_PLOTTABLE = 32_000 # To avoid plotting too many points in the same plot, which can be unreadable. If there are more than this number of points, we will only plot the first MAX_NUMBER_OF_POINTS_PLOTTABLE points.
15
 
 
21
  2. If a timestamp column is provided, it must exist in the data and must not contain any missing values (NaNs).
22
  3. The time series data must contain at least a minimum number of data points (defined by MIN_LENGTH_CONTEXT) for accurate anomaly detection.
23
  4. The data must contain at least one column of values for anomaly detection.
24
+ 5. The data must contain at most 200 columns to avoid performance issues and ensure readability of results.
25
+ 6. The data must not contain any missing values (NaNs) for accurate anomaly detection.
26
+ 7. The data must not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection.
27
+ 8. All value columns must contain numeric data for accurate anomaly detection.
28
 
29
  Args:
30
  file (str): The path to the uploaded CSV file containing the time series data.
 
33
  Raises:
34
  AssertionError: If any of the validation conditions are not met, an AssertionError will be raised with a descriptive error message.
35
  """
36
+ assert file is not None and isinstance(file, str), "No file uploaded. Please upload a CSV file containing your time series data."
37
+ assert os.path.getsize(file) < MAX_FILE_SIZE_MB * 1024 * 1024, f"File size exceeds the maximum limit of {MAX_FILE_SIZE_MB}MB. Please upload a smaller file."
38
  assert file is not None, "No file uploaded. Please upload a CSV file containing your time series data."
39
  assert file.endswith('.csv') and os.path.basename(file).count(".") == 1, "Invalid file format. Please upload a CSV file."
40
  df = pd.read_csv(file, index_col=None, header=0)
 
45
 
46
  assert len(df) >= MIN_LENGTH_CONTEXT, f"Insufficient data length. The uploaded time series must contain at least {MIN_LENGTH_CONTEXT} data points for accurate anomaly detection."
47
  assert len(df.columns) >= 1, "No value columns found. Please ensure your CSV file contains at least one column of values for anomaly detection."
48
+ assert len(df.columns) <= MAX_NUMBER_OF_COLUMNS, f"Too many columns. The uploaded time series must contain less than {MAX_NUMBER_OF_COLUMNS} columns to avoid performance issues and ensure readability of results."
49
 
50
  assert df.isna().sum().sum() == 0, "Missing values detected in the uploaded data. Please ensure your CSV file does not contain any missing values (NaNs) for accurate anomaly detection."
51
  assert np.isfinite(df.select_dtypes(include=[np.number])).all().all(), "Non-finite values detected in the uploaded data. Please ensure your CSV file does not contain any non-finite values (e.g., inf, -inf) for accurate anomaly detection."