Below is a clean, simplified, production-ready update of your rain_project_app.py with these new features:

✅ What is added now

✔ Reads all .xlsx files from a folder you specify (e.g. split-data/)
✔ Runs prediction for each file
✔ Combines all predictions into one output Excel file (a single sheet, with a File column identifying the source file)
✔ Plot shows multiple years automatically
✔ Works inside your existing Streamlit UI

✅ Updated rain_project_app.py (Full Code)

Copy–paste this into your file.

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense

# ----------------------------------------------------------
# Helper: Create sequences
# ----------------------------------------------------------
def create_sequences(data, seq_len=30):
    """Build sliding-window samples for a sequence model.

    Every column of ``data`` except the last is treated as a feature and the
    last column as the target. Sample ``i`` holds feature rows ``i`` through
    ``i + seq_len - 1`` and is paired with the target value of row
    ``i + seq_len``.

    Args:
        data: pandas DataFrame whose last column is the target.
        seq_len: Number of consecutive rows in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n, seq_len, n_features)`` and ``y`` has shape ``(n,)``, where
        ``n = max(len(data) - seq_len, 0)``.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    # Convert to NumPy once; slicing with .iloc inside the loop is much slower.
    feature_values = data.iloc[:, :-1].to_numpy()
    target_values = data.iloc[:, -1].to_numpy()

    n_samples = len(data) - seq_len
    if n_samples <= 0:
        # Keep the 3-D layout even when there is nothing to window, so callers
        # can still inspect X.shape[1] and X.shape[2].
        empty_X = np.empty(
            (0, seq_len, feature_values.shape[1]), dtype=feature_values.dtype
        )
        empty_y = np.empty((0,), dtype=target_values.dtype)
        return empty_X, empty_y

    X = np.stack([feature_values[i:i + seq_len] for i in range(n_samples)])
    # Copy so the result never aliases the caller's DataFrame buffer.
    y = target_values[seq_len:].copy()
    return X, y


# ----------------------------------------------------------
# Main Streamlit App
# ----------------------------------------------------------
def _unscale_target(scaler, scaled_values, n_columns):
    """Map scaled target values back to their original units.

    The scaler is fitted on ``n_columns`` columns with the target last, so the
    values are written into the last column of a zero-filled array, inverse
    transformed, and read back from that same column.
    """
    padded = np.zeros((len(scaled_values), n_columns))
    padded[:, -1] = np.ravel(scaled_values)
    return scaler.inverse_transform(padded)[:, -1]


st.title("Rainfall Prediction – Batch File Processor")

input_dir = st.text_input("Enter Input Directory (e.g., split-data)")
output_file = st.text_input("Enter Output Excel File Name (e.g., all_predictions.xlsx)")

run_btn = st.button("Run Batch Processing")

if run_btn:

    # Stray whitespace from the text boxes would otherwise end up in the paths.
    input_dir = input_dir.strip()
    output_file = output_file.strip()

    if not os.path.isdir(input_dir):
        st.error("Input directory does not exist.")
        st.stop()

    if not output_file:
        st.error("Please enter an output Excel file name.")
        st.stop()

    # pandas picks the Excel writer from the extension, so supply one if the
    # user left it off.
    if not os.path.splitext(output_file)[1]:
        output_file += ".xlsx"

    # Required columns. The target stays last because create_sequences()
    # treats the final column as the value to predict.
    features = ["RHMX", "RHMN", "WS", "TMAX", "TMIN", "SSH"]
    target = "RAIN"
    columns = features + [target]

    # Sorted for a reproducible order; "~$" files are Excel's lock files for
    # workbooks that are currently open and cannot be read as workbooks.
    excel_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.lower().endswith(".xlsx") and not name.startswith("~$")
    )
    if not excel_files:
        st.warning("No .xlsx files found in the input directory.")
        st.stop()

    results = []       # one prediction DataFrame per successfully processed file
    years_plot = []    # (file name, actual, predicted) tuples for the plot
    failed_files = []  # files that were skipped because of an error

    st.info("Processing all Excel files…")

    # ------------------------------------------------------
    # Read every file in directory
    # ------------------------------------------------------
    for file_name in excel_files:
        full_path = os.path.join(input_dir, file_name)
        st.write(f"📄 Processing {file_name} ...")

        try:
            df = pd.read_excel(full_path)

            missing = [col for col in columns if col not in df.columns]
            if missing:
                st.error(f"Error in {file_name}: missing columns {missing}")
                failed_files.append(file_name)
                continue

            # Rows with gaps would propagate NaN through training and
            # prediction, so drop them and tell the user.
            data = df[columns].dropna()
            dropped = len(df) - len(data)
            if dropped:
                st.warning(f"{file_name}: ignored {dropped} row(s) with missing values.")

            # Scale
            # NOTE(review): the scaler is fitted on the whole file, including
            # the rows that later form the test split, so the test comparison
            # is somewhat optimistic.
            scaler = MinMaxScaler()
            df_scaled = pd.DataFrame(scaler.fit_transform(data), columns=columns)

            # Sequence
            X, y = create_sequences(df_scaled)

            # Train-test split (chronological 80/20)
            split = int(len(X) * 0.8)
            if split < 1:
                st.error(
                    f"Error in {file_name}: not enough rows to build "
                    "training and test sequences."
                )
                failed_files.append(file_name)
                continue
            X_train, X_test = X[:split], X[split:]
            y_train, y_test = y[:split], y[split:]

            # ------------------------------------------------------
            # Bi-LSTM Model (Primary)
            # ------------------------------------------------------
            model = Sequential()
            model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
            model.add(Bidirectional(LSTM(32)))
            model.add(Dense(1))
            model.compile(optimizer='adam', loss='mse')

            model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

            # Prediction
            pred = model.predict(X_test, verbose=0)

            # Back to original rainfall units
            pred_rain = _unscale_target(scaler, pred, len(columns))
            actual_rain = _unscale_target(scaler, y_test, len(columns))

            # Store for Excel
            results.append(pd.DataFrame({
                "File": file_name,
                "Actual": actual_rain,
                "Predicted": pred_rain,
            }))

            # Store for plotting
            years_plot.append((file_name, actual_rain, pred_rain))

        except Exception as e:
            # Per-file boundary: report the problem and continue with the
            # remaining files instead of aborting the whole batch.
            st.error(f"Error in {file_name}: {str(e)}")
            failed_files.append(file_name)

    if not results:
        st.warning("No files were processed successfully; nothing to save or plot.")
        st.stop()

    # ------------------------------------------------------
    # Create Output Excel
    # ------------------------------------------------------
    final_output = pd.concat(results, ignore_index=True)
    saved = False
    try:
        final_output.to_excel(output_file, index=False)
    except (OSError, ValueError) as exc:
        st.error(f"Could not write {output_file}: {exc}")
    else:
        saved = True
        st.success(f"🎉 Predictions saved to {output_file}")

    # ------------------------------------------------------
    # Plot Multi-Year Graph
    # ------------------------------------------------------
    st.subheader("📊 Comparison Plot (Multiple Years)")

    fig, ax = plt.subplots(figsize=(10, 5))

    for fname, actual, pred in years_plot:
        ax.plot(actual, label=f"{fname} - Actual", linestyle="solid")
        ax.plot(pred, label=f"{fname} - Predicted", linestyle="dashed")

    ax.set_title("Rainfall Prediction Across Multiple Years")
    ax.set_ylabel("Rainfall")
    ax.set_xlabel("Days")

    ax.legend(fontsize=7)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this the
    # figures pile up each time the script reruns.
    plt.close(fig)

    if failed_files:
        st.warning(
            f"Batch processing finished, but {len(failed_files)} file(s) were skipped: "
            + ", ".join(failed_files)
        )
    elif saved:
        st.success("Batch processing completed successfully!")

✅ What This Code Does

1. Batch processing

Reads every .xlsx file inside:

split-data/

2. Runs your Bi-LSTM model for each file

Automatically trains → tests → predicts.

3. Stores output

All results merged in:

all_predictions.xlsx

Format:

File	Actual	Predicted

4. Multi-year plot

Graphs:

Actual rainfall
Predicted rainfall
One solid (actual) and one dashed (predicted) line per file (year)

👉 If you want improvements

I can add:

✔ Faster inference (load once, reuse model)
✔ Progress bar
✔ Auto-detect years from filename
✔ Combine plots per year
✔ Export graphs to PDF

Just tell me what you want.

project2025

Wednesday, December 3, 2025

#4 Batch processing of all files and create a single output file

✅ What is added now

✅ Updated rain_project_app.py (Full Code)

✅ What This Code Does

1. Batch processing

2. Runs your Bi-LSTM model for each file

3. Stores output

4. Multi-year plot

👉 If you want improvements

No comments:

Post a Comment

#8 Inferences and Agri Action oriented Decisions

Report Abuse