Below is a clean, simplified, production-ready update of your rain_project_app.py with these new features:
✅ What is added now
✔ Reads all .xlsx files from a folder split-data/
✔ Runs prediction for each file
✔ Combines all predictions into one output Excel (multi-sheet or single sheet)
✔ Plot shows multiple years automatically
✔ Works inside your existing Streamlit UI
✅ Updated rain_project_app.py (Full Code)
Copy–paste this into your file.
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
# ----------------------------------------------------------
# Helper: Create sequences
# ----------------------------------------------------------
def create_sequences(data, seq_len=30):
X, y = [], []
for i in range(len(data) - seq_len):
X.append(data.iloc[i:i+seq_len, :-1].values)
y.append(data.iloc[i+seq_len, -1])
return np.array(X), np.array(y)
# ----------------------------------------------------------
# Main Streamlit App
# ----------------------------------------------------------
st.title("Rainfall Prediction – Batch File Processor")
input_dir = st.text_input("Enter Input Directory (e.g., split-data)")
output_file = st.text_input("Enter Output Excel File Name (e.g., all_predictions.xlsx)")
run_btn = st.button("Run Batch Processing")
if run_btn:
if not os.path.isdir(input_dir):
st.error("Input directory does not exist.")
st.stop()
results = [] # store all prediction results
years_plot = [] # store data for plotting
st.info("Processing all Excel files…")
# ------------------------------------------------------
# Read every file in directory
# ------------------------------------------------------
for file in os.listdir(input_dir):
if file.endswith(".xlsx"):
full_path = os.path.join(input_dir, file)
st.write(f"๐ Processing {file} ...")
try:
df = pd.read_excel(full_path)
# Required columns
features = ["RHMX", "RHMN", "WS", "TMAX", "TMIN", "SSH"]
target = "RAIN"
# Scale
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df[features + [target]])
df_scaled = pd.DataFrame(df_scaled, columns=features + [target])
# Sequence
X, y = create_sequences(df_scaled)
# Train-test split
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
# ------------------------------------------------------
# Bi-LSTM Model (Primary)
# ------------------------------------------------------
model = Sequential()
model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
# Prediction
pred = model.predict(X_test)
# Inverse scale
dummy = np.zeros((len(pred), len(features)+1))
dummy[:, -1] = pred[:, 0]
pred_rain = scaler.inverse_transform(dummy)[:, -1]
# Actual rain
dummy2 = np.zeros((len(y_test), len(features)+1))
dummy2[:, -1] = y_test
actual_rain = scaler.inverse_transform(dummy2)[:, -1]
# Store for Excel
out_df = pd.DataFrame({
"File": file,
"Actual": actual_rain,
"Predicted": pred_rain
})
results.append(out_df)
# Store for plotting
years_plot.append((file, actual_rain, pred_rain))
except Exception as e:
st.error(f"Error in {file}: {str(e)}")
# ------------------------------------------------------
# Create Output Excel
# ------------------------------------------------------
if len(results) > 0:
final_output = pd.concat(results, ignore_index=True)
final_output.to_excel(output_file, index=False)
st.success(f"๐ Predictions saved to {output_file}")
# ------------------------------------------------------
# Plot Multi-Year Graph
# ------------------------------------------------------
st.subheader("๐ Comparison Plot (Multiple Years)")
fig, ax = plt.subplots(figsize=(10, 5))
for fname, actual, pred in years_plot:
ax.plot(actual, label=f"{fname} - Actual", linestyle="solid")
ax.plot(pred, label=f"{fname} - Predicted", linestyle="dashed")
ax.set_title("Rainfall Prediction Across Multiple Years")
ax.set_ylabel("Rainfall")
ax.set_xlabel("Days")
ax.legend(fontsize=7)
st.pyplot(fig)
st.success("Batch processing completed successfully!")
✅ What This Code Does
1. Batch processing
Reads every .xlsx file inside:
split-data/
2. Runs your Bi-LSTM model for each file
Automatically trains → tests → predicts.
3. Stores output
All results merged in:
all_predictions.xlsx
Format:
| File | Actual | Predicted |
|---|
4. Multi-year plot
Graphs:
-
Actual rainfall
-
Predicted rainfall
-
One color-pair per file (year)
๐ If you want improvements
I can add:
✔ Faster inference (load once, reuse model)
✔ Progress bar
✔ Auto-detect years from filename
✔ Combine plots per year
✔ Export graphs to PDF
Just tell me what you want.
No comments:
Post a Comment