Below is a single Python file that adds a Streamlit UI, TFLite conversion, and PDF report generation — all in one.
Save as rain_project_app.py. Run with streamlit run rain_project_app.py.
Short sentences. Simple language. Neutral tone.
Before running, install the needed packages once:
pip install pandas numpy matplotlib scikit-learn tensorflow statsmodels openpyxl streamlit reportlab fpdf
File: rain_project_app.py
"""
rain_project_app.py
Single-file Streamlit app for:
- Load Excel weather data
- Preprocess data
- Train ARIMA (baseline), LSTM, Bi-LSTM
- Plot predictions
- Convert Keras models to TFLite
- Generate a PDF report
Usage:
$ streamlit run rain_project_app.py
"""
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import tempfile
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from datetime import datetime
# Page-level Streamlit settings: page title and the "wide" layout.
st.set_page_config(page_title="Rain Prediction Project", layout="wide")
# ---------------------------
# Helper functions
# ---------------------------
@st.cache_data
def load_excel_file(uploaded_file):
    """Parse an uploaded Excel workbook into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so the parsed frame is
    cached by Streamlit rather than re-read on every call with the same
    upload.

    Args:
        uploaded_file: Path or file-like object accepted by
            ``pandas.read_excel`` (here, the Streamlit upload).

    Returns:
        The DataFrame produced by ``pandas.read_excel``.
    """
    return pd.read_excel(uploaded_file)
def create_sequences(data_df, features, target, seq_len=30):
    """Scale the data and cut it into sliding windows for sequence models.

    The feature columns and the target column are min-max scaled together
    with a single ``MinMaxScaler`` fitted on all rows of ``data_df``.
    Window ``i`` holds the scaled feature rows ``i .. i + seq_len - 1`` and
    its label is the scaled target of row ``i + seq_len``.

    Args:
        data_df: DataFrame containing at least ``features`` and ``target``.
        features: Sequence of feature column names.
        target: Name of the target column.
        seq_len: Number of consecutive rows per input window.

    Returns:
        Tuple ``(X, y, scaler)`` where ``X`` has shape
        ``(n_windows, seq_len, len(features))``, ``y`` has shape
        ``(n_windows,)`` and ``scaler`` is the fitted ``MinMaxScaler``
        (fitted on ``features + [target]``, in that column order).
        When ``data_df`` has ``seq_len`` rows or fewer, ``X`` and ``y`` are
        empty but still carry the shapes above.
    """
    columns = list(features) + [target]
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data_df[columns])

    n_features = len(columns) - 1
    n_windows = len(scaled) - seq_len
    if n_windows <= 0:
        # Not enough rows for a single window: return well-shaped empties so
        # callers can still inspect X.shape[1] / X.shape[2].
        return np.empty((0, seq_len, n_features)), np.empty((0,)), scaler

    # Slice the NumPy array directly; per-window DataFrame.iloc is far slower.
    X = np.stack([scaled[start:start + seq_len, :-1] for start in range(n_windows)])
    y = scaled[seq_len:, -1].copy()
    return X, y, scaler
def build_lstm(shape):
    """Build and compile the stacked LSTM regressor.

    Architecture: LSTM(64, returning sequences) -> LSTM(32) -> Dense(1),
    compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        shape: Input shape passed to the first layer as ``input_shape``;
            callers in this file pass ``(timesteps, n_features)``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential([
        LSTM(64, return_sequences=True, input_shape=shape),
        LSTM(32),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network
def build_bilstm(shape):
    """Build and compile the stacked bidirectional LSTM regressor.

    Architecture: Bidirectional(LSTM(64, returning sequences)) ->
    Bidirectional(LSTM(32)) -> Dense(1), compiled with the Adam optimizer
    and mean-squared-error loss.

    Args:
        shape: Input shape passed to the first wrapper as ``input_shape``;
            callers in this file pass ``(timesteps, n_features)``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential([
        Bidirectional(LSTM(64, return_sequences=True), input_shape=shape),
        Bidirectional(LSTM(32)),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network
def train_keras(model, X_train, y_train, epochs=10, batch_size=32,
                validation_split=0.1, verbose=0):
    """Fit ``model`` on the training data and return the fit result.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs, forwarded to ``model.fit`` unchanged.
        y_train: Training targets, forwarded to ``model.fit`` unchanged.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the training data that ``fit`` is
            asked to hold out for validation. Previously hard-coded to 0.1.
        verbose: Verbosity flag forwarded to ``fit``. Previously
            hard-coded to 0 (silent).

    Returns:
        Whatever ``model.fit`` returns (a ``History`` object for Keras).
    """
    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=verbose,
    )
def plot_prediction(y_true, y_pred_lstm=None, y_pred_bilstm=None, show=True):
    """Plot actual vs. predicted values and return the chart as PNG bytes.

    Args:
        y_true: Actual target values (scaled).
        y_pred_lstm: Optional LSTM predictions; skipped when ``None``.
        y_pred_bilstm: Optional Bi-LSTM predictions; skipped when ``None``.
        show: When true, also render the figure in the Streamlit page.

    Returns:
        ``io.BytesIO`` positioned at offset 0 and holding the PNG image.
    """
    # Use an explicit Figure instead of pyplot's implicit "current figure":
    # st.pyplot expects a Figure, and global pyplot state is shared between
    # reruns/sessions.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(y_true, label="Actual", linewidth=1.5)
        if y_pred_lstm is not None:
            ax.plot(y_pred_lstm, label="LSTM", linewidth=1)
        if y_pred_bilstm is not None:
            ax.plot(y_pred_bilstm, label="Bi-LSTM", linewidth=1)
        ax.legend()
        ax.set_title("Rainfall: Actual vs Predicted (scaled)")
        fig.tight_layout()
        if show:
            st.pyplot(fig)
        # Serialise the same figure so it can be embedded in the PDF later.
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
        buf.seek(0)
        return buf
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)
def save_model_tflite(keras_model, save_path):
    """Convert a Keras model to TFLite and write it to ``save_path``.

    Args:
        keras_model: Model handed to ``TFLiteConverter.from_keras_model``.
        save_path: Destination file path; opened in binary write mode.

    Returns:
        ``save_path``, unchanged.
    """
    flatbuffer = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()
    with open(save_path, 'wb') as out_file:
        out_file.write(flatbuffer)
    return save_path
def _draw_pdf_section(c, heading, text, y, margin, top, leading, min_y):
    """Draw a bold heading followed by the lines of ``text``; return new y.

    A new page is started before the heading when ``y`` is already below
    ``min_y``, and after any line that pushes ``y`` below ``min_y``.
    """
    if y < min_y:
        c.showPage()
        y = top
    c.setFont("Helvetica-Bold", 11)
    c.drawString(margin, y, heading)
    y -= 14
    c.setFont("Helvetica", 9)
    for line in text.splitlines():
        c.drawString(margin, y, line)
        y -= leading
        if y < min_y:
            c.showPage()
            # showPage() resets the canvas graphics state (font included),
            # so the body font has to be selected again on the new page.
            c.setFont("Helvetica", 9)
            y = top
    return y


def generate_pdf_report(pdf_path, title, df_info, arima_summary, metrics_text, plot_image_bytes, notes=""):
    """Write the project report to ``pdf_path`` as an A4 PDF.

    The report contains, in order: title, generation timestamp, data
    summary, short ARIMA summary, model metrics, the prediction plot (if
    any) and optional notes.

    Args:
        pdf_path: Output file path for the PDF.
        title: Report title drawn at the top of the first page.
        df_info: Multi-line data summary text.
        arima_summary: Multi-line ARIMA summary text.
        metrics_text: Multi-line model metrics text.
        plot_image_bytes: Image to embed. May be a file path, a file-like
            object (for example ``io.BytesIO``) or ``None`` to omit the plot.
        notes: Optional free-text notes; the section is skipped when empty.

    Returns:
        None. The PDF is written to disk by ``canvas.save()``.
    """
    # Local import keeps this block self-contained; reportlab is already a
    # dependency of this file.
    from reportlab.lib.utils import ImageReader

    c = canvas.Canvas(pdf_path, pagesize=A4)
    width, height = A4
    margin = 40
    top = height - margin
    y = top

    # Title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(margin, y, title)
    y -= 30

    # Timestamp
    c.setFont("Helvetica", 9)
    c.drawString(margin, y, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    y -= 20

    y = _draw_pdf_section(c, "Data summary:", df_info, y, margin, top, leading=12, min_y=120)
    y = _draw_pdf_section(c, "ARIMA summary (short):", arima_summary, y, margin, top, leading=10, min_y=120)
    y = _draw_pdf_section(c, "Model metrics:", metrics_text, y, margin, top, leading=10, min_y=180)

    # Plot image (skipped entirely when no image was supplied)
    y -= 10
    if plot_image_bytes is not None:
        img_w = width - 2 * margin
        img_h = img_w * 0.4
        if y - img_h < margin:
            # Not enough room left: move the image to a fresh page instead
            # of letting it run off the bottom edge.
            c.showPage()
            y = top
        try:
            image = plot_image_bytes
            if hasattr(image, "read"):
                # In-memory buffers must be wrapped for drawImage; rewind
                # first in case the buffer was read before.
                if hasattr(image, "seek"):
                    image.seek(0)
                image = ImageReader(image)
            c.drawImage(image, margin, y - img_h, width=img_w, height=img_h)
            y -= img_h + 20
        except Exception as e:
            # Best effort: a broken image must not prevent the report.
            c.setFont("Helvetica", 9)
            c.drawString(margin, y, f"Could not include plot image: {e}")
            y -= 20

    # Notes
    if notes:
        _draw_pdf_section(c, "Notes / Next steps:", notes, y, margin, top, leading=10, min_y=120)

    c.save()
# ---------------------------
# Streamlit UI
# ---------------------------
st.title("Real-Time Rainfall Prediction — Project App")
st.write("Single file app. Load data, train models, convert to TFLite, and make a PDF report.")

# Sidebar: data upload plus the windowing / training settings.
sidebar = st.sidebar
sidebar.header("1. Data and settings")
uploaded = sidebar.file_uploader("Upload Excel file (weather.xlsx)", type=["xlsx", "xls"])
seq_len = sidebar.number_input("Sequence length (days)", min_value=5, max_value=365, value=30)
test_ratio = sidebar.slider("Test ratio", 0.05, 0.5, 0.2)
epochs = sidebar.number_input("Epochs for Keras models", min_value=1, max_value=200, value=10)
batch_size = sidebar.number_input("Batch size", min_value=1, max_value=256, value=32)

# Seed every session-state slot the later steps read, without overwriting
# values that already exist from an earlier rerun.
_SESSION_DEFAULTS = {
    "df": None,
    "X_train": None,
    "X_test": None,
    "y_train": None,
    "y_test": None,
    "scaler": None,
    "model_lstm": None,
    "model_bilstm": None,
    "arima_summary": "",
    "y_pred_lstm": None,
    "y_pred_bilstm": None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Step 1: read the uploaded workbook, keep it in session state, preview it.
st.subheader("1. Load and view data")
if not uploaded:
    st.info("Upload your Excel file. Columns must include: RAIN, RHMX, RHMN, WS, TMAX, TMIN, SSH")
else:
    try:
        df = load_excel_file(uploaded)
        st.session_state.df = df
        st.write("First rows of data:")
        st.dataframe(df.head())
        st.write("Columns detected:", list(df.columns))
    except Exception as exc:
        # Any read or display failure is reported in the page.
        st.error(f"Error reading file: {exc}")
# Step 2: Preprocess
# Builds scaled sliding windows from the loaded data, splits them
# chronologically (no shuffling) and stores the result in session state.
st.subheader("2. Preprocess data")
if st.button("Run Preprocessing"):
    if st.session_state.df is None:
        st.error("Load data first.")
    else:
        features = ["RHMX", "RHMN", "WS", "TMAX", "TMIN", "SSH"]
        target = "RAIN"
        required = [target] + features
        missing = [c for c in required if c not in st.session_state.df.columns]
        if missing:
            st.error(f"Missing columns: {missing}")
        else:
            # Rows with missing values would propagate NaN into training,
            # so they are removed up front (the ARIMA step also drops NaN).
            df = st.session_state.df[required].dropna()
            dropped = len(st.session_state.df) - len(df)
            if dropped:
                st.warning(f"Dropped {dropped} rows with missing values.")
            if len(df) <= seq_len:
                st.error(
                    f"Need more than {seq_len} complete rows to build sequences; found {len(df)}."
                )
            else:
                try:
                    X, y, scaler = create_sequences(df, features, target, seq_len=seq_len)
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=test_ratio, shuffle=False
                    )
                except (ValueError, TypeError) as e:
                    # e.g. non-numeric cells or a split that leaves a side empty
                    st.error(f"Preprocessing failed: {e}")
                else:
                    st.session_state.X_train, st.session_state.X_test = X_train, X_test
                    st.session_state.y_train, st.session_state.y_test = y_train, y_test
                    st.session_state.scaler = scaler
                    # Models, predictions and the plot from an earlier run no
                    # longer match the new windows/split; keeping them would
                    # cause shape mismatches in later steps.
                    had_stale_results = False
                    for stale_key in ("model_lstm", "model_bilstm",
                                      "y_pred_lstm", "y_pred_bilstm", "plot_image"):
                        if st.session_state.get(stale_key) is not None:
                            had_stale_results = True
                        st.session_state[stale_key] = None
                    st.success(f"Preprocessing done. Train shape {X_train.shape}, Test shape {X_test.shape}")
                    if had_stale_results:
                        st.info("Earlier models and predictions were cleared. Train and predict again.")
# Step 3: univariate ARIMA(2,1,2) baseline fitted on the RAIN column.
st.subheader("3. ARIMA baseline")
if st.button("Run ARIMA (univariate RAIN)"):
    if st.session_state.df is not None:
        try:
            rain_values = st.session_state.df["RAIN"].dropna().values
            fitted = ARIMA(rain_values, order=(2, 1, 2)).fit()
            # Only the first 12 lines of the statsmodels summary are kept.
            summary_lines = fitted.summary().as_text().splitlines()
            short_summary = "\n".join(summary_lines[:12])
            st.session_state.arima_summary = short_summary
            st.text("ARIMA summary (short):")
            st.text(short_summary)
            next_steps = fitted.forecast(steps=10)
            st.write("10-step forecast (raw scale):")
            st.write(np.round(next_steps, 4))
        except Exception as exc:
            st.error(f"ARIMA failed: {exc}")
    else:
        st.error("Load data first.")
# Step 4: one training button per model, laid out in two columns.
st.subheader("4. Train LSTM and Bi-LSTM")
# (button label, spinner text, success text, model builder, session key)
_TRAINING_JOBS = (
    ("Train LSTM", "Training LSTM ...", "LSTM trained.", build_lstm, "model_lstm"),
    ("Train Bi-LSTM", "Training Bi-LSTM ...", "Bi-LSTM trained.", build_bilstm, "model_bilstm"),
)
for _column, (_button, _busy, _done, _builder, _slot) in zip(st.columns(2), _TRAINING_JOBS):
    with _column:
        if not st.button(_button):
            continue
        if st.session_state.X_train is None:
            st.error("Run preprocessing first.")
            continue
        X_train = st.session_state.X_train
        y_train = st.session_state.y_train
        # Input shape is (timesteps, n_features) taken from the windows.
        network = _builder(X_train.shape[1:3])
        with st.spinner(_busy):
            train_keras(network, X_train, y_train, epochs=epochs, batch_size=batch_size)
        st.session_state[_slot] = network
        st.success(_done)
# Step 5: run every trained model on the held-out windows and plot them.
st.subheader("5. Predict on test set")
if st.button("Run Predictions"):
    if st.session_state.X_test is None:
        st.error("Run preprocessing first.")
    else:
        X_test = st.session_state.X_test
        y_test = st.session_state.y_test
        # (model key, prediction key, message when done, message when untrained)
        _prediction_jobs = (
            ("model_lstm", "y_pred_lstm", "LSTM predictions done.", "LSTM not trained yet."),
            ("model_bilstm", "y_pred_bilstm", "Bi-LSTM predictions done.", "Bi-LSTM not trained yet."),
        )
        for _model_key, _pred_key, _done_msg, _untrained_msg in _prediction_jobs:
            _model = st.session_state[_model_key]
            if _model is None:
                st.info(_untrained_msg)
            else:
                st.session_state[_pred_key] = _model.predict(X_test)
                st.write(_done_msg)
        # Show the (scaled) plot and keep its PNG buffer for the PDF step.
        st.session_state.plot_image = plot_prediction(
            y_test,
            st.session_state.y_pred_lstm,
            st.session_state.y_pred_bilstm,
            show=True,
        )
# Step 6: Convert to TFLite
# Each trained model gets a convert button; the converted file is written
# to the system temp directory and offered for download.
st.subheader("6. Convert Keras models to TFLite")
if st.session_state.model_lstm is None and st.session_state.model_bilstm is None:
    st.write("Train models first to enable TFLite conversion.")
else:
    # (display label, session key of the model, output file name)
    _TFLITE_TARGETS = (
        ("LSTM", "model_lstm", "lstm_model.tflite"),
        ("Bi-LSTM", "model_bilstm", "bilstm_model.tflite"),
    )
    for _column, (_label, _model_key, _file_name) in zip(st.columns(2), _TFLITE_TARGETS):
        with _column:
            if not st.button(f"Convert {_label} to TFLite"):
                continue
            _model = st.session_state[_model_key]
            if _model is None:
                st.error(f"Train {_label} first.")
                continue
            tflite_path = os.path.join(tempfile.gettempdir(), _file_name)
            try:
                with st.spinner(f"Converting {_label} to TFLite ..."):
                    save_model_tflite(_model, tflite_path)
                # Read the bytes now so no open file handle is handed to the
                # download widget.
                with open(tflite_path, "rb") as f:
                    tflite_bytes = f.read()
            except Exception as e:
                # Conversion can fail for unsupported ops; report it in the
                # page like the other steps instead of a raw traceback.
                st.error(f"{_label} TFLite conversion failed: {e}")
                continue
            st.success(f"{_label} TFLite saved: {tflite_path}")
            st.download_button(f"Download {_label} .tflite", tflite_bytes, file_name=_file_name)
# Step 7: Generate PDF report
# Collects the data summary, ARIMA summary, scaled MSE metrics and the last
# prediction plot, writes the PDF to the temp directory and offers it for
# download.
st.subheader("7. Generate PDF report")
notes = st.text_area("Optional notes for PDF", value="Include model info and next steps here.", height=80)
if st.button("Make PDF Report"):
    if st.session_state.df is None:
        st.error("Load data first.")
    else:
        df = st.session_state.df
        df_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}"
        # Column labels read from Excel are not guaranteed to be strings.
        df_info += "\nColumns: " + ", ".join(map(str, df.columns))
        arima_summary = st.session_state.arima_summary or "ARIMA not run."

        # Metrics: simple MSE on the scaled values for every model that has
        # predictions matching the current test set.
        y_test = st.session_state.y_test
        metric_lines = []
        for _label, _pred_key in (("LSTM", "y_pred_lstm"), ("Bi-LSTM", "y_pred_bilstm")):
            _y_pred = st.session_state[_pred_key]
            if _y_pred is None:
                continue
            if y_test is None or np.size(_y_pred) != np.size(y_test):
                # Predictions made before preprocessing was re-run cannot be
                # compared with the current test targets.
                metric_lines.append(
                    f"{_label} MSE (scaled): unavailable (predictions do not match the test set; run predictions again)."
                )
                continue
            _mse = np.mean((np.ravel(_y_pred) - np.ravel(y_test)) ** 2)
            metric_lines.append(f"{_label} MSE (scaled): {_mse:.6f}")
        metrics_text = "\n".join(metric_lines) or "No predictions available to compute metrics."

        # Plot buffer saved by the prediction step, if any.
        plot_buf = st.session_state.plot_image if "plot_image" in st.session_state else None
        if plot_buf is None:
            st.info("Run predictions to include a plot in the PDF. A PDF without plot will be created.")

        tmp = tempfile.gettempdir()
        pdf_path = os.path.join(tmp, "rain_project_report.pdf")

        # The plot is handed to the PDF writer as an image file path.
        img_path = None
        if plot_buf is not None:
            try:
                img_path = os.path.join(tmp, "prediction_plot.png")
                with open(img_path, "wb") as f:
                    f.write(plot_buf.getvalue())
            except OSError as e:
                st.error(f"Could not save plot image: {e}")
                img_path = None

        # Make PDF
        try:
            generate_pdf_report(pdf_path,
                                title="Rainfall Prediction Project Report",
                                df_info=df_info,
                                arima_summary=arima_summary,
                                metrics_text=metrics_text,
                                plot_image_bytes=img_path,
                                notes=notes)
            with open(pdf_path, "rb") as f:
                pdf_bytes = f.read()
            st.success(f"PDF report created: {pdf_path}")
            st.download_button("Download PDF report", pdf_bytes, file_name="rain_project_report.pdf")
        except Exception as e:
            st.error(f"Failed to create PDF: {e}")
# Footer: divider followed by short usage tips, one write per tip.
st.markdown("---")
st.write("Tips:")
for _tip in (
    "- Train models with care. Use small epochs for testing.",
    "- Sequence length affects results. Use domain knowledge.",
    "- TFLite files run on mobile or Raspberry Pi.",
):
    st.write(_tip)
Notes & guidance
- The app uses scaled targets. PDF and plot show scaled values.
- For real-world use, inverse-scale predictions to original units before final reporting. Use the stored scaler.
- TFLite conversion is direct. If conversion fails for large/custom ops, consider saving a simpler model or using representative_dataset for quantization.
- ARIMA runs on raw RAIN column. It is univariate.
- PDF uses ReportLab. The plot is embedded if predictions exist.
If you want, I can:
- Add inverse-scaling code and show original-unit metrics.
- Add an example dataset and a demo run.
- Simplify the PDF layout or add additional charts.
Which one next?
No comments:
Post a Comment