project2025: #2 Project Code in Python

Below is a single Python file that adds a Streamlit UI, TFLite conversion, and PDF report generation — all in one.

Save as rain_project_app.py. Run with streamlit run rain_project_app.py.

Short sentences. Simple language. Neutral tone.

Before running, install the required packages once:

pip install pandas numpy matplotlib scikit-learn tensorflow statsmodels openpyxl xlrd streamlit reportlab

File: rain_project_app.py

"""
rain_project_app.py

Single-file Streamlit app for:
- Load Excel weather data
- Preprocess data
- Train ARIMA (baseline), LSTM, Bi-LSTM
- Plot predictions
- Convert Keras models to TFLite
- Generate a PDF report

Usage:
$ streamlit run rain_project_app.py
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import tempfile
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from datetime import datetime

# Page-level settings: browser tab title and full-width layout.
st.set_page_config(
    layout="wide",
    page_title="Rain Prediction Project",
)

# ---------------------------
# Helper functions
# ---------------------------

@st.cache_data
def load_excel_file(uploaded_file):
    """Read an Excel workbook into a DataFrame.

    The function is wrapped in ``st.cache_data``.

    Args:
        uploaded_file: Whatever ``pd.read_excel`` accepts as its first
            argument; the app passes the object returned by the file
            uploader widget.

    Returns:
        The DataFrame returned by ``pd.read_excel``.
    """
    return pd.read_excel(uploaded_file)

def create_sequences(data_df, features, target, seq_len=30):
    """Scale the data and cut it into sliding windows for sequence models.

    A single ``MinMaxScaler`` is fit on ``features + [target]`` over ALL rows
    of ``data_df`` (so later test rows influence the scaling). Window ``i``
    holds the scaled feature rows ``i .. i + seq_len - 1`` and is paired with
    the scaled target of row ``i + seq_len``.

    Args:
        data_df: DataFrame containing every column named in ``features`` and
            ``target``.
        features: Sequence of feature column names.
        target: Name of the target column.
        seq_len: Number of consecutive rows per input window.

    Returns:
        Tuple ``(X, y, scaler)`` where ``X`` has shape
        ``(n_windows, seq_len, len(features))``, ``y`` has shape
        ``(n_windows,)`` and ``scaler`` is the fitted ``MinMaxScaler``.
        When there are not more rows than ``seq_len``, ``X`` and ``y`` are
        empty but keep those ranks.
    """
    # list(...) lets callers pass a tuple or an Index as well as a list.
    columns = list(features) + [target]
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data_df[columns])

    # Work on the ndarray directly: slicing a DataFrame with .iloc once per
    # window is far slower and yields the same values.
    feature_block = scaled[:, :-1]
    target_column = scaled[:, -1]

    n_windows = len(scaled) - seq_len
    if n_windows <= 0:
        # Keep the 3-D / 1-D ranks so callers can still inspect .shape.
        X = np.empty((0, seq_len, len(columns) - 1), dtype=scaled.dtype)
        y = np.empty((0,), dtype=scaled.dtype)
        return X, y, scaler

    X = np.stack([feature_block[i:i + seq_len] for i in range(n_windows)])
    y = target_column[seq_len:].copy()
    return X, y, scaler

def build_lstm(shape):
    """Build and compile a stacked LSTM regressor.

    Layers: LSTM(64, returning sequences) -> LSTM(32) -> Dense(1).
    Compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        shape: Value passed as ``input_shape`` to the first layer; the app
            passes ``(timesteps, n_features)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential([
        LSTM(64, return_sequences=True, input_shape=shape),
        LSTM(32),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

def build_bilstm(shape):
    """Build and compile a stacked bidirectional LSTM regressor.

    Layers: Bidirectional(LSTM(64, returning sequences)) ->
    Bidirectional(LSTM(32)) -> Dense(1). Compiled with the Adam optimizer
    and mean-squared-error loss.

    Args:
        shape: Value passed as ``input_shape`` to the first layer; the app
            passes ``(timesteps, n_features)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential([
        Bidirectional(LSTM(64, return_sequences=True), input_shape=shape),
        Bidirectional(LSTM(32)),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

def train_keras(model, X_train, y_train, epochs=10, batch_size=32):
    """Fit ``model`` silently and return whatever ``model.fit`` returns.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs, passed positionally to ``fit``.
        y_train: Training targets, passed positionally to ``fit``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The return value of ``model.fit``.
    """
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "validation_split": 0.1,  # fraction handed to fit() for validation
        "verbose": 0,             # no per-epoch console output
    }
    return model.fit(X_train, y_train, **fit_options)

def plot_prediction(y_true, y_pred_lstm=None, y_pred_bilstm=None, show=True):
    """Plot actual vs. predicted series and return the chart as PNG bytes.

    The chart is drawn on its own ``Figure`` object instead of the implicit
    global pyplot figure, and that figure is always closed, even when
    rendering or saving raises.

    Args:
        y_true: Actual values to plot.
        y_pred_lstm: Optional LSTM predictions; skipped when ``None``.
        y_pred_bilstm: Optional Bi-LSTM predictions; skipped when ``None``.
        show: When true, also render the figure in the Streamlit page.

    Returns:
        ``io.BytesIO`` positioned at 0 and containing the PNG image.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(y_true, label="Actual", linewidth=1.5)
        if y_pred_lstm is not None:
            ax.plot(y_pred_lstm, label="LSTM", linewidth=1)
        if y_pred_bilstm is not None:
            ax.plot(y_pred_bilstm, label="Bi-LSTM", linewidth=1)
        ax.legend()
        ax.set_title("Rainfall: Actual vs Predicted (scaled)")
        fig.tight_layout()
        if show:
            st.pyplot(fig)
        # Save to buffer and return
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
        buf.seek(0)
        return buf
    finally:
        # Release the figure whether or not drawing succeeded.
        plt.close(fig)

def save_model_tflite(keras_model, save_path):
    """Convert a Keras model with ``TFLiteConverter`` and write it to disk.

    Args:
        keras_model: Model handed to ``TFLiteConverter.from_keras_model``.
        save_path: Destination file path; opened in binary write mode.

    Returns:
        ``save_path``, unchanged.
    """
    # Convert first so no file is created when conversion fails.
    tflite_bytes = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()
    with open(save_path, 'wb') as out_file:
        out_file.write(tflite_bytes)
    return save_path

def generate_pdf_report(pdf_path, title, df_info, arima_summary, metrics_text, plot_image_bytes, notes=""):
    """Write a simple multi-section A4 PDF report with ReportLab.

    Sections, in order: title, generation timestamp, data summary, ARIMA
    summary, model metrics, optional plot image, optional notes. Text is
    drawn line by line without wrapping.

    Args:
        pdf_path: Output file path for the PDF.
        title: Report title drawn at the top of the first page.
        df_info: Multi-line text for the "Data summary" section.
        arima_summary: Multi-line text for the ARIMA section.
        metrics_text: Multi-line text for the metrics section.
        plot_image_bytes: Image to embed. May be a file path, raw image
            bytes, a binary file-like object, or ``None`` to omit the plot.
        notes: Optional multi-line text for the final section; skipped when
            empty.

    Returns:
        None. The PDF is written to ``pdf_path``.
    """
    # Local import: only needed to wrap in-memory images for drawImage.
    from reportlab.lib.utils import ImageReader

    c = canvas.Canvas(pdf_path, pagesize=A4)
    width, height = A4
    margin = 40
    top = height - margin
    y = top

    def draw_section(heading, text, y, leading, min_y):
        """Draw a bold heading plus body lines; return the updated y."""
        c.setFont("Helvetica-Bold", 11)
        c.drawString(margin, y, heading)
        y -= 14
        c.setFont("Helvetica", 9)
        for line in text.splitlines():
            c.drawString(margin, y, line)
            y -= leading
            if y < min_y:
                c.showPage()
                # showPage() resets the canvas state, so the body font has
                # to be selected again on the new page.
                c.setFont("Helvetica", 9)
                y = top
        return y

    # Title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(margin, y, title)
    y -= 30

    # Timestamp
    c.setFont("Helvetica", 9)
    c.drawString(margin, y, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    y -= 20

    y = draw_section("Data summary:", df_info, y, leading=12, min_y=120)
    y = draw_section("ARIMA summary (short):", arima_summary, y, leading=10, min_y=120)
    y = draw_section("Model metrics:", metrics_text, y, leading=10, min_y=180)

    # Plot image (omitted entirely when the caller has none)
    if plot_image_bytes is not None:
        y -= 10
        img_w = width - 2 * margin
        img_h = img_w * 0.4
        if y - img_h < margin:
            # Not enough room left: start a new page rather than drawing
            # the image past the bottom edge.
            c.showPage()
            y = top
        try:
            image = plot_image_bytes
            if isinstance(image, (bytes, bytearray)):
                image = io.BytesIO(image)
            if hasattr(image, "read"):
                image = ImageReader(image)
            c.drawImage(image, margin, y - img_h, width=img_w, height=img_h)
            y -= (img_h + 20)
        except Exception as e:
            # Best effort: the report is still useful without the plot.
            c.setFont("Helvetica", 9)
            c.drawString(margin, y, f"Could not include plot image: {e}")
            y -= 20

    # Notes
    if notes:
        if y < 120:
            c.showPage()
            y = top
        y = draw_section("Notes / Next steps:", notes, y, leading=10, min_y=120)

    c.save()

# ---------------------------
# Streamlit UI
# ---------------------------

st.title("Real-Time Rainfall Prediction — Project App")
st.write("Single file app. Load data, train models, convert to TFLite, and make a PDF report.")

# Sidebar: data upload plus the settings read by the later steps
with st.sidebar:
    st.header("1. Data and settings")
    uploaded = st.file_uploader("Upload Excel file (weather.xlsx)", type=["xlsx", "xls"])
    seq_len = st.number_input("Sequence length (days)", min_value=5, max_value=365, value=30)
    test_ratio = st.slider("Test ratio", min_value=0.05, max_value=0.5, value=0.2)
    epochs = st.number_input("Epochs for Keras models", min_value=1, max_value=200, value=10)
    batch_size = st.number_input("Batch size", min_value=1, max_value=256, value=32)

# Seed session state once so every later step can test these keys safely.
# Everything starts as None except the ARIMA summary, which starts empty.
_SESSION_DEFAULTS = {
    "df": None,
    "X_train": None,
    "X_test": None,
    "y_train": None,
    "y_test": None,
    "scaler": None,
    "model_lstm": None,
    "model_bilstm": None,
    "arima_summary": "",
    "y_pred_lstm": None,
    "y_pred_bilstm": None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Step 1: Load data
st.subheader("1. Load and view data")
if not uploaded:
    st.info("Upload your Excel file. Columns must include: RAIN, RHMX, RHMN, WS, TMAX, TMIN, SSH")
else:
    try:
        loaded_df = load_excel_file(uploaded)
        # Keep the frame in session state so later button clicks can reach it.
        st.session_state.df = loaded_df
        st.write("First rows of data:")
        st.dataframe(loaded_df.head())
        st.write("Columns detected:", list(loaded_df.columns))
    except Exception as e:
        st.error(f"Error reading file: {e}")

# Step 2: Preprocess
# Builds scaled sliding windows, splits them chronologically, and stores the
# result in session state. Anything derived from an earlier preprocessing run
# is cleared because it no longer matches the new windows.
st.subheader("2. Preprocess data")
if st.button("Run Preprocessing"):
    if st.session_state.df is None:
        st.error("Load data first.")
    else:
        df = st.session_state.df.copy()
        required = ["RAIN", "RHMX", "RHMN", "WS", "TMAX", "TMIN", "SSH"]
        missing = [c for c in required if c not in df.columns]
        if missing:
            st.error(f"Missing columns: {missing}")
        else:
            features = ["RHMX", "RHMN", "WS", "TMAX", "TMIN", "SSH"]
            target = "RAIN"

            # Missing values would pass through the scaler as NaN and turn
            # the training loss into NaN, so drop incomplete rows up front.
            rows_before = len(df)
            df = df.dropna(subset=required)
            rows_dropped = rows_before - len(df)
            if rows_dropped:
                st.warning(f"Dropped {rows_dropped} rows with missing values in required columns.")

            # At least two windows are needed for a non-empty train/test split.
            if len(df) - seq_len < 2:
                st.error(
                    f"Not enough rows ({len(df)}) for sequence length {seq_len}. "
                    "Use more data or a shorter sequence length."
                )
            else:
                try:
                    X, y, scaler = create_sequences(df, features, target, seq_len=seq_len)
                    # shuffle=False keeps the test set as the most recent windows.
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=test_ratio, shuffle=False
                    )
                except ValueError as e:
                    # Raised, for example, when a required column is not numeric.
                    st.error(f"Preprocessing failed: {e}")
                else:
                    st.session_state.X_train, st.session_state.X_test = X_train, X_test
                    st.session_state.y_train, st.session_state.y_test = y_train, y_test
                    st.session_state.scaler = scaler
                    # Models, predictions and the saved plot belong to the
                    # previous windows and split; discard them.
                    for stale_key in ("model_lstm", "model_bilstm", "y_pred_lstm", "y_pred_bilstm"):
                        st.session_state[stale_key] = None
                    if "plot_image" in st.session_state:
                        del st.session_state["plot_image"]
                    st.success(f"Preprocessing done. Train shape {X_train.shape}, Test shape {X_test.shape}")

# Step 3: ARIMA baseline
# Fits ARIMA(2,1,2) on the RAIN column (NaNs dropped), stores the first
# 12 lines of the fit summary in session state, and shows a 10-step forecast.
st.subheader("3. ARIMA baseline")
run_arima = st.button("Run ARIMA (univariate RAIN)")
if run_arima and st.session_state.df is None:
    st.error("Load data first.")
elif run_arima:
    try:
        rain_values = st.session_state.df["RAIN"].dropna().values
        fitted = ARIMA(rain_values, order=(2,1,2)).fit()
        summary_lines = fitted.summary().as_text().splitlines()
        # keep short summary (first 12 lines)
        condensed = "\n".join(summary_lines[:12])
        st.session_state.arima_summary = condensed
        st.text("ARIMA summary (short):")
        st.text(condensed)
        next_steps = fitted.forecast(steps=10)
        st.write("10-step forecast (raw scale):")
        st.write(np.round(next_steps,4))
    except Exception as e:
        st.error(f"ARIMA failed: {e}")

# Step 4: Train LSTM
# One button per model, side by side. Each trained model is stored in
# session state under its own key.
st.subheader("4. Train LSTM and Bi-LSTM")
col1, col2 = st.columns(2)
_training_jobs = (
    (col1, "Train LSTM", "Training LSTM ...", "LSTM trained.", build_lstm, "model_lstm"),
    (col2, "Train Bi-LSTM", "Training Bi-LSTM ...", "Bi-LSTM trained.", build_bilstm, "model_bilstm"),
)
for _column, _button_label, _busy_msg, _done_msg, _builder, _model_key in _training_jobs:
    with _column:
        if not st.button(_button_label):
            continue
        if st.session_state.X_train is None:
            st.error("Run preprocessing first.")
            continue
        train_inputs = st.session_state.X_train
        train_targets = st.session_state.y_train
        # Input shape is (timesteps, n_features) taken from the windows.
        network = _builder((train_inputs.shape[1], train_inputs.shape[2]))
        with st.spinner(_busy_msg):
            train_keras(network, train_inputs, train_targets, epochs=epochs, batch_size=batch_size)
        st.session_state[_model_key] = network
        st.success(_done_msg)

# Step 5: Predict
# Runs every trained model on the test windows, stores the predictions,
# then plots them against the (scaled) actual values.
st.subheader("5. Predict on test set")
if st.button("Run Predictions"):
    test_inputs = st.session_state.X_test
    if test_inputs is None:
        st.error("Run preprocessing first.")
    else:
        test_targets = st.session_state.y_test
        prediction_jobs = (
            ("model_lstm", "y_pred_lstm", "LSTM predictions done.", "LSTM not trained yet."),
            ("model_bilstm", "y_pred_bilstm", "Bi-LSTM predictions done.", "Bi-LSTM not trained yet."),
        )
        for model_key, pred_key, done_msg, missing_msg in prediction_jobs:
            trained = st.session_state[model_key]
            if trained is None:
                st.info(missing_msg)
                continue
            st.session_state[pred_key] = trained.predict(test_inputs)
            st.write(done_msg)
        # show plot (scaled) and save it in session for the PDF step
        st.session_state.plot_image = plot_prediction(
            test_targets,
            st.session_state.y_pred_lstm,
            st.session_state.y_pred_bilstm,
            show=True,
        )

# Step 6: Convert to TFLite
# One button per model. Conversion errors are reported in the page (like the
# ARIMA and PDF steps) instead of aborting the script with a traceback.
st.subheader("6. Convert Keras models to TFLite")
if st.session_state.model_lstm is None and st.session_state.model_bilstm is None:
    st.write("Train models first to enable TFLite conversion.")
else:
    col3, col4 = st.columns(2)
    _conversion_jobs = (
        (col3, "LSTM", "model_lstm", "lstm_model.tflite"),
        (col4, "Bi-LSTM", "model_bilstm", "bilstm_model.tflite"),
    )
    for _column, _label, _model_key, _file_name in _conversion_jobs:
        with _column:
            if not st.button(f"Convert {_label} to TFLite"):
                continue
            _model = st.session_state[_model_key]
            if _model is None:
                st.error(f"Train {_label} first.")
                continue
            tflite_path = os.path.join(tempfile.gettempdir(), _file_name)
            try:
                with st.spinner(f"Converting {_label} to TFLite ..."):
                    save_model_tflite(_model, tflite_path)
                # Read the bytes now so no open file handle is handed to the
                # download widget.
                with open(tflite_path, "rb") as f:
                    tflite_bytes = f.read()
            except Exception as e:
                st.error(f"{_label} TFLite conversion failed: {e}")
                continue
            st.success(f"{_label} TFLite saved: {tflite_path}")
            st.download_button(f"Download {_label} .tflite", tflite_bytes, file_name=_file_name)

# Step 7: Generate PDF report
# Collects a data summary, the stored ARIMA summary, scaled-MSE metrics and
# the last prediction plot, then writes them to a PDF in the temp directory.
st.subheader("7. Generate PDF report")
notes = st.text_area("Optional notes for PDF", value="Include model info and next steps here.", height=80)
if st.button("Make PDF Report"):
    if st.session_state.df is None:
        st.error("Load data first.")
    else:
        df = st.session_state.df
        df_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}"
        # str() because Excel headers may be numbers or dates, not text.
        df_info += "\nColumns: " + ", ".join(str(col) for col in df.columns)
        arima_summary = st.session_state.arima_summary or "ARIMA not run."

        # Metrics: simple MSE on the scaled values, for each available model
        y_test_now = st.session_state.y_test
        metric_lines = []
        for label, preds in (
            ("LSTM", st.session_state.y_pred_lstm),
            ("Bi-LSTM", st.session_state.y_pred_bilstm),
        ):
            if preds is None or y_test_now is None:
                continue
            pred_flat = np.asarray(preds).ravel()
            true_flat = np.asarray(y_test_now).ravel()
            if pred_flat.shape != true_flat.shape:
                # Predictions come from an older test split; subtracting
                # would raise a broadcasting error.
                metric_lines.append(
                    f"{label} MSE (scaled): unavailable (predictions do not match "
                    "the current test set; run predictions again)"
                )
                continue
            mse = float(np.mean((pred_flat - true_flat) ** 2))
            metric_lines.append(f"{label} MSE (scaled): {mse:.6f}")
        metrics_text = "\n".join(metric_lines) or "No predictions available to compute metrics."

        # Plot image buffer saved by the prediction step, if any
        plot_buf = st.session_state.plot_image if "plot_image" in st.session_state else None
        if plot_buf is None:
            st.info("Run predictions to include a plot in the PDF. A PDF without plot will be created.")
        tmp = tempfile.gettempdir()
        pdf_path = os.path.join(tmp, "rain_project_report.pdf")

        # If we have a plot buffer, save it as an image file for reportlab
        img_path = None
        if plot_buf is not None:
            try:
                img_path = os.path.join(tmp, "prediction_plot.png")
                with open(img_path, "wb") as f:
                    f.write(plot_buf.getvalue())
            except Exception as e:
                st.error(f"Could not save plot image: {e}")
                img_path = None

        # Path of the saved plot image, or None when no plot is available
        plot_image_for_pdf = img_path if img_path else None

        # Make PDF
        try:
            generate_pdf_report(pdf_path,
                                title="Rainfall Prediction Project Report",
                                df_info=df_info,
                                arima_summary=arima_summary,
                                metrics_text=metrics_text,
                                plot_image_bytes=plot_image_for_pdf,
                                notes=notes)
            with open(pdf_path, "rb") as f:
                pdf_bytes = f.read()
            st.success(f"PDF report created: {pdf_path}")
            st.download_button("Download PDF report", pdf_bytes, file_name="rain_project_report.pdf")
        except Exception as e:
            st.error(f"Failed to create PDF: {e}")

# Footer: horizontal rule followed by a short list of usage tips
st.markdown("---")
st.write("Tips:")
for _tip in (
    "- Train models with care. Use small epochs for testing.",
    "- Sequence length affects results. Use domain knowledge.",
    "- TFLite files run on mobile or Raspberry Pi.",
):
    st.write(_tip)

Notes & guidance

The app uses scaled targets. PDF and plot show scaled values.
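For real-world use, inverse-scale predictions to original units before final reporting. Use the stored scaler. It was fit on the six feature columns plus RAIN, so place the predictions in the last column of a seven-column array before calling inverse_transform, then read that column back.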
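TFLite conversion is direct. If conversion fails because of unsupported ops (this can happen with LSTM layers), allow TensorFlow select ops by setting converter.target_spec.supported_ops to [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS], or save a simpler model. A representative_dataset is only needed for post-training integer quantization.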
ARIMA runs on raw RAIN column. It is univariate.
PDF uses ReportLab. The plot is embedded if predictions exist.

If you want, I can:

Add inverse-scaling code and show original-unit metrics.
Add example dataset and a demo run.
Simplify PDF layout or add additional charts.

Which one next?

project2025

Wednesday, December 3, 2025

#2 Project Code in Python

Notes & guidance

No comments:

Post a Comment

#8 Inferences and Agri Action oriented Decisions

Report Abuse