Linear regression with sklearn
In this example we use generated data to get a basic feel for how linear regression works.
In the first Jupyter Notebook file we see how we can train a model:
- examples/ml/basic_linear_regression.ipynb
Then we have two files, one a Jupyter notebook and one a plain Python file, each demonstrating how we can use the trained model.
- examples/ml/use_basic_linear_expression.ipynb
from joblib import load
import sys
# Command-line prediction: every argument is one x value passed to the saved model.
if len(sys.argv) < 2:
    # sys.exit is used instead of exit(): the latter is injected by the `site`
    # module for interactive use and is not guaranteed to exist in every run.
    sys.exit(f"Usage: {sys.argv[0]} Xes")

# Each value is wrapped in its own list so predict() receives one
# single-feature row per command-line argument.
input_values = [[float(val)] for val in sys.argv[1:]]

# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code - only load model files from a trusted source.
model = load('linear.joblib')
print(model.predict(input_values))
examples/ml/basic_linear_regression.py
#get_ipython().system('pip install numpy pandas scikit-learn matplotlib joblib')
import sys
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from joblib import dump
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
def generate_data_with_noise(size, noise_level):
    """Build a noisy sample of the line ``y = x``.

    Args:
        size: Number of points; ``x`` takes the values ``0 .. size - 1``.
        noise_level: Width of the uniform noise band. For a non-negative
            value each ``y`` differs from its ``x`` by an offset drawn from
            ``[-noise_level / 2, noise_level / 2)``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"x"`` and ``"y"``.
    """
    x = np.arange(size)
    # np.random.rand draws from [0, 1); subtracting 0.5 centres the noise on 0.
    noise = noise_level * (np.random.rand(size) - 0.5)
    y = x + noise
    return pd.DataFrame({"x": x, "y": y})
def main():
    """Train a linear regression on generated noisy data and plot the fit.

    Command-line arguments:
        SIZE:  number of data points to generate (integer).
        NOISE: noise level handed to ``generate_data_with_noise``; parsed as
            a float, so both whole and fractional values are accepted.

    Prints the first rows of the target, the sizes of the train/test split,
    the fitted intercept and coefficient, and the coefficient of
    determination on the train set, the test set and the full data set.
    Finally shows a scatter plot of the data with the fitted line in red.

    Exits with a usage message when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        # sys.exit rather than exit(): the latter is a `site` helper meant
        # for interactive sessions.
        sys.exit(f"Usage: {sys.argv[0]} SIZE NOISE")
    size, noise = int(sys.argv[1]), float(sys.argv[2])

    # Fixed seed so the same arguments always generate the same data.
    np.random.seed(42)
    df = generate_data_with_noise(size, noise)

    # Double brackets keep X a one-column DataFrame rather than a Series.
    X = df[["x"]]
    y = df["y"]
    print(y.head(3))

    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=4)
    print(len(y_train), len(y_test))

    model = LinearRegression()
    model.fit(x_train, y_train)
    print(f"intercept: {model.intercept_} coef: {model.coef_}")
    print('train coefficient of determination:', model.score(x_train, y_train))
    print('test coefficient of determination:', model.score(x_test, y_test))
    print('coefficient of determination:', model.score(X, y))

    # Two points are enough to draw the fitted line: predict at both ends
    # of the x range (0 and size-1).
    x1, x2 = df["x"].min(), df["x"].max()
    y1, y2 = model.predict(pd.DataFrame({'x': [x1, x2]}))
    plt.plot([x1, x2], [y1, y2], color="red")
    plt.scatter(df["x"], df["y"])
    plt.show()

    # Uncomment to save the trained model to disk:
    # dump(model, 'linear.joblib')


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()