ClaretPete001 wrote: ↑Sun Mar 19, 2023 11:01 am
Depends what he is doing. This is a simple object-oriented machine-learning example: a linear regression model using pandas, matplotlib, sklearn, etc.
He can have it but I'd struggle to help time wise.
It starts off with the library imports, then opens a CSV and a database, designs the model, fits the model and finally allows the model to learn from the data.
To be honest, there are loads of sites for Python; just nick some code and get it working.
import pyodbc
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import seaborn as sns
import sys
import statistics
class Querydata:
    """Load the achievement data set used by the regression pipeline."""

    @staticmethod
    def Connect_to_database(connection=None):
        """Return every row of the ``D_DETAILS`` table as a pandas DataFrame.

        The result is a DataFrame, not a bare list of driver rows, because
        the consumers in this file index it by column name and call
        DataFrame methods on it. Column names are taken from
        ``cursor.description``.

        Args:
            connection: Optional, already-open DB-API connection to query.
                When omitted, a new SQL Server connection is opened with
                pyodbc and closed again before returning. A connection
                supplied by the caller is left open.

        Returns:
            pandas.DataFrame: One row per ``D_DETAILS`` record; empty (but
            with the table's columns) when the table has no rows.
        """
        owns_connection = connection is None
        if owns_connection:
            connection = pyodbc.connect('DRIVER={SQL Server Native Client 11.0};Trusted_Connection=Yes;SERVER=IR- _____________;DATABASE=_________;TrustServerCertificate=Yes')
        try:
            cursor = connection.cursor()
            try:
                cursor.execute('SELECT * FROM D_DETAILS')
                column_names = [column[0] for column in cursor.description]
                # Driver row objects are converted to plain tuples so that
                # pandas spreads each record across the columns.
                rows = [tuple(row) for row in cursor.fetchall()]
            finally:
                cursor.close()
        finally:
            # Only release what this method opened itself.
            if owns_connection:
                connection.close()
        return pd.DataFrame.from_records(rows, columns=column_names)

    # Alternative CSV loader, left disabled by the original author:
    # def Importdata():
    #     dfAchievement_data = pd.read_csv(r'C:\Users\________\project_data\_________.csv')
    #     print(dfAchievement_data)
    #     return dfAchievement_data
class Linear_regression:
    """Single-feature linear regression of writing score on reading score.

    Constructing an instance runs the whole pipeline: load the data, print a
    null check, plot, drop 3-sigma outliers, split, fit, evaluate and print a
    sample prediction.

    All helpers are static methods, so they can be called either on the class
    or on an instance.

    NOTE(review): ``__init__`` ends with ``sys.exit()``, so construction
    terminates the interpreter instead of returning an instance. That is kept
    as-is because existing callers may rely on it.
    """

    # Entry point left disabled by the original author:
    # if __name__ == "__main__":
    #     main

    def __init__(self):
        """Run the full pipeline, then exit the interpreter.

        Expects ``Querydata.Connect_to_database()`` to yield a pandas
        DataFrame with ``gender``, ``math score``, ``reading score`` and
        ``writing score`` columns (these are the columns used below).
        """
        # Data extraction
        dfAchievement_data = Querydata.Connect_to_database()
        # Data pre-processing
        self.Check_for_nulls(dfAchievement_data)
        self.Generatevisuals(dfAchievement_data)
        dfAchievement_data = self.outlier_removal(dfAchievement_data)
        print('dfAchievement_data', dfAchievement_data)
        self.ReGeneratevisuals(dfAchievement_data)
        # Identifying the x and y variables
        x, y = self.creatingxandyvariables(dfAchievement_data)
        self.CheckforXandy(x, y)
        # Splitting into training and test sets
        x_train, x_test, y_train, y_test = self.splitdataset(x, y)
        print('print before create', x_train, y_test)
        # Fitting the model
        Fitted_model = self.Createmodel(x_train, y_train)
        # Evaluating the model on the held-out data
        self.Prediction(Fitted_model, x_test, y_test)
        coefficients, intercept = self.Assessment(Fitted_model)
        # Implementing the model
        Predicted_writing_score = self.calculate_charges(72, coefficients, intercept)
        print("calculate_charges", Predicted_writing_score)
        # Triangulating the model
        self.check_calculated_score(x, y)
        sys.exit()

    @staticmethod
    def Check_for_nulls(dfAchievement_data):
        """Print an element-wise mask of the missing values in the data."""
        rpisnull = pd.isnull(dfAchievement_data)
        print("Are there any nulls", rpisnull)

    @staticmethod
    def Generatevisuals(dfAchievement_data):
        """Show exploratory plots of the raw data, coloured by gender."""
        dfAchievement_data.plot(x="gender", y="math score", kind="bar")
        sns.relplot(data=dfAchievement_data, x='reading score', y='writing score', hue='gender')
        plt.show()
        sns.pairplot(dfAchievement_data, hue='gender')
        plt.show()

    @staticmethod
    def _three_sigma_limits(scores):
        """Return ``(lower, upper)`` bounds at mean -/+ 3 standard deviations."""
        centre = scores.mean()
        spread = 3 * scores.std()
        return centre - spread, centre + spread

    @staticmethod
    def outlier_removal(dfAchievement_data):
        """Drop rows whose reading or writing score is a 3-sigma outlier.

        Both sets of limits are computed from the unfiltered data. A row is
        kept only when both scores lie strictly inside their limits. The
        limits before and after filtering are printed.

        Args:
            dfAchievement_data: DataFrame with ``reading score`` and
                ``writing score`` columns.

        Returns:
            The filtered DataFrame (the input is not modified).
        """
        limits = Linear_regression._three_sigma_limits
        reading = dfAchievement_data['reading score']
        writing = dfAchievement_data['writing score']
        Reading_lower_limit, Reading_upper_limit = limits(reading)
        Writing_lower_limit, Writing_upper_limit = limits(writing)
        print("Reading upper limit: ", Reading_upper_limit)
        print("Reading Lower Limit: ", Reading_lower_limit)
        print("Writing upper limit: ", Writing_upper_limit)
        print("Writing Lower Limit: ", Writing_lower_limit)

        within_limits = (
            (reading > Reading_lower_limit) & (reading < Reading_upper_limit)
            & (writing > Writing_lower_limit) & (writing < Writing_upper_limit)
        )
        dfAchievement_data = dfAchievement_data[within_limits]

        # Report the limits again so the effect of the filtering is visible.
        Reading_lower_limit, Reading_upper_limit = limits(dfAchievement_data['reading score'])
        Writing_lower_limit, Writing_upper_limit = limits(dfAchievement_data['writing score'])
        print("New Reading upper limit: ", Reading_upper_limit)
        print("New Reading Lower Limit: ", Reading_lower_limit)
        print("New Writing upper limit: ", Writing_upper_limit)
        print("New Writing Lower Limit: ", Writing_lower_limit)
        return dfAchievement_data

    @staticmethod
    def ReGeneratevisuals(dfAchievement_data):
        """Show the plots again for the outlier-filtered data."""
        dfAchievement_data.plot(x="reading score", y="writing score", kind="bar")
        sns.relplot(data=dfAchievement_data, x='reading score', y='writing score', hue='gender')
        plt.show()
        sns.pairplot(dfAchievement_data, hue='gender')
        plt.show()

    @staticmethod
    def creatingxandyvariables(dfAchievement_data):
        """Return ``(x, y)``: reading score as feature, writing score as target."""
        x = dfAchievement_data['reading score']
        y = dfAchievement_data['writing score']
        print(x, y)
        return x, y

    @staticmethod
    def CheckforXandy(x, y):
        """Print the feature and target series for a visual sanity check."""
        print('Returned variable x', x)
        print('Returned variable y', y)

    @staticmethod
    def splitdataset(x, y, train_size=0.3):
        """Shuffle and split ``x``/``y`` into training and test subsets.

        Args:
            x: Feature values.
            y: Target values.
            train_size: Fraction (or count) of samples used for training,
                passed straight to ``train_test_split``.
                NOTE(review): the default of 0.3 trains on the smaller share
                of the data; possibly ``test_size=0.3`` was intended. The
                default is kept so existing results do not change.

        Returns:
            Tuple ``(x_train, x_test, y_train, y_test)``.
        """
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, shuffle=True, train_size=train_size)
        return x_train, x_test, y_train, y_test

    @staticmethod
    def Createmodel(x_train, y_train):
        """Fit and return a ``LinearRegression`` on the training data.

        ``x_train`` is a one-dimensional Series; it is reshaped into a
        single-column 2-D array because the estimator expects a feature
        matrix.
        """
        print('in creatmodel func', x_train, y_train)
        features = x_train.to_numpy().reshape(-1, 1)
        model = LinearRegression()
        Fitted_model = model.fit(features, y_train)
        return Fitted_model

    @staticmethod
    def Prediction(model, x_test, y_test):
        """Print and return R^2 and RMSE of ``model`` on the test data.

        RMSE is computed as the square root of the mean squared error rather
        than through the ``squared=False`` keyword, which newer scikit-learn
        releases no longer accept.

        Returns:
            Tuple ``(r2, rmse)``.
        """
        predictions = model.predict(x_test.to_numpy().reshape(-1, 1))
        r2 = r2_score(y_test, predictions)
        rmse = mean_squared_error(y_test, predictions) ** 0.5
        print('The r2 is: ', r2)
        print('The rmse is: ', rmse)
        return r2, rmse

    @staticmethod
    def Assessment(model):
        """Return the fitted model's ``(coefficients, intercept)``."""
        coefficients = model.coef_
        intercept = model.intercept_
        return coefficients, intercept

    @staticmethod
    def calculate_charges(Rdscore, coefficients, intercept):
        """Apply the fitted line to a reading score.

        Despite the name, the value returned is
        ``Rdscore * coefficients + intercept``, i.e. the predicted writing
        score for the given reading score.
        """
        print('Reading score', Rdscore)
        return (Rdscore * coefficients) + intercept

    @staticmethod
    def check_calculated_score(x, y):
        """Print the mean of ``x`` and of ``y`` as a cross-check."""
        print('Statistics mean x', statistics.mean(x))
        print('Statistic mean y', statistics.mean(y))