import sqlite3
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard
from imblearn.over_sampling import SMOTE
def _to_epoch_seconds(values):
    """Convert a datetime-like Series to float seconds since the Unix epoch.

    Args:
        values: Series of values accepted by ``pd.to_datetime``.

    Returns:
        Series of floats holding seconds since 1970-01-01.
    """
    parsed = pd.to_datetime(values)
    # Timezone-aware values are expressed in UTC and made naive so that the
    # resolution cast below is accepted; the epoch offset is unchanged.
    if getattr(parsed.dtype, 'tz', None) is not None:
        parsed = parsed.dt.tz_convert(None)
    # The division by 10**9 is only valid for nanosecond integers, so the
    # resolution is pinned explicitly instead of relying on what
    # ``pd.to_datetime`` inferred. 'int64' is spelled out because plain
    # ``int`` can map to a 32-bit type on some platforms.
    nanoseconds = parsed.astype('datetime64[ns]').astype('int64')
    return nanoseconds / 10**9


def preprocess_data(df):
    """Fill gaps, encode the date columns numerically and split off the target.

    The frame is modified in place: missing values are forward-filled and the
    'SCADENZA' and 'DATA_INVIO_FILE' columns are overwritten with epoch
    seconds.

    Args:
        df: DataFrame containing at least the columns 'SCADENZA',
            'DATA_INVIO_FILE' and 'TARGET'.

    Returns:
        Tuple ``(features, target)`` where ``features`` is ``df`` without the
        'TARGET' column and ``target`` is the 'TARGET' column.
    """
    # Missing-value handling: propagate the last valid observation forward.
    # ``DataFrame.ffill`` replaces the deprecated ``fillna(method='ffill')``.
    # Leading rows that have no previous value stay missing.
    df.ffill(inplace=True)

    # Convert the date columns to numeric timestamps (seconds).
    for date_column in ('SCADENZA', 'DATA_INVIO_FILE'):
        df[date_column] = _to_epoch_seconds(df[date_column])

    # Separate the features from the target.
    features = df.drop(columns=['TARGET'])
    target = df['TARGET']
    return features, target
def build_preprocessor(numerical_features, categorical_features):
    """Create the column-wise feature transformer.

    Args:
        numerical_features: Column names routed to a ``StandardScaler``.
        categorical_features: Column names routed to a ``OneHotEncoder``.

    Returns:
        An unfitted ``ColumnTransformer`` with a 'num' and a 'cat' step.
    """
    scaling_step = ('num', StandardScaler(), numerical_features)
    encoding_step = ('cat', OneHotEncoder(), categorical_features)
    return ColumnTransformer(transformers=[scaling_step, encoding_step])
def build_model(input_shape):
    """Build and compile a small feed-forward binary classifier.

    Architecture: Dense(64, relu) -> Dropout(0.3) -> Dense(32, relu) ->
    Dropout(0.3) -> Dense(1, sigmoid), compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Number of input features, or a shape tuple without the
            batch dimension. ``None`` defers building the weights until the
            model first sees data.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = []
    if input_shape is not None:
        # Local import: ``Input`` is not among the file-level imports.
        from tensorflow.keras.layers import Input

        # Accept both a bare feature count and a ready-made shape tuple.
        try:
            shape = tuple(input_shape)
        except TypeError:
            shape = (input_shape,)
        # Declaring the input up front means the weights exist immediately
        # (e.g. for ``model.summary()``) rather than only after the first
        # batch has been seen.
        layers.append(Input(shape=shape))

    layers.extend([
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid'),
    ])

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
def ML(mode):
    """Train the classifier on the ``Dati`` table, or score it with a saved model.

    In 'train' mode the data is split into training and validation sets, the
    training set is balanced with SMOTE, and the fitted model is written to
    'data/trained_model.h5' with TensorBoard logs under './logs'.

    In 'predict' mode the saved model is loaded, every row of the table is
    scored, and the frame plus a 'PREDICTION' column (thresholded at 0.5) is
    written to 'risultati_previsione.csv'.

    Args:
        mode: Either 'train' or 'predict'.

    Raises:
        ValueError: If ``mode`` is neither 'train' nor 'predict'.
    """
    # Reject unknown modes before touching the database; previously they
    # loaded and transformed everything and then silently did nothing.
    if mode not in ('train', 'predict'):
        raise ValueError(f"Unknown mode {mode!r}; expected 'train' or 'predict'.")

    conn = sqlite3.connect('../MachineLearning.db')
    try:
        # The TARGET column is expected to be part of this table.
        df = pd.read_sql_query("SELECT * FROM Dati", conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()

    numerical_features = ['PERC', 'NETTOSPLIT', 'IMPORTO1', 'IMPORTO2', 'SCADENZA', 'DATA_INVIO_FILE']
    categorical_features = ['PORTAF', 'TIPO_CR', 'LAG', 'TIPO_DOC', 'TIPO_FF', 'FACTOR']

    features, target = preprocess_data(df)
    preprocessor = build_preprocessor(numerical_features, categorical_features)
    # NOTE(review): the preprocessor is fitted on whatever rows are in the
    # table at call time, in both modes. In predict mode the scaling and the
    # one-hot columns therefore only match the trained model if the table has
    # the same categories as at training time - consider persisting the
    # fitted preprocessor alongside the model.
    X = preprocessor.fit_transform(features)

    if mode == 'train':
        y = target.values

        # Split BEFORE oversampling so the validation set contains only real
        # rows; resampling first would leak synthetic neighbours of training
        # points into validation and inflate the reported metrics.
        # Stratifying keeps the class ratio comparable in both parts.
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Balance the training portion only.
        smote = SMOTE(random_state=42)
        X_train, y_train = smote.fit_resample(X_train, y_train)

        model = build_model(X_train.shape[1])
        log_dir = './logs'
        tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
        model.fit(
            X_train,
            y_train,
            epochs=10,
            batch_size=32,
            validation_data=(X_val, y_val),
            callbacks=[tensorboard_callback],
        )
        model.save('data/trained_model.h5')
        print("Addestramento completato. Il modello è stato salvato in 'trained_model.h5'.")
        print(f"TensorBoard logs salvati in: {log_dir}")
    else:
        # Local import: the file never binds the name ``tf``, so the former
        # ``tf.keras.models.load_model`` call raised NameError.
        from tensorflow.keras.models import load_model

        model = load_model('data/trained_model.h5')
        # ``X`` already holds the transformed features for every row.
        predictions = model.predict(X)
        # Flatten the (n, 1) model output to one label per row.
        df['PREDICTION'] = (predictions > 0.5).astype(int).ravel()
        df.to_csv('risultati_previsione.csv', index=False)
        print("Predizione completata. I risultati sono stati salvati in 'risultati_previsione.csv'.")
# Usage example: ML('train') or ML('predict')