from flask import *
import numpy as np # linear algebra
from sklearn.decomposition import PCA
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns # data visualization library
import matplotlib.pyplot as plt
# Seed NumPy's global random number generator.
np.random.seed(0)

# Location of the input CSV, kept in one place so it is easy to change.
DATA_PATH = "/home/umesh/Desktop/icfoss/bcdata.csv"
data = pd.read_csv(DATA_PATH)
col = data.columns
y = data.diagnosis  # M or B

# Drop the 'Unnamed: 32' column (the last column) and the ID; `x` additionally
# drops the diagnosis label so that it holds only the feature columns.
# errors='ignore' keeps the script working on a CSV that lacks those columns.
df = data.drop(['Unnamed: 32', 'id'], axis=1, errors='ignore')
x = df.drop('diagnosis', axis=1)

# Bar chart of how many rows carry each diagnosis label. The data is passed
# by keyword because seaborn no longer accepts it positionally.
ax = sns.countplot(x=y, label="Count")

# Look the counts up by label: value_counts() is ordered by frequency, so
# positional unpacking would swap B and M whenever M is the larger class.
class_counts = y.value_counts()
B = class_counts.get('B', 0)
M = class_counts.get('M', 0)
print('Number of Benign: ',B)
print('Number of Malignant : ',M)

# Indicator column for the 'M' label (the first category is dropped).
y_df = pd.get_dummies(y, drop_first=True)
y_df = y_df['M']

# Dummy-encode the frame using the 'diagnosis' prefix, then remove the
# redundant 'diagnosis_B' indicator. DataFrame.drop returns a new frame, so
# the result has to be assigned back for the drop to take effect.
prueba = pd.get_dummies(df, prefix='diagnosis')
prueba = prueba.drop('diagnosis_B', axis=1)

# Annotated heatmap of the pairwise correlations between feature columns.
f, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(x.corr(), annot=True, linewidths=.5, fmt='.1f', ax=ax)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
# Hold out 30% of the rows of `df` as a test partition, then separate the
# 'diagnosis' label column from the remaining columns in each partition.
# NOTE: x_train / x_test / y_train / y_test are rebound by the later
# train_test_split(x, y, ...) call further down in this script.
df_train, df_test = train_test_split(df, test_size=0.3)
y_train, y_test = df_train['diagnosis'], df_test['diagnosis']
x_train, x_test = (
    part.drop(columns='diagnosis') for part in (df_train, df_test)
)
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Unfitted logistic-regression estimator with default settings.
modelo_rl = LogisticRegression()

# 80/20 split of features `x` and labels `y` with a fixed seed, after which
# every piece is transposed. For the label Series `.T` leaves the data
# unchanged; for the feature frames it swaps rows and columns.
# NOTE(review): scikit-learn estimators expect X as (n_samples, n_features);
# confirm that the code consuming x_train / x_test really wants the
# transposed layout before fitting `modelo_rl` on them.
x_train, x_test, y_train, y_test = (
    part.T
    for part in train_test_split(x, y, test_size=0.2, random_state=5)
)