Here is my code and I am loading the csv files from the drive
# -*- coding: utf-8 -*-
"""Titanic_Linear_Regression_Model.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1MMY9m7QGpqWVj-zyv2oaIJQZ2V__7AGX
"""
pip install -q sklearn
# Commented out IPython magic to ensure Python compatibility.
# %tensorflow_version 2.x
"""**Predicting Survivals of Titanic using linear regression model**
```
# This is formatted as code
```
Predicting Survivals of Titanic using linear regression model.
Load the data the titanic.csv data, perform analysis to gain better understaning of the data
Build the model, train and evaluate it
# New Section
"""
# Commented out IPython magic to ensure Python compatibility.
#import all necessary libraries
# %tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow as tf
#Install PyDrive
!pip install -U -q PyDrive
#import necessary modules for the PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
#authenticate and create a PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
#gender_submission_path = https://drive.google.com/file/d/1e0ZLmv8G-kVDJgeeb1b1REWJG0yNT3jc/view?...
#testing_data_path = https://drive.google.com/file/d/1SCaUViZG8qt1q5K5_cdBkFCM4VN8H_5z/view?...
#training_data_path= https://drive.google.com/file/d/1lLrhVKTGuQiJI5kTQkKF6if_r5B5S9HN/...
fileDownloaded = drive.CreateFile({"id":"1e0ZLmv8G-kVDJgeeb1b1REWJG0yNT3jc"})
fileDownloaded = drive.CreateFile({"id":"1SCaUViZG8qt1q5K5_cdBkFCM4VN8H_5z"})
fileDownloaded = drive.CreateFile({"id":"1lLrhVKTGuQiJI5kTQkKF6if_r5B5S9HN"})
#Loading the files
fileDownloaded.GetContentFile("gender_submission.csv")
fileDownloaded.GetContentFile("training_data.csv")
fileDownloaded.GetContentFile("testing_data.csv")
#Using proper encoding to read the csv files
df_train = pd.read_csv("training_data.csv", encoding='utf-8', quotechar='"', delimiter=',') #training data
df_test = pd.read_csv("testing_data.csv", encoding='utf-8', quotechar='"', delimiter=',') #test data
df_gender = pd.read_csv("gender_submission.csv", encoding='utf-8', quotechar='"', delimiter=',') #gender_submission dataset
#since our data contains NaN, lets convert them into empty strings
df_train.fillna('', inplace=True)
df_test.fillna('', inplace=True)
#removes Survival column from the train dataset and save it as y_train
y_train = df_train.pop('Survived')
#create a new column of train survival naming "y_train"
print(y_train.head())
#removes the Survived column from the test dataset as save as y_test
y_test = df_gender.pop('Survived')
print(y_test.head())
df_train.head() #prints overhead of the training datasets
"""**Analysis on the train dataset**"""
print(df_train.describe()) #print train dataset stats
print(df_train.shape) #prints the shape of the train data
#Generate histogram data for ages
#df_train.Age.hist(bins = 20)
#count sex to generate the graph of number of females and males
df_train['Sex'].value_counts().plot(kind = 'bar')
#generate a pie graph for the Pclass
df_train['Pclass'].value_counts().plot(kind = "pie")
#concatenate the train and y_train datasets and genergate a graph grouped by sex
pd.concat([df_train, y_train], axis = 1).groupby('Sex').Survived.mean().plot(kind = "bar").set_xlabel("% Survival")
#concatenate the train and y_train datasets and genergate a graph grouped by Age
pd.concat([df_train, y_train], axis = 1).groupby('Pclass').Survived.mean().plot(kind = "bar").set_xlabel("% Survival per class")
"""The data shows that the majority of the passengers were aged between 15 to 40 years and majority of the passengers were males. The data shows that most of the passengers were based in the passenger class 3. Females have much higher chance of survival than males, 70% of females survived compared to 20% of males. The data suggest that Passengers who were in class 1 had a higher chance of survival than those who were in class 2 and 3
**Create feature columns that will be used to feed the model**
"""
#feature columns that will be used to feed the model
categorical_columns = ["Pclass", "Name", "Sex", "Ticket", "Cabin", "Embarked", "SibSp"]
numerical_columns = ["Age", "Fare"]
feature_columns = []
#gets a list of unique values
for feature_name in categorical_columns:
vocabulary = df_train[feature_name].unique()
feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(
feature_name, vocabulary, dtype=None, default_value=-1, num_oov_buckets=0
))
print(feature_columns)
"""**Training the model using feature categorical_columns**
Create an input function that will convert the data into tf.data.Dataset
"""
#Create an input function that will convert the data into tf.data.Dataset
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size = 32):
#input fn to be returned by
def input_function():
#create tf.data.Dataset object with data and its label
ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
if shuffle:
ds.ds.shuffle(1000) #shuffles data 1000 times
ds = ds.batch(batch_size).repeat(num_epochs) #splits dataset into 32 batches and reeats the process 10 times
return ds #retursn a batch of a dataset
return input_function #returns function object for useage
#Calls the input function that we returned to create the train and test functions
train_input_fn = make_input_fn(df_train, y_train) #train function
test_input_fn = make_input_fn(df_test, y_test, num_epochs = 1, shuffle = False) #testing function
"""**Creating the Model**
Use linear estimator to utalize linear regression algorithm
"""
#creates the linear estimator by passing the feature_columns
linear_estimator = tf.estimator.LinearClassifier(feature_columns = feature_columns)
#Training the model by passing the training function
linear_estimator.train(train_input_fn)
The error message I am getting:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
tensorflow/python/framework/fast_tensor_util.pyx in tensorflow.python.framework.fast_tensor_util.AppendObjectArrayToTensorProto()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/compat.py in as_bytes(bytes_or_text, encoding)
85 else:
86 raise TypeError('Expected binary or unicode string, got %r' %
---> 87 (bytes_or_text,))
88
89
TypeError: Expected binary or unicode string, got 22.0
enter image description here
question from:
https://stackoverflow.com/questions/66065309/how-to-fix-the-typeerror-expected-binary-or-unicode-string-got-22-0-when-pa