Fairness in machine learning

Look at pre-trained GloVe embeddings

In [1]:
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import numpy as np
import matplotlib.pyplot as plt

Embedding

In [2]:
# Load the pre-trained GloVe vectors: the "6B" set at 100 dimensions per word.
# NOTE(review): torchtext presumably downloads and caches these on first use — confirm.
glove = torchtext.vocab.GloVe(name="6B", dim=100)
In [3]:
def print_closest_words(vec, n=5):
    """Print the ``n`` vocabulary words whose GloVe vectors lie nearest to ``vec``.

    Distance is the Euclidean (L2) norm between ``vec`` and every row of
    ``glove.vectors``. One line is printed per neighbour in the form
    ``word distance``, closest first. Nothing is returned.

    The words used to build ``vec`` are not filtered out, so they may appear
    among the printed neighbours.

    Args:
        vec: Query embedding as a torch tensor (converted with ``.numpy()``);
            it must broadcast against the rows of ``glove.vectors``.
        n: Number of neighbours to print (default 5).
    """
    distances = np.linalg.norm(glove.vectors.numpy() - vec.numpy(), axis=1)
    # Rank inside NumPy rather than sorting a Python list holding one
    # (index, distance) tuple per vocabulary row. kind="stable" keeps tied
    # distances in vocabulary order, matching what sorted() would produce.
    ranked = np.argsort(distances, kind="stable")
    for idx in ranked[:n]:
        print(glove.itos[idx], distances[idx])
In [4]:
# Analogy query: start at 'him', subtract 'man', add 'woman'.
vec = glove['him'] - glove['man'] + glove['woman']
print_closest_words(vec)
him 3.364068
herself 3.7658575
her 3.9440196
woman 4.409367
she 4.4571843
In [5]:
# Analogy query: start at 'police', subtract 'africa', add 'america'.
vec = glove['police'] - glove['africa'] + glove['america']
print_closest_words(vec)
police 5.020312
officers 5.575659
agents 6.1147323
fbi 6.21381
authorities 6.305389
In [6]:
# Analogy query: king - man + woman.
# The query word is not filtered out of the results; its distance to `vec`
# is always ||woman - man||, because vec - king == woman - man.
vec = glove['king'] - glove['man'] + glove['woman']
print_closest_words(vec)
king 3.364068
queen 4.081079
monarch 4.6429076
throne 4.9055004
elizabeth 4.921559
In [7]:
# Analogy query: king - prince + princess.
vec = glove['king'] - glove['prince'] + glove['princess']
print_closest_words(vec)
queen 3.9443252
princess 4.092166
king 4.5588813
elizabeth 5.3060265
sister 5.355579
In [8]:
# Analogy query: uncle - man + woman.
vec = glove['uncle'] - glove['man'] + glove['woman']
print_closest_words(vec)
aunt 3.1536405
grandmother 3.1846805
niece 3.1966183
uncle 3.364068
mother 3.4163873
In [9]:
# Analogy query: grandmother - mother + father.
vec = glove['grandmother'] - glove['mother'] + glove['father']
print_closest_words(vec)
grandfather 1.9321214
uncle 2.5486367
father 2.6408308
grandmother 3.0479465
brother 3.175673
In [10]:
# Analogy query: old - young + father (an age offset rather than a gender one).
print_closest_words(glove['old'] - glove['young'] + glove['father'])
father 5.0813956
old 5.085829
grandfather 5.747161
son 5.751157
grandmother 5.998024
In [11]:
# Analogy query: prince - man + woman.
vec = glove['prince'] - glove['man'] + glove['woman']
print_closest_words(vec)
prince 3.364068
princess 4.2954636
daughter 4.8203783
niece 4.89752
cousin 4.999925
In [12]:
# Occupation probe: doctor - man + woman.
print_closest_words(glove['doctor'] - glove['man'] + glove['woman'])
doctor 3.364068
nurse 4.2283154
physician 4.7054324
woman 4.873425
dentist 4.969891
In [13]:
# Occupation probe in the reverse direction: doctor - woman + man.
print_closest_words(glove['doctor'] - glove['woman'] + glove['man'])
doctor 3.364068
man 4.899869
dr. 5.05853
brother 5.144743
physician 5.152549
In [14]:
# Same doctor - woman + man query as the preceding cell, this time through
# the intermediate variable `vec`.
vec = glove['doctor'] - glove['woman'] + glove['man']
print_closest_words(vec)
doctor 3.364068
man 4.899869
dr. 5.05853
brother 5.144743
physician 5.152549
In [15]:
# Occupation probe: programmer - man + woman.
vec = glove['programmer'] - glove['man'] + glove['woman']
print_closest_words(vec)
programmer 3.364068
cosmetologist 4.8504686
salesclerk 4.9466257
psychotherapist 5.0096955
adoptee 5.0135107
In [16]:
# Occupation probe in the reverse direction: programmer - woman + man.
vec = glove['programmer'] - glove['woman'] + glove['man']
print_closest_words(vec)
programmer 3.364068
programmers 5.2138195
setup 5.2186975
mechanic 5.461222
hacker 5.5201344
In [17]:
# Repeats the programmer - man + woman query already run in cell In [15].
vec = glove['programmer'] - glove['man'] + glove['woman']
print_closest_words(vec)
programmer 3.364068
cosmetologist 4.8504686
salesclerk 4.9466257
psychotherapist 5.0096955
adoptee 5.0135107
In [18]:
# Occupation probe: engineer - man + woman.
vec = glove['engineer'] - glove['man'] + glove['woman']
print_closest_words(vec)
engineer 3.364068
technician 4.6912084
educator 5.208781
contractor 5.237237
surgeon 5.2675548
In [19]:
# Occupation probe in the reverse direction: engineer - woman + man.
print_closest_words(glove['engineer'] - glove['woman'] + glove['man'])
engineer 3.364068
mechanic 5.4588532
engineers 5.537788
master 5.6934347
technician 5.8191476
In [20]:
# Probe: sales - woman + man.
print_closest_words(glove['sales'] - glove['woman'] + glove['man'])
sales 3.364068
profits 4.976788
revenue 5.074237
profit 5.1251955
price 5.149186
In [21]:
# Probe in the reverse direction: sales - man + woman.
print_closest_words(glove['sales'] - glove['man'] + glove['woman'])
sales 3.364068
consumer 5.0254583
retail 5.1101413
product 5.504749
decline 5.573562