Text to Image with Graph Database
This week’s office Hackathon, my project was “Text to Image with Graph Database” (and I won! The prize was an expensed dinner). Instead of using graph queries to retrieve relationships between objects detected from the video frames, I want to give it a short text and return the most similar images/frames from the graph database.
Relationships Dataset: Open Image V5
The dataset I used to insert into the graph database is Open Images Dataset V5, in particular the 2018 challenge dataset. You would want to download and load these files:
import pandas as pd

# Paths to the Open Images V5 relationship annotations and metadata files.
relation_train_data = "dataset/challenge-2018-train-vrd.csv"
relation_val_data = "dataset/validation-annotations-vrd.csv"
relation_test_data = "dataset/test-annotations-vrd.csv"
image_id_mapping = "dataset/train-images-boxable-with-rotation.csv"
metadata = "dataset/class-descriptions.csv"
hierarchy = "dataset/bbox_labels_600_hierarchy.json"

# Load only the columns needed to build (subject, verb, object) triples.
train_df = pd.read_csv(relation_train_data)[['ImageID', 'LabelName1', 'LabelName2', 'RelationshipLabel']]
val_df = pd.read_csv(relation_val_data)[['ImageID', 'LabelName1', 'LabelName2', 'RelationshipLabel']]
test_df = pd.read_csv(relation_test_data)[['ImageID', 'LabelName1', 'LabelName2', 'RelationshipLabel']]
# class-descriptions.csv ships without a header row, so supply column names.
metadata_df = pd.read_csv(metadata, names=["id", "name"])
image_id_mapping_df = pd.read_csv(image_id_mapping)[['ImageID', 'OriginalURL']]
Next we want to do some preprocessing: combine the train, val, and test datasets and drop the duplicate relationships.
# Preprocessing: combine the train/val/test relationship triples into one frame.
# DataFrame.append was deprecated and removed in pandas 2.x; pd.concat is the
# supported equivalent and concatenates all three frames in one pass.
train_df = pd.concat([train_df, val_df, test_df], ignore_index=True)
len(train_df)

# Dgraph identifiers can only contain plain characters, so strip the leading
# "/m/" prefix. The pattern "/m/." also consumes the character after the
# prefix, deliberately matching the split('/')[2][1:] slice used when the
# hierarchy JSON is parsed, so both sources produce the same ids.
train_df['LabelName1'] = train_df['LabelName1'].str.replace('/m/.', '', regex=True)
train_df['LabelName2'] = train_df['LabelName2'].str.replace('/m/.', '', regex=True)
metadata_df['id'] = metadata_df['id'].str.replace('/m/.', '', regex=True)
The bounding box hierarchy here contains 600 entities. I use DFS to traverse the json file and create the has_subcategory links between parent and child entities.
def id_lookup_metadata(id):
    """Return the list of human-readable names for a (stripped) metadata id.

    Looks the id up in the module-level metadata_df; returns an empty list
    when the id is unknown.
    """
    id_lookup = metadata_df.loc[metadata_df['id'] == id]['name'].tolist()
    return id_lookup


def dfs_build_nodes(entity):
    """Depth-first walk of the bbox label hierarchy rooted at `entity`.

    Emits a node for every entity plus a has_subcategory edge from each
    parent to each child, and returns the accumulated mutation text.
    """
    data = ""
    # LabelName looks like "/m/0xxxx"; split('/')[2][1:] drops "/m/" and the
    # first character, matching the id cleanup applied to the dataframes.
    e_id = entity['LabelName'].split('/')[2][1:]
    name = id_lookup_metadata(e_id)[0]
    data += format_node(e_id, name, "article")
    # Stop when we reach a leaf (no subcategories).
    if 'Subcategory' not in entity:
        return data
    # Recurse into every subcategory, linking parent -> child.
    for s in entity['Subcategory']:
        s_id = s['LabelName'].split('/')[2][1:]
        name = id_lookup_metadata(s_id)[0]
        data += format_node(s_id, name, "article")
        data += format_relationship(e_id, s_id, "has_subcategory")
        data += dfs_build_nodes(s)
    return data
Now we are ready to format the nodes from the hierarchy json file and the relationships formed by overlapping objects. Here we loop through the grouped LabelName1s and insert 391,083 (subject, verb, object) relationships with imageId as an edge attribute (@facets annotation in Dgraph).
from tqdm import tqdm
import json

hierarchy = "bbox_labels_600_hierarchy.json"


def format_rdf():
    """Build the full Dgraph `{ set { ... } }` mutation string.

    Contains the hierarchy nodes (via dfs_build_nodes) plus one edge per
    deduplicated (subject, relation, object) triple, carrying the ImageID
    as an `imageId` facet.
    """
    data = '{ set {\n'
    with open(hierarchy) as json_file:
        entities = json.load(json_file)
        data += dfs_build_nodes(entities)
    grouped = train_df.groupby('LabelName1')
    for label, group in tqdm(grouped):
        # Keep only one edge per (object, relation) pair for this subject.
        cleaned_group = group.drop_duplicates(subset=['LabelName2', 'RelationshipLabel'])
        for index, row in cleaned_group.iterrows():
            imageID, id1, id2, relation = row['ImageID'], row['LabelName1'], row['LabelName2'], row['RelationshipLabel']
            id1_lookup = id_lookup_metadata(id1)
            id2_lookup = id_lookup_metadata(id2)
            # Skip ids that have no human-readable name in the metadata.
            if len(id1_lookup) == 0 or len(id2_lookup) == 0:
                continue
            data += format_relationship_with_facet(id1, id2, relation, "imageId", imageID)
    data += '} }'
    print(data)
    return data


def image_id_lookup(id):
    """Return the list of OriginalURL values recorded for an ImageID."""
    id_lookup = image_id_mapping_df.loc[image_id_mapping_df['ImageID'] == id]['OriginalURL'].tolist()
    return id_lookup


data = format_rdf()  # format the nodes to be inserted
insert_nodes(data)   # insert into dgraph
Now we can do some interesting queries to retrieve images following graph based on the input text.
import pydgraph


def query_imageId(sub, relation, obj):
    """Query Dgraph for the imageId facet on the `relation` edge from a
    node whose name matches `sub` to the node named `obj`.

    Returns the imageId string, or "" when no matching edge is found.
    """
    client_stub = pydgraph.DgraphClientStub("localhost:9080")
    client = pydgraph.DgraphClient(client_stub)
    # NOTE(review): `sub` and `relation` are interpolated directly into the
    # query via the f-string, so the `variables` dict below does not appear
    # to be referenced by the query text — confirm whether it is needed.
    query = f"""{{
        everyone(func: anyoftext(name, {sub}))
        @recurse (depth:4, loop:true)
        {{
            name
            {relation} @facets
        }}
    }}"""
    variables = {'$sub': sub, '$rel': relation}
    res = client.txn(read_only=True).query(query, variables=variables)
    res_bytes = res.json
    res_json = res_bytes.decode('utf8')
    data = json.loads(res_json)
    results = data['everyone'][0][relation]
    imageId = ""
    for r in results:
        if r['name'] == obj:
            # Facet values come back keyed as "<predicate>|<facet-name>".
            imageId = r[relation + "|imageId"]
    client_stub.close()
    return imageId
And finally lookup the original image url with imageId on edge attribute and print out the image.
from skimage import io
import matplotlib.pyplot as plt


def image_id_lookup(id):
    """Return the list of OriginalURL values recorded for an ImageID."""
    id_lookup = image_id_mapping_df.loc[image_id_mapping_df['ImageID'] == id]['OriginalURL'].tolist()
    return id_lookup


image_id = query_imageId("Woman", "wears", "Handbag")
imgUrl = image_id_lookup(image_id)
try:
    image = io.imread(imgUrl[0])
    plt.imshow(image)
    plt.show()
except Exception:
    # Best-effort display: the Flickr source URL may no longer resolve.
    print("The image link might not work, check again.")