1313
1414
1515def setup_schema (client ):
16- if client .collections .exists ("Package" ):
17- client .collections .delete ("Package" )
18- client .collections .create (
19- "Package" ,
20- properties = [
21- Property (name = "name" , data_type = DataType .TEXT ),
22- Property (name = "type" , data_type = DataType .TEXT ),
23- Property (name = "status" , data_type = DataType .TEXT ),
24- Property (name = "description" , data_type = DataType .TEXT ),
25- ]
26- )
16+ if not client .collections .exists ("Package" ):
17+ client .collections .create (
18+ "Package" ,
19+ properties = [
20+ Property (name = "name" , data_type = DataType .TEXT ),
21+ Property (name = "type" , data_type = DataType .TEXT ),
22+ Property (name = "status" , data_type = DataType .TEXT ),
23+ Property (name = "description" , data_type = DataType .TEXT ),
24+ ]
25+ )
2726
2827
2928def generate_vector_string (package ):
@@ -59,6 +58,17 @@ def generate_vector_string(package):
5958def add_data (client ):
6059 collection = client .collections .get ("Package" )
6160
61+ # Read all existing packages from the DB; a package is only added if it is missing or differs from the stored entry.
62+ existing_packages = list (collection .iterator ())
63+ packages_dict = {}
64+ for package in existing_packages :
65+ key = package .properties ['name' ]+ "/" + package .properties ['type' ]
66+ value = {
67+ 'status' : package .properties ['status' ],
68+ 'description' : package .properties ['description' ],
69+ }
70+ packages_dict [key ] = value
71+
6272 for json_file in json_files :
6373 with open (json_file , 'r' ) as f :
6474 print ("Adding data from" , json_file )
@@ -76,7 +86,15 @@ def add_data(client):
7686 else :
7787 package ['status' ] = 'unknown'
7888
89+ # Skip this package if an entry with the same status and description already exists; only add it when it is new or has changed.
90+ key = package ['name' ]+ "/" + package ['type' ]
91+ if key in packages_dict :
92+ if packages_dict [key ]['status' ] == package ['status' ] and packages_dict [key ]['description' ] == package ['description' ]:
93+ print ("Package already exists" , key )
94+ continue
95+
7996 # prepare the object for embedding
97+ print ("Generating data for" , key )
8098 vector_str = generate_vector_string (package )
8199 vector = generate_embeddings (vector_str )
82100
0 commit comments