66
77
# JSONL inputs to import, one JSON object per line. The importer derives each
# package's status from the file name ("archived", "deprecated", "malicious"),
# so the names must keep containing those words.
json_files = [
    "data/archived.jsonl",
    "data/deprecated.jsonl",
    "data/malicious.jsonl",
]
1313
1414
@@ -21,7 +21,7 @@ def setup_schema(client):
2121 Property (name = "type" , data_type = DataType .TEXT ),
2222 Property (name = "status" , data_type = DataType .TEXT ),
2323 Property (name = "description" , data_type = DataType .TEXT ),
24- ]
24+ ],
2525 )
2626
2727
@@ -47,11 +47,14 @@ def generate_vector_string(package):
4747
4848 # add extra status
4949 if package ["status" ] == "archived" :
50- vector_str += f". However, this package is found to be archived and no longer maintained. For additional information refer to { package_url } "
50+ vector_str += f". However, this package is found to be archived and no longer \
51+ maintained. For additional information refer to { package_url } "
5152 elif package ["status" ] == "deprecated" :
52- vector_str += f". However, this package is found to be deprecated and no longer recommended for use. For additional information refer to { package_url } "
53+ vector_str += f". However, this package is found to be deprecated and no \
54+ longer recommended for use. For additional information refer to { package_url } "
5355 elif package ["status" ] == "malicious" :
54- vector_str += f". However, this package is found to be malicious. For additional information refer to { package_url } "
56+ vector_str += f". However, this package is found to be malicious. For \
57+ additional information refer to { package_url } "
5558 return vector_str
5659
5760
@@ -62,34 +65,38 @@ def add_data(client):
6265 existing_packages = list (collection .iterator ())
6366 packages_dict = {}
6467 for package in existing_packages :
65- key = package .properties [' name' ] + "/" + package .properties [' type' ]
68+ key = package .properties [" name" ] + "/" + package .properties [" type" ]
6669 value = {
67- ' status' : package .properties [' status' ],
68- ' description' : package .properties [' description' ],
70+ " status" : package .properties [" status" ],
71+ " description" : package .properties [" description" ],
6972 }
7073 packages_dict [key ] = value
7174
7275 for json_file in json_files :
73- with open (json_file , 'r' ) as f :
76+ with open (json_file , "r" ) as f :
7477 print ("Adding data from" , json_file )
7578 with collection .batch .dynamic () as batch :
7679 for line in f :
7780 package = json .loads (line )
7881
7982 # now add the status column
80- if ' archived' in json_file :
81- package [' status' ] = ' archived'
82- elif ' deprecated' in json_file :
83- package [' status' ] = ' deprecated'
84- elif ' malicious' in json_file :
85- package [' status' ] = ' malicious'
83+ if " archived" in json_file :
84+ package [" status" ] = " archived"
85+ elif " deprecated" in json_file :
86+ package [" status" ] = " deprecated"
87+ elif " malicious" in json_file :
88+ package [" status" ] = " malicious"
8689 else :
87- package [' status' ] = ' unknown'
90+ package [" status" ] = " unknown"
8891
8992 # check for the existing package and only add if different
90- key = package [' name' ] + "/" + package [' type' ]
93+ key = package [" name" ] + "/" + package [" type" ]
9194 if key in packages_dict :
92- if packages_dict [key ]['status' ] == package ['status' ] and packages_dict [key ]['description' ] == package ['description' ]:
95+ if (
96+ packages_dict [key ]["status" ] == package ["status" ]
97+ and packages_dict [key ]["description" ]
98+ == package ["description" ]
99+ ):
93100 print ("Package already exists" , key )
94101 continue
95102
@@ -104,17 +111,16 @@ def add_data(client):
def run_import():
    """Start an embedded Weaviate client and import the package data.

    Creates a ``WeaviateClient`` with embedded options that persist data
    under ``./weaviate_data`` and use gRPC port 50052, connects it, prints
    the readiness flag, then runs ``setup_schema`` followed by ``add_data``
    against the connected client.
    """
    client = weaviate.WeaviateClient(
        embedded_options=EmbeddedOptions(
            persistence_data_path="./weaviate_data", grpc_port=50052
        ),
    )
    # NOTE(review): the client is used as a context manager, presumably so the
    # embedded instance is shut down on exit even if the import fails — confirm
    # against the Weaviate client docs.
    with client:
        client.connect()
        print("is_ready:", client.is_ready())

        # The schema must be set up before any objects are added to it.
        setup_schema(client)
        add_data(client)
118124
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_import()
0 commit comments