Fix linting issues (quote style, line length, trailing commas)

ptelang · ptelang · commit 995921acc0c3 · 2024-11-25T17:02:11.000-05:00
diff --git a/scripts/import_packages.py b/scripts/import_packages.py
@@ -6,9 +6,9 @@
 
 
 json_files = [
-    'data/archived.jsonl',
-    'data/deprecated.jsonl',
-    'data/malicious.jsonl',
+    "data/archived.jsonl",
+    "data/deprecated.jsonl",
+    "data/malicious.jsonl",
 ]
 
 
@@ -21,7 +21,7 @@ def setup_schema(client):
                 Property(name="type", data_type=DataType.TEXT),
                 Property(name="status", data_type=DataType.TEXT),
                 Property(name="description", data_type=DataType.TEXT),
-            ]
+            ],
         )
 
 
@@ -47,11 +47,14 @@ def generate_vector_string(package):
 
     # add extra status
     if package["status"] == "archived":
-        vector_str += f". However, this package is found to be archived and no longer maintained. For additional information refer to {package_url}"
+        vector_str += f". However, this package is found to be archived and no longer \
+maintained. For additional information refer to {package_url}"
     elif package["status"] == "deprecated":
-        vector_str += f". However, this package is found to be deprecated and no longer recommended for use. For additional information refer to {package_url}"
+        vector_str += f". However, this package is found to be deprecated and no \
+longer recommended for use. For additional information refer to {package_url}"
     elif package["status"] == "malicious":
-        vector_str += f". However, this package is found to be malicious. For additional information refer to {package_url}"
+        vector_str += f". However, this package is found to be malicious. For \
+additional information refer to {package_url}"
     return vector_str
 
 
@@ -62,34 +65,38 @@ def add_data(client):
     existing_packages = list(collection.iterator())
     packages_dict = {}
     for package in existing_packages:
-        key = package.properties['name']+"/"+package.properties['type']
+        key = package.properties["name"] + "/" + package.properties["type"]
         value = {
-            'status': package.properties['status'],
-            'description': package.properties['description'],
+            "status": package.properties["status"],
+            "description": package.properties["description"],
         }
         packages_dict[key] = value
 
     for json_file in json_files:
-        with open(json_file, 'r') as f:
+        with open(json_file, "r") as f:
             print("Adding data from", json_file)
             with collection.batch.dynamic() as batch:
                 for line in f:
                     package = json.loads(line)
 
                     # now add the status column
-                    if 'archived' in json_file:
-                        package['status'] = 'archived'
-                    elif 'deprecated' in json_file:
-                        package['status'] = 'deprecated'
-                    elif 'malicious' in json_file:
-                        package['status'] = 'malicious'
+                    if "archived" in json_file:
+                        package["status"] = "archived"
+                    elif "deprecated" in json_file:
+                        package["status"] = "deprecated"
+                    elif "malicious" in json_file:
+                        package["status"] = "malicious"
                     else:
-                        package['status'] = 'unknown'
+                        package["status"] = "unknown"
 
                     # check for the existing package and only add if different
-                    key = package['name']+"/"+package['type']
+                    key = package["name"] + "/" + package["type"]
                     if key in packages_dict:
-                        if packages_dict[key]['status'] == package['status'] and packages_dict[key]['description'] == package['description']:
+                        if (
+                            packages_dict[key]["status"] == package["status"]
+                            and packages_dict[key]["description"]
+                            == package["description"]
+                        ):
                             print("Package already exists", key)
                             continue
 
@@ -104,17 +111,16 @@ def add_data(client):
 def run_import():
     client = weaviate.WeaviateClient(
         embedded_options=EmbeddedOptions(
-            persistence_data_path="./weaviate_data",
-            grpc_port=50052
+            persistence_data_path="./weaviate_data", grpc_port=50052
         ),
     )
     with client:
         client.connect()
-        print('is_ready:', client.is_ready())
+        print("is_ready:", client.is_ready())
 
         setup_schema(client)
         add_data(client)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run_import()
diff --git a/src/codegate/config.py b/src/codegate/config.py
@@ -137,8 +137,12 @@ def from_file(cls, config_path: Union[str, Path]) -> "Config":
                 log_level=config_data.get("log_level", cls.log_level.value),
                 log_format=config_data.get("log_format", cls.log_format.value),
                 chat_model_path=config_data.get("chat_model_path", cls.chat_model_path),
-                chat_model_n_ctx=config_data.get("chat_model_n_ctx", cls.chat_model_n_ctx),
-                chat_model_n_gpu_layers=config_data.get("chat_model_n_gpu_layers", cls.chat_model_n_gpu_layers),
+                chat_model_n_ctx=config_data.get(
+                    "chat_model_n_ctx", cls.chat_model_n_ctx
+                ),
+                chat_model_n_gpu_layers=config_data.get(
+                    "chat_model_n_gpu_layers", cls.chat_model_n_gpu_layers
+                ),
                 prompts=prompts_config,
             )
         except yaml.YAMLError as e:
diff --git a/src/codegate/inference/inference_engine.py b/src/codegate/inference/inference_engine.py
@@ -1,7 +1,7 @@
 from llama_cpp import Llama
 
 
-class LlamaCppInferenceEngine():
+class LlamaCppInferenceEngine:
     _inference_engine = None
 
     def __new__(cls):
@@ -10,26 +10,37 @@ def __new__(cls):
         return cls._inference_engine
 
     def __init__(self):
-        if not hasattr(self, 'models'):
+        if not hasattr(self, "models"):
             self.__models = {}
 
     async def get_model(self, model_path, embedding=False, n_ctx=512, n_gpu_layers=0):
         if model_path not in self.__models:
             self.__models[model_path] = Llama(
-                model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=False, n_ctx=n_ctx,
-                embedding=embedding)
+                model_path=model_path,
+                n_gpu_layers=n_gpu_layers,
+                verbose=False,
+                n_ctx=n_ctx,
+                embedding=embedding,
+            )
 
         return self.__models[model_path]
 
-    async def generate(self, model_path, prompt, n_ctx=512, n_gpu_layers=0, stream=True):
-        model = await self.get_model(model_path=model_path, n_ctx=n_ctx, n_gpu_layers=n_gpu_layers)
+    async def generate(
+        self, model_path, prompt, n_ctx=512, n_gpu_layers=0, stream=True
+    ):
+        model = await self.get_model(
+            model_path=model_path, n_ctx=n_ctx, n_gpu_layers=n_gpu_layers
+        )
 
         for chunk in model.create_completion(prompt=prompt, stream=stream):
             yield chunk
 
-    async def chat(self, model_path, n_ctx=512, n_gpu_layers=0, **chat_completion_request):
-        model = await self.get_model(model_path=model_path, n_ctx=n_ctx,
-                                     n_gpu_layers=n_gpu_layers)
+    async def chat(
+        self, model_path, n_ctx=512, n_gpu_layers=0, **chat_completion_request
+    ):
+        model = await self.get_model(
+            model_path=model_path, n_ctx=n_ctx, n_gpu_layers=n_gpu_layers
+        )
         return model.create_completion(**chat_completion_request)
 
     async def embed(self, model_path, content):
diff --git a/utils/embedding_util.py b/utils/embedding_util.py
@@ -1,6 +1,6 @@
 from transformers import AutoTokenizer, AutoModel
 import torch
-import torch.nn.functional as F
+import torch.nn.functional as f
 from torch import Tensor
 import os
 import warnings
@@ -35,6 +35,6 @@ def generate_embeddings(text):
     embeddings = average_pool(outputs.last_hidden_state, attention_mask)
 
     # (Optionally) normalize embeddings
-    embeddings = F.normalize(embeddings, p=2, dim=1)
+    embeddings = f.normalize(embeddings, p=2, dim=1)
 
     return embeddings.numpy().tolist()[0]