InternLM · vansin · Dec 19, 2023 · Dec 19, 2023
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+*.drawio
+lmdeploy/c4/
diff --git a/lmdeploy/cover.jpg b/lmdeploy/cover.jpg
diff --git a/lmdeploy/img/0.png b/lmdeploy/img/0.png
diff --git a/lmdeploy/img/1.png b/lmdeploy/img/1.png
diff --git a/lmdeploy/img/10.png b/lmdeploy/img/10.png
diff --git a/lmdeploy/img/11.png b/lmdeploy/img/11.png
diff --git a/lmdeploy/img/12.png b/lmdeploy/img/12.png
diff --git a/lmdeploy/img/13.png b/lmdeploy/img/13.png
diff --git a/lmdeploy/img/14.png b/lmdeploy/img/14.png
diff --git a/lmdeploy/img/15.png b/lmdeploy/img/15.png
diff --git a/lmdeploy/img/16.png b/lmdeploy/img/16.png
diff --git a/lmdeploy/img/17.png b/lmdeploy/img/17.png
diff --git a/lmdeploy/img/18.png b/lmdeploy/img/18.png
diff --git a/lmdeploy/img/19.png b/lmdeploy/img/19.png
diff --git a/lmdeploy/img/2.png b/lmdeploy/img/2.png
diff --git a/lmdeploy/img/20.png b/lmdeploy/img/20.png
diff --git a/lmdeploy/img/3.png b/lmdeploy/img/3.png
diff --git a/lmdeploy/img/4.png b/lmdeploy/img/4.png
diff --git a/lmdeploy/img/5.png b/lmdeploy/img/5.png
diff --git a/lmdeploy/img/6.png b/lmdeploy/img/6.png
diff --git a/lmdeploy/img/7.png b/lmdeploy/img/7.png
diff --git a/lmdeploy/img/8.png b/lmdeploy/img/8.png
diff --git a/lmdeploy/img/9.png b/lmdeploy/img/9.png
diff --git a/lmdeploy/img/add0.png b/lmdeploy/img/add0.png
diff --git a/lmdeploy/img/add1.png b/lmdeploy/img/add1.png
diff --git a/lmdeploy/img/add3.png b/lmdeploy/img/add3.png
diff --git a/lmdeploy/img/add4.png b/lmdeploy/img/add4.png
diff --git a/lmdeploy/img/lmdeploy.drawio.png b/lmdeploy/img/lmdeploy.drawio.png
diff --git a/lmdeploy/img/quant.drawio.png b/lmdeploy/img/quant.drawio.png
diff --git a/lmdeploy/infer_compare.py b/lmdeploy/infer_compare.py
@@ -0,0 +1,79 @@
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from lmdeploy import turbomind as tm
+
+
+def get_prompt(query: str) -> str:
+    prompt = f"""<|System|>:You are an AI assistant whose name is InternLM (书生·浦语).
+- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
+- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.
+
+<|User|>:{query}
+<|Bot|>:"""
+    return prompt
+
+
+
+def gen_lmdeploy(query: str, tokenizer, model) -> str:
+    prompt = get_prompt(query)
+    input_ids = tokenizer.encode(prompt)
+    for outputs in model.stream_infer(
+            session_id=0,
+            input_ids=[input_ids],
+            request_output_len=512,
+            temperature=0.0,
+        ):
+        ...
+    res, _tokens = outputs[0]
+    output = tokenizer.decode(res)
+    return output
+
+
+
+def gen_transformers(query: str, tokenizer, model) -> str:
+    prompt = get_prompt(query)
+    inputs = tokenizer([prompt], return_tensors="pt").to(device)
+    res = model.generate(
+        **inputs,
+        max_new_tokens=512,
+    )
+    output = tokenizer.decode(res[0])
+    output = output.replace(prompt, "").replace("<eoa>", "").strip()
+    return output
+
+
+
+import sys
+import time
+
+device = "cuda:0"
+
+model_path = "/root/share/temp/model_repos/internlm-chat-7b/"
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+engine = sys.argv[1]
+
+if engine == "hf":
+    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)
+    model.to(device).eval();
+    gen_func = gen_transformers
+else:
+    tm_model = tm.TurboMind.from_pretrained("./workspace/")
+    model = tm_model.create_instance()
+    gen_func = gen_lmdeploy
+
+
+count = 0
+start = time.time()
+for season in ["春天", "夏天", "秋天", "冬天"]:
+    q = f"写一篇关于{season}的300字小作文。"
+    output = gen_func(q, tokenizer, model)
+    count += len(output)
+end = time.time()
+cost = (end - start)
+throughput = round(count / cost)
+
+print(f"{engine} cost {cost:.2f}s  {throughput} tokens/s")
+
+