From c8c0ce48f4c7ec90b720adcc1f3552a80a03b461 Mon Sep 17 00:00:00 2001 From: jupyterjazz Date: Mon, 3 Apr 2023 08:44:15 +0200 Subject: [PATCH 1/3] docs: integrations section Signed-off-by: jupyterjazz --- docs/integrations/fastapi.md | 125 +++++++++++++++++++++++++++++++++++ mkdocs.yml | 2 + 2 files changed, 127 insertions(+) create mode 100644 docs/integrations/fastapi.md diff --git a/docs/integrations/fastapi.md b/docs/integrations/fastapi.md new file mode 100644 index 00000000000..b379e584222 --- /dev/null +++ b/docs/integrations/fastapi.md @@ -0,0 +1,125 @@ +# Use Docarray with FastAPI + +You might already know that Docarray documents are Pydantic Models (with a twist), and as such they are fully compatible with FastAPI: + +```python +import numpy as np +from fastapi import FastAPI +from httpx import AsyncClient + +from docarray import BaseDoc +from docarray.documents import ImageDoc +from docarray.typing import NdArray +from docarray.base_doc import DocumentResponse + + +class InputDoc(BaseDoc): + img: ImageDoc + + +class OutputDoc(BaseDoc): + embedding_clip: NdArray + embedding_bert: NdArray + + +input_doc = InputDoc(img=ImageDoc(tensor=np.zeros((3, 224, 224)))) + +app = FastAPI() + + +@app.post("/doc/", response_model=OutputDoc, response_class=DocumentResponse) +async def create_item(doc: InputDoc) -> OutputDoc: + ## call my fancy model to generate the embeddings + doc = OutputDoc( + embedding_clip=np.zeros((100, 1)), embedding_bert=np.zeros((100, 1)) + ) + return doc + + +async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=input_doc.json()) + resp_doc = await ac.get("/docs") + resp_redoc = await ac.get("/redoc") +``` + +The big advantage here is **first-class support for ML centric data**, such as {Torch, TF, ...}Tensor, Embedding, etc. 
+ +This includes handy features such as validating the shape of a tensor: + +```python +from docarray import BaseDoc +from docarray.typing import TorchTensor +import torch + + +class MyDoc(BaseDoc): + tensor: TorchTensor[3, 224, 224] + + +doc = MyDoc(tensor=torch.zeros(3, 224, 224)) # works +doc = MyDoc(tensor=torch.zeros(224, 224, 3)) # works by reshaping +doc = MyDoc(tensor=torch.zeros(224)) # fails validation + + +class Image(BaseDoc): + tensor: TorchTensor[3, 'x', 'x'] + + +Image(tensor=torch.zeros(3, 224, 224)) # works +Image( + tensor=torch.zeros(3, 64, 128) +) # fails validation because second dimension does not match third +Image( + tensor=torch.zeros(4, 224, 224) +) # fails validation because of the first dimension +Image( + tensor=torch.zeros(3, 64) +) # fails validation because it does not have enough dimensions +``` + + +Further, you can send and receive lists of documents represented as a `DocArray` object: +> **Note** +> Currently, FastAPI receives DocArray objects as lists, so you have to construct a DocArray inside the function. +> Also, if you want to return DocArray, first you have to convert it to a list. 
+> (shown in the example below) + +```python +from typing import List + +import numpy as np +from fastapi import FastAPI +from httpx import AsyncClient + +from docarray import DocArray +from docarray.base_doc import DocArrayResponse +from docarray.documents import TextDoc + +# Create a docarray +docs = DocArray[TextDoc]([TextDoc(text='first'), TextDoc(text='second')]) + +app = FastAPI() + + +# Always use our custom response class (needed to dump tensors) +@app.post("/doc/", response_class=DocArrayResponse) +async def create_embeddings(docs: List[TextDoc]) -> List[TextDoc]: + # The docs FastAPI will receive will be treated as List[TextDoc] + # so you need to cast it to DocArray + docs = DocArray[TextDoc].construct(docs) + + # Embed docs + for doc in docs: + doc.embedding = np.zeros((3, 224, 224)) + + # Return your DocArray as a list + return list(docs) + + +async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=docs.to_json()) # sending docs as json + +assert response.status_code == 200 +# You can read FastAPI's response in the following way +docs = DocArray[TextDoc].from_json(response.content.decode()) +``` diff --git a/mkdocs.yml b/mkdocs.yml index e7749bc2874..1c87f0415c7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,5 +80,7 @@ nav: - how_to/add_doc_index.md - how_to/multimodal_training_and_serving.md - how_to/optimize_performance_with_id_generation.md + - Integrations: + - integrations/fastapi.md - ... 
- Contributing: CONTRIBUTING.md From 65aaaae37fd3768c9e4d49cf52bd90ed2f4ebb57 Mon Sep 17 00:00:00 2001 From: jupyterjazz Date: Mon, 3 Apr 2023 09:12:40 +0200 Subject: [PATCH 2/3] refactor: change example Signed-off-by: jupyterjazz --- docs/integrations/fastapi.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/integrations/fastapi.md b/docs/integrations/fastapi.md index b379e584222..5484831d491 100644 --- a/docs/integrations/fastapi.md +++ b/docs/integrations/fastapi.md @@ -1,6 +1,6 @@ # Use Docarray with FastAPI -You might already know that Docarray documents are Pydantic Models (with a twist), and as such they are fully compatible with FastAPI: +You might already know that `DocArray` documents are Pydantic Models (with a twist) [Reference relevant part], and as such they are fully compatible with `FastAPI`: ```python import numpy as np @@ -38,8 +38,8 @@ async def create_item(doc: InputDoc) -> OutputDoc: async with AsyncClient(app=app, base_url="http://test") as ac: response = await ac.post("/doc/", data=input_doc.json()) - resp_doc = await ac.get("/docs") - resp_redoc = await ac.get("/redoc") + +doc = OutputDoc.parse_raw(response.content.decode()) ``` The big advantage here is **first-class support for ML centric data**, such as {Torch, TF, ...}Tensor, Embedding, etc. @@ -80,9 +80,9 @@ Image( Further, you can send and receive lists of documents represented as a `DocArray` object: > **Note** -> Currently, FastAPI receives DocArray objects as lists, so you have to construct a DocArray inside the function. -> Also, if you want to return DocArray, first you have to convert it to a list. -> (shown in the example below) +> Currently, `FastAPI` receives `DocArray` objects as lists, so you have to construct a DocArray inside the function. +> Also, if you want to return a `DocArray` object, first you have to convert it to a list. 
+> (Shown in the example below) ```python from typing import List From de2bf083f74ce3aec806840fb0a1d4a54d7a7da4 Mon Sep 17 00:00:00 2001 From: jupyterjazz Date: Wed, 12 Apr 2023 15:20:24 +0200 Subject: [PATCH 3/3] docs: minor changes Signed-off-by: jupyterjazz --- docs/integrations/fastapi.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/integrations/fastapi.md b/docs/integrations/fastapi.md index 50ec8834958..e55b09fba9e 100644 --- a/docs/integrations/fastapi.md +++ b/docs/integrations/fastapi.md @@ -1,13 +1,11 @@ -# Use Docarray with FastAPI - -You might already know that `DocArray` documents are Pydantic Models (with a twist) [Reference relevant part], and as such they are fully compatible with `FastAPI`: +# Use DocArray with FastAPI FastAPI is a high-performance web framework for building APIs with Python. It's designed to be easy to use and supports asynchronous programming. -Since `DocArray` documents are Pydantic Models (with a twist)[Reference relevant part] they can be easily integrated with FastAPI, +Since [`DocArray` documents are Pydantic Models (with a twist)](../user_guide/representing/first_step.md), they can be easily integrated with FastAPI, and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs. -First, you should define document schemas: +First, you should define schemas for your input and/or output Documents: ```python from docarray import BaseDoc from docarray.documents import ImageDoc @@ -23,7 +21,7 @@ class OutputDoc(BaseDoc): embedding_bert: NdArray ``` -Afterwards, you can use your documents with FastAPI: +Afterwards, you can use your Documents with FastAPI: ```python import numpy as np from fastapi import FastAPI @@ -88,7 +86,7 @@ Image( ``` -Further, you can send and receive lists of documents represented as a `DocArray` object: +Further, you can send and receive lists of Documents represented as a `DocArray` object: !!! 
note Currently, `FastAPI` receives `DocArray` objects as lists, so you have to construct a DocArray inside the function.