Improved file handling to work with scripts: model_dump now returns file fields as base64-encoded dicts (data, mimetype, filename).

Cyrus Radfar · Cyrus Radfar · commit 922e88f98d63 · 2024-12-12T17:26:05.000-08:00
diff --git a/python_anvil/models.py b/python_anvil/models.py
@@ -1,5 +1,8 @@
 from typing import Any
 from io import BytesIO, BufferedReader
+from mimetypes import guess_type
+import base64
+import os
 
 try:        
     from pydantic import BaseModel
@@ -14,10 +17,10 @@
     from pydantic import ConfigDict
     class FileCompatibleBaseModel(BaseModel):
         """
-            Patched model_dump to extract file objects from SerializationIterator in V2
-            and return as BufferedReader
+        Patched model_dump to extract file objects from SerializationIterator in V2
+        and return as BufferedReader or base64 encoded dict as needed.
         """ 
-            # Allow extra fields even if it is not defined. This will allow models
+        # Allow extra fields even if they are not defined. This will allow models
         # to be more flexible if features are added in the Anvil API, but
         # explicit support hasn't been added yet to this library.
         model_config = ConfigDict(
@@ -46,21 +49,41 @@ def _iterator_to_buffered_reader(self, value):
         def _check_if_serialization_iterator(self, value):
             return str(type(value).__name__) == 'SerializationIterator' and hasattr(value, '__next__')
 
+        def _process_file_data(self, file_obj):
+            """Process file object into base64 encoded dict format."""
+            # Read the file data and encode it as base64
+            file_content = file_obj.read()
+            
+            # Get filename - handle both regular files and BytesIO objects
+            filename = getattr(file_obj, 'name', "document.pdf")
+            
+            if isinstance(filename, (bytes, bytearray)):
+                filename = filename.decode('utf-8')
+            
+            # manage mimetype based on file extension
+            mimetype = guess_type(filename)[0] or 'application/pdf'
+            
+            return {
+                'data': base64.b64encode(file_content).decode('utf-8'),
+                'mimetype': mimetype,
+                'filename': os.path.basename(filename)
+            }
+
         def model_dump(self, **kwargs):
             data = super().model_dump(**kwargs)
             for key, value in data.items():
                 if key == 'file' and self._check_if_serialization_iterator(value):
                     # Direct file case
-                    data[key] = self._iterator_to_buffered_reader(value)
+                    file_obj = self._iterator_to_buffered_reader(value)
+                    data[key] = self._process_file_data(file_obj)
                 elif key == 'files' and isinstance(value, list):
                     # List of objects case
                     for index, item in enumerate(value):
                         if isinstance(item, dict) and 'file' in item:
                             if self._check_if_serialization_iterator(item['file']):
-                                data[key][index]['file'] = self._iterator_to_buffered_reader(item['file'])
+                                file_obj = self._iterator_to_buffered_reader(item['file'])
+                                data[key][index]['file'] = self._process_file_data(file_obj)
             return data
-        
-        
 
 else:
     FileCompatibleBaseModel = BaseModel