diff --git a/app/src/main/kotlin/com/google/ai/sample/ApiKeyDialog.kt b/app/src/main/kotlin/com/google/ai/sample/ApiKeyDialog.kt
index 05756ce..0f8ff43 100644
--- a/app/src/main/kotlin/com/google/ai/sample/ApiKeyDialog.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/ApiKeyDialog.kt
@@ -75,7 +75,7 @@ fun ApiKeyDialog(
                         .horizontalScroll(rememberScrollState()),
                     horizontalArrangement = Arrangement.spacedBy(8.dp)
                 ) {
-                    listOf(ApiProvider.VERCEL, ApiProvider.CEREBRAS, ApiProvider.GOOGLE, ApiProvider.MISTRAL).forEach { provider ->
+                    listOf(ApiProvider.VERCEL, ApiProvider.CEREBRAS, ApiProvider.GOOGLE, ApiProvider.MISTRAL, ApiProvider.PUTER).forEach { provider ->
                         FilterChip(
                             selected = selectedProvider == provider,
                             onClick = {
@@ -97,6 +97,7 @@ fun ApiKeyDialog(
                             ApiProvider.CEREBRAS -> "https://cloud.cerebras.ai/"
                             ApiProvider.VERCEL -> "https://vercel.com/ai-gateway"
                             ApiProvider.MISTRAL -> "https://console.mistral.ai/home?profile_dialog=api-keys"
+                            ApiProvider.PUTER -> "https://puter.com/dashboard"
                             ApiProvider.HUMAN_EXPERT -> return@Button
                         }
                         val intent = Intent(Intent.ACTION_VIEW, Uri.parse(url))
diff --git a/app/src/main/kotlin/com/google/ai/sample/GenerativeAiViewModelFactory.kt b/app/src/main/kotlin/com/google/ai/sample/GenerativeAiViewModelFactory.kt
index ceba485..b984d1f 100644
--- a/app/src/main/kotlin/com/google/ai/sample/GenerativeAiViewModelFactory.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/GenerativeAiViewModelFactory.kt
@@ -17,6 +17,7 @@ enum class ApiProvider {
     GOOGLE,
     CEREBRAS,
     MISTRAL,
+    PUTER,
     HUMAN_EXPERT
 }
 
@@ -25,13 +26,15 @@ enum class ModelOption(
     val modelName: String,
     val apiProvider: ApiProvider = ApiProvider.GOOGLE,
     val downloadUrl: String? = null,
-    val size: String? = null
+    val size: String? = null,
+    val supportsScreenshot: Boolean = true
 ) {
+    PUTER_GLM5("GLM-5 (Puter)", "z-ai/glm-5", ApiProvider.PUTER, supportsScreenshot = false),
     MISTRAL_LARGE_3("Mistral Large 3", "mistral-large-latest", ApiProvider.MISTRAL),
     GPT_5_1_CODEX_MAX("GPT-5.1 Codex Max (Vercel)", "openai/gpt-5.1-codex-max", ApiProvider.VERCEL),
     GPT_5_1_CODEX_MINI("GPT-5.1 Codex Mini (Vercel)", "openai/gpt-5.1-codex-mini", ApiProvider.VERCEL),
     GPT_5_NANO("GPT-5 Nano (Vercel)", "openai/gpt-5-nano", ApiProvider.VERCEL),
-    GPT_OSS_120B("GPT-OSS 120B (Cerebras)", "gpt-oss-120b", ApiProvider.CEREBRAS),
+    GPT_OSS_120B("GPT-OSS 120B (Cerebras)", "gpt-oss-120b", ApiProvider.CEREBRAS, supportsScreenshot = false),
     GEMINI_3_FLASH("Gemini 3 Flash", "gemini-3-flash-preview"),
     GEMINI_PRO("Gemini 2.5 Pro", "gemini-2.5-pro"),
     GEMINI_FLASH_PREVIEW("Gemini 2.5 Flash", "gemini-2.5-flash"),
@@ -39,7 +42,7 @@ enum class ModelOption(
     GEMINI_FLASH_LITE_PREVIEW("Gemini 2.5 Flash Lite Preview", "gemini-2.5-flash-lite-preview-06-17"),
     GEMINI_FLASH("Gemini 2.0 Flash", "gemini-2.0-flash"),
     GEMINI_FLASH_LITE("Gemini 2.0 Flash Lite", "gemini-2.0-flash-lite"),
-    GEMMA_3_27B_IT("Gemma 3 27B IT", "gemma-3-27b-it"),
+    GEMMA_3_27B_IT("Gemma 3 27B IT", "gemma-3-27b-it", supportsScreenshot = false),
     GEMMA_3N_E4B_IT(
         "Gemma 3n E4B it (offline)",
         "gemma-3n-e4b-it",
diff --git a/app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt b/app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt
index 2b01324..1da94c9 100644
--- a/app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt
@@ -582,6 +582,9 @@ fun MenuScreen(
                             withStyle(boldStyle) { append("Preview Models") }
                             append(" could be deactivated by Google without being handed over to the final release.\n")
                             append("• ")
+                            withStyle(boldStyle) { append("GLM-5 (Puter)") }
+                            append(" is provided by Puter API. Enter your Puter Auth-Token as the API Key.\n")
+                            append("• ")
                             withStyle(boldStyle) { append("Mistral Large 3") }
                             append(" is a multimodal model (supports screenshots) and requires an API key.\n")
                             append("• ")
diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
index ad3d5c0..bc97c47 100644
--- a/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
@@ -285,6 +285,10 @@ class ScreenCaptureService : Service() {
                         val result = callMistralApi(modelName, apiKey, chatHistory, inputContent)
                         responseText = result.first
                         errorMessage = result.second
+                    } else if (apiProvider == ApiProvider.PUTER) {
+                        val result = callPuterApi(modelName, apiKey, chatHistory, inputContent)
+                        responseText = result.first
+                        errorMessage = result.second
                     } else {
                         val generativeModel = GenerativeModel(
                             modelName = modelName,
@@ -955,3 +959,66 @@ private suspend fun callMistralApi(modelName: String, apiKey: String, chatHistor
 
     return Pair(responseText, errorMessage)
 }
+
+/**
+ * Sends [chatHistory] followed by [inputContent] to the Puter chat API.
+ *
+ * Blank text parts are dropped. Image parts are forwarded only when the
+ * `ModelOption` whose `modelName` equals [modelName] has `supportsScreenshot`
+ * set; an unknown model name falls back to `true`. Messages left without any
+ * part are skipped.
+ *
+ * @return the response text paired with `null` on success, or `null` paired
+ *   with an error message when the call throws
+ */
+private suspend fun callPuterApi(
+    modelName: String,
+    apiKey: String,
+    chatHistory: List<Content>,
+    inputContent: Content
+): Pair<String?, String?> {
+    val supportsScreenshot = com.google.ai.sample.ModelOption.values()
+        .find { it.modelName == modelName }
+        ?.supportsScreenshot ?: true
+
+    return try {
+        val apiMessages = mutableListOf<com.google.ai.sample.network.PuterMessage>()
+
+        // History first, then the new input; roles other than "user"/"system" map to "assistant".
+        (chatHistory + inputContent).forEach { content ->
+            val parts = content.parts.mapNotNull { part ->
+                when (part) {
+                    is TextPart -> if (part.text.isNotBlank()) com.google.ai.sample.network.PuterTextContent(text = part.text) else null
+                    is ImagePart -> {
+                        if (supportsScreenshot) {
+                            val base64Uri = com.google.ai.sample.network.PuterApiClient.bitmapToBase64DataUri(part.image)
+                            com.google.ai.sample.network.PuterImageContent(image_url = com.google.ai.sample.network.PuterImageUrl(url = base64Uri))
+                        } else null
+                    }
+                    else -> null
+                }
+            }
+            if (parts.isNotEmpty()) {
+                val role = when (content.role) {
+                    "user" -> "user"
+                    "system" -> "system"
+                    else -> "assistant"
+                }
+                apiMessages.add(com.google.ai.sample.network.PuterMessage(role = role, content = parts))
+            }
+        }
+
+        val requestBody = com.google.ai.sample.network.PuterRequest(
+            model = modelName,
+            messages = apiMessages
+        )
+
+        Pair(com.google.ai.sample.network.PuterApiClient.call(apiKey, requestBody), null)
+    } catch (e: kotlinx.coroutines.CancellationException) {
+        // Cancellation is not an API failure: let it propagate so the caller's coroutine stops.
+        throw e
+    } catch (e: Exception) {
+        Log.e("ScreenCaptureService", "Puter API failure", e)
+        Pair(null, e.localizedMessage ?: "Puter API call failed")
+    }
+}
diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
index ea308ef..cb1cd9b 100644
--- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
@@ -785,6 +785,11 @@ class PhotoReasoningViewModel(
             return
         }
 
+        if (currentModel.apiProvider == ApiProvider.PUTER) {
+            reasonWithPuter(userInput, selectedImages, screenInfoForPrompt, imageUrisForChat)
+            return
+        }
+
         if (currentModel.apiProvider == ApiProvider.CEREBRAS) {
             reasonWithCerebras(userInput, selectedImages, screenInfoForPrompt)
             return
@@ -1150,6 +1155,153 @@ class PhotoReasoningViewModel(
         }
     }
 
+    /**
+     * Sends the current prompt to the Puter chat API and publishes the reply.
+     *
+     * Appends the user message and a pending model message to the chat, then, on
+     * [Dispatchers.IO], builds the request from the system message, the database
+     * entries, the earlier chat history and the new input. The UI state ends up as
+     * either `Success` with the response text or `Error` with the failure message.
+     *
+     * @param userInput text entered by the user
+     * @param selectedImages images to attach; skipped when the current model's
+     *   `supportsScreenshot` flag is `false`
+     * @param screenInfoForPrompt optional text appended to [userInput] after a blank line
+     * @param imageUrisForChat image URIs stored on the chat message; `null` means none
+     */
+    private fun reasonWithPuter(
+        userInput: String,
+        selectedImages: List<Bitmap>,
+        screenInfoForPrompt: String?,
+        imageUrisForChat: List<String>?
+    ) {
+        val apiKey = mainActivity?.getCurrentApiKey(ApiProvider.PUTER) ?: ""
+        if (apiKey.isEmpty()) {
+            _uiState.value = PhotoReasoningUiState.Error("Puter Authentication Token (API Key) is missing")
+            return
+        }
+
+        val context = getApplication<Application>().applicationContext
+        val currentModel = com.google.ai.sample.GenerativeAiViewModelFactory.getCurrentModel()
+        val genSettings = com.google.ai.sample.util.GenerationSettingsPreferences.loadSettings(context, currentModel.modelName)
+
+        val userMessageText = if (!screenInfoForPrompt.isNullOrBlank()) {
+            "$userInput\n\n$screenInfoForPrompt"
+        } else {
+            userInput
+        }
+
+        val userMessage = PhotoReasoningMessage(
+            text = userMessageText,
+            participant = PhotoParticipant.USER,
+            imageUris = imageUrisForChat ?: emptyList(),
+            isPending = false
+        )
+        _chatState.addMessage(userMessage)
+
+        val pendingAiMessage = PhotoReasoningMessage(
+            text = "",
+            participant = PhotoParticipant.MODEL,
+            isPending = true
+        )
+        _chatState.addMessage(pendingAiMessage)
+        _chatMessagesFlow.value = _chatState.getAllMessages()
+
+        _uiState.value = PhotoReasoningUiState.Loading
+
+        // Reset tracking vars
+        incrementalCommandCount = 0
+        streamingAccumulatedText.clear()
+        CommandParser.clearBuffer()
+        _detectedCommands.value = emptyList()
+        _commandExecutionStatus.value = ""
+
+        viewModelScope.launch(Dispatchers.IO) {
+            try {
+                val apiMessages = mutableListOf<com.google.ai.sample.network.PuterMessage>()
+
+                // System message and database entries share one "system" message.
+                val formattedDbEntries = formatDatabaseEntriesAsText(context)
+                val systemContent = listOfNotNull(
+                    _systemMessage.value.takeIf { it.isNotBlank() }
+                        ?.let { com.google.ai.sample.network.PuterTextContent(text = it) },
+                    formattedDbEntries.takeIf { it.isNotBlank() }
+                        ?.let { com.google.ai.sample.network.PuterTextContent(text = "Additional context from database:\n$it") }
+                )
+                if (systemContent.isNotEmpty()) {
+                    apiMessages.add(com.google.ai.sample.network.PuterMessage(role = "system", content = systemContent))
+                }
+
+                // Chat history: pending and error messages are skipped, and dropLast(1)
+                // removes the user message added above (it is sent below with its images).
+                val historyMessages = _chatState.getAllMessages()
+                    .filter { !it.isPending && it.participant != PhotoParticipant.ERROR }
+                    .dropLast(1)
+                historyMessages.forEach { message ->
+                    if (message.text.isNotBlank()) {
+                        val role = if (message.participant == PhotoParticipant.USER) "user" else "assistant"
+                        val content = listOf(com.google.ai.sample.network.PuterTextContent(text = message.text))
+                        apiMessages.add(com.google.ai.sample.network.PuterMessage(role = role, content = content))
+                    }
+                }
+
+                // Current user request (text + images).
+                val textParts = listOfNotNull(
+                    userMessageText.takeIf { it.isNotBlank() }
+                        ?.let { com.google.ai.sample.network.PuterTextContent(text = it) }
+                )
+                // Images go out only when the selected model is flagged as accepting them.
+                val imageParts = if (currentModel.supportsScreenshot) {
+                    selectedImages.map { bitmap ->
+                        val base64Uri = com.google.ai.sample.network.PuterApiClient.bitmapToBase64DataUri(bitmap)
+                        com.google.ai.sample.network.PuterImageContent(image_url = com.google.ai.sample.network.PuterImageUrl(url = base64Uri))
+                    }
+                } else {
+                    emptyList()
+                }
+                val currentContentParts = textParts + imageParts
+                if (currentContentParts.isNotEmpty()) {
+                    apiMessages.add(com.google.ai.sample.network.PuterMessage(role = "user", content = currentContentParts))
+                }
+
+                val requestBody = com.google.ai.sample.network.PuterRequest(
+                    model = currentModel.modelName,
+                    messages = apiMessages,
+                    temperature = genSettings.temperature.toDouble(),
+                    top_p = genSettings.topP.toDouble(),
+                    max_tokens = 4096
+                )
+
+                val aiResponseText = com.google.ai.sample.network.PuterApiClient.call(apiKey, requestBody)
+
+                withContext(Dispatchers.Main) {
+                    _uiState.value = PhotoReasoningUiState.Success(aiResponseText)
+                    finalizeAiMessage(aiResponseText)
+                    processCommands(aiResponseText)
+                    saveChatHistory(context)
+                }
+            } catch (e: kotlinx.coroutines.CancellationException) {
+                throw e
+            } catch (e: Exception) {
+                // Resolve the text once so the UI state and the chat never show "Error: null".
+                val errorText = e.message ?: "Unknown error"
+                withContext(Dispatchers.Main) {
+                    Log.e(TAG, "Puter API call failed", e)
+                    _uiState.value = PhotoReasoningUiState.Error(errorText)
+                    _chatState.replaceLastPendingMessage()
+                    _chatState.addMessage(
+                        PhotoReasoningMessage(
+                            text = "Error: $errorText",
+                            participant = PhotoParticipant.ERROR
+                        )
+                    )
+                    _chatMessagesFlow.value = _chatState.getAllMessages()
+                    saveChatHistory(context)
+                }
+            }
+        }
+    }
+
     fun collectLiveApiMessages() {
         if (liveApiManager != null) {
             // Set system message and history when connecting
@@ -1991,6 +2143,8 @@ data class MistralResponseMessage(
     val content: String
 )
 
+
+
     /**
      * Save chat history to SharedPreferences
      */