Issue
I have this code which works fine:
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import numpy as np
import os
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
# Paths used by the script. os.path.join with a single argument returns that
# argument unchanged, so these are plain relative paths.
# base_image_dir is actually an image file name, not a directory; neither it nor
# edit_image_dir is referenced again in the code shown here.
base_image_dir = os.path.join("IMG_4297.png")
# Directory the final mask PNG is written to at the end of the script.
mask_dir = os.path.join("masks")
edit_image_dir = os.path.join("03_edits")
# Location of the downloaded SAM checkpoint file.
sam_model_filepath = "../segment-anything/segment_anything/sam_vit_h_4b8939.pth"
# Alternative checkpoint location (disabled):
#sam_model_filepath = "./sam_vit_h_4b8939.pth"
# Build the SAM model registered under the "default" key from that checkpoint.
sam = sam_model_registry["default"](checkpoint=sam_model_filepath)
# Function to display mask using matplotlib
def show_mask(mask, ax):
    """Draw a segmentation mask on ``ax`` as a translucent blue overlay.

    Args:
        mask: Array whose last two dimensions are (height, width). Non-zero
            entries are painted; zero entries become fully transparent.
        ax: Object exposing ``imshow`` (e.g. a matplotlib ``Axes``).
    """
    # RGBA overlay colour: dodger blue at 60 % opacity.
    color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    # Broadcasting (h, w, 1) * (1, 1, 4) yields an (h, w, 4) RGBA image in
    # which every masked-out pixel is all zeros, i.e. alpha 0.
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)
# Function to display where we've "clicked"
def show_points(coords, labels, ax, marker_size=375):
    """Plot prompt points on ``ax`` as stars: green for positive, red for negative.

    Args:
        coords: Array of shape (n, 2) holding (x, y) pixel coordinates.
        labels: Array of length n; 1 marks a positive point, 0 a negative one.
        ax: Object exposing ``scatter`` (e.g. a matplotlib ``Axes``).
        marker_size: Marker area forwarded to ``scatter`` as ``s``.
    """
    # Positive points are drawn first, then negative ones, each in its own
    # scatter call so the two groups get distinct colours.
    for label_value, point_color in ((1, "green"), (0, "red")):
        selected = coords[labels == label_value]
        ax.scatter(
            selected[:, 0],
            selected[:, 1],
            color=point_color,
            marker="*",
            s=marker_size,
            edgecolor="white",
            linewidth=1.25,
        )
# Load the chosen image with OpenCV. cv2.imread returns None instead of raising
# when the file is missing or unreadable, so fail early with a clear message.
image_path = "./IMG_4297.png"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads pixels in BGR order; convert to RGB for display and prediction.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the chosen image
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis("on")
plt.show()

# Pixel coordinates (x, y) of the "click" and its label (1 = positive point)
input_point = np.array([[525, 325]])
input_label = np.array([1])

# Display the point we've clicked on
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_points(input_point, input_label, plt.gca())
plt.axis("on")
plt.show()

# Initiate predictor with the Segment Anything model
predictor = SamPredictor(sam)
predictor.set_image(image)

# Use the predictor to gather candidate masks for the clicked point
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)

# Check the shape - should be three masks of the same dimensions as our image.
# (A bare expression: only displayed when run interactively, e.g. in a notebook.)
masks.shape

# Display each candidate mask along with its confidence score
for i, (mask, score) in enumerate(zip(masks, scores)):
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    show_mask(mask, plt.gca())
    show_points(input_point, input_label, plt.gca())
    plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
    plt.axis("off")
    plt.show()

# Choose which mask to use and invert it in one step: the selected region
# becomes 0 (transparent alpha) and everything else becomes 255 (opaque alpha).
chosen_mask = np.where(masks[1], 0, 255).astype("uint8")

# Create a blank RGBA canvas the same size as the mask. PIL takes
# (width, height) while the NumPy mask is indexed (height, width).
height, width = chosen_mask.shape
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))

# Convert the canvas to pixels and use the single-channel mask as its alpha
# channel (index 3 of the last axis).
pix = np.array(mask)
pix[:, :, 3] = chosen_mask

# Convert pixels back to an RGBA image.
# (The bare name below only renders the image when run interactively.)
new_mask = Image.fromarray(pix, "RGBA")
new_mask

# Save this mask for re-use in the edit step, creating the folder if needed
os.makedirs(mask_dir, exist_ok=True)
new_mask.save(os.path.join(mask_dir, "new_mask.png"))
But I am trying to use the second half with a slightly different program / AI language model:
import numpy as np
from lang_sam.utils import draw_image
from PIL import Image
from lang_sam import LangSAM
from heic2png import HEIC2PNG
if __name__ == '__main__':
# NOTE(review): the statements below appear without indentation under this
# guard as shown here — confirm the intended nesting.
# Convert the HEIC photo to PNG at the given quality setting.
heic_img = HEIC2PNG('/Users/Downloads/IMG_4316.heic', quality=70) # Specify the quality of the converted image
# NOTE(review): the script later opens /Users/Downloads/IMG_4316.png, so the
# converted file is presumably written next to the source .heic — confirm.
heic_img.save() # The converted image will be saved as `test.png`
model = LangSAM()
image_pil = Image.open("/Users/Downloads/IMG_4316.png").convert("RGB")
# Text prompt describing what to segment.
text_prompt = "wall"
masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)
# Bare expression: its value is only shown when run interactively.
masks.shape
# One caption per detection: the matched phrase followed by its logit.
labels = [f"{phrase} {logit:.2f}" for phrase, logit in zip(phrases, logits)]
image_array = np.asarray(image_pil)
# Render the masks, boxes and labels onto a copy of the photo.
image = draw_image(image_array, masks, boxes, labels)
image = Image.fromarray(np.uint8(image)).convert("RGB")
image.show()
# NOTE(review): `image` was converted to "RGB" above, so this array has three
# channels per pixel (the reported error shows shape (3024, 3024, 3)); it is
# the annotated picture, not one of the entries of `masks`.
chosen_mask = np.array(image).astype("uint8")
# Swap zero and non-zero values via temporary sentinels:
# non-zero -> 255 -> 0 and zero -> 1 -> 255.
chosen_mask[chosen_mask != 0] = 255
chosen_mask[chosen_mask == 0] = 1
chosen_mask[chosen_mask == 255] = 0
chosen_mask[chosen_mask == 1] = 255
# create a base blank mask
width = 3024
height = 3024
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1)) # RGBA canvas; its alpha channel is overwritten below
# Convert mask back to pixels to add our mask replacing the alpha channel (index 3)
pix = np.array(mask)
# This assignment targets a single channel, so the right-hand side must be a
# 2-D array of the same height and width.
pix[:, :, 3] = chosen_mask
# Convert pixels back to an RGBA image and display
new_mask = Image.fromarray(pix, "RGBA")
new_mask.show()
# NOTE(review): save() is called here without a destination path — confirm the
# intended output file name.
new_mask.save()
I believe that the problem lies within the format of the converted image on this line:
pix[:, :, 3] = chosen_mask
Is there a transformation or some other operation I need to perform on `chosen_mask`
to make the image work here?
The full error is:
> Traceback (most recent call last):
File "/Users/Desktop/code/lang-segment-anything/app.py", line 112, in <module>
pix[:, :, 2] = chosen_mask
~~~^^^^^^^^^
ValueError: could not broadcast input array from shape (3024,3024,3) into shape (3024,3024)
~~~^^^^^^^^^
Solution
When you do this:
width = 3024
height = 3024
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1)) # create an opaque image mask
# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)
you are creating a 3024x3024 image with 4 channels (i.e. RGBA), so your Numpy array pix
will have a shape of [3024, 3024, 4].
When you do this:
image = Image.fromarray(np.uint8(image)).convert("RGB")
chosen_mask = np.array(image).astype("uint8")
you make an RGB image with 3 channels (i.e. RGB), so your Numpy array chosen_mask
will have a shape of [3024, 3024, 3].
So, the problem is when you do this:
pix[:, :, 3] = chosen_mask
you are saying you want to set the Alpha channel at every pixel location in pix
to the 3 RGB channels at that location in chosen_mask
and that can't work... you cannot put the R and the G and the B channel from chosen_mask
into the Alpha channel because there's only one space in the Alpha channel at each location.
So you either need to make `chosen_mask` a single-channel image by creating it in `L` (greyscale) mode:
image = Image.fromarray(np.uint8(image)).convert("L")
chosen_mask = np.array(image).astype("uint8")
or you need to choose which one of the RGB channels from `chosen_mask` you want to put into `pix`'s alpha channel, e.g. just put the green channel from `chosen_mask` into `pix`'s alpha channel:
pix[:, :, 3] = chosen_mask[..., 1]
Answered By - Mark Setchell
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.