Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from PIL import ImageDraw, Image, ImageFont | |
| from transformers import DPTFeatureExtractor, DPTForDepthEstimation | |
| import torch | |
| import streamlit as st | |
# Labels offered in the font selector, in display order. Each label has the
# form "Font: <family> - <face>"; main() strips the leading "Font: " and uses
# the remainder as the stem of a .ttf file under fonts/.
_FONT_FACES = (
    ("Serif", "EBGaramond"),
    ("Serif", "Cinzel"),
    ("Sans", "Roboto"),
    ("Sans", "Lato"),
    ("Display", "Lobster"),
    ("Display", "LilitaOne"),
    ("Handwriting", "GreatVibes"),
    ("Handwriting", "Pacifico"),
    ("Mono", "Inconsolata"),
    ("Mono", "Cutive"),
)
FONTS = [f"Font: {family} - {face}" for family, face in _FONT_FACES]
def hex_to_rgb(hex):
    """Convert a hexadecimal colour string to an ``(r, g, b)`` tuple of ints.

    Accepted forms are ``"RRGGBB"`` and the 3-digit shorthand ``"RGB"``
    (each digit is doubled), either of them optionally preceded by ``"#"``.
    Characters after the sixth digit (for example an alpha pair) are
    ignored.

    Args:
        hex: The colour string. The parameter name shadows the ``hex``
            builtin; it is kept so existing keyword callers keep working.

    Returns:
        A 3-tuple of integers in the range 0-255.

    Raises:
        ValueError: If fewer than six digits remain after normalisation or
            if a digit pair is not valid hexadecimal.
    """
    # Slicing (rather than str.startswith) keeps the check valid for any
    # sliceable input the original loop accepted.
    digits = hex[1:] if hex[:1] == "#" else hex
    if len(digits) == 3:
        # CSS-style shorthand: "0f8" means "00ff88".
        digits = "".join(ch * 2 for ch in digits)
    if len(digits) < 6:
        raise ValueError(f"expected a 3- or 6-digit hex colour, got {hex!r}")
    return tuple(int(digits[i : i + 2], 16) for i in (0, 2, 4))
def _cache_across_reruns(func):
    """Wrap *func* with whichever Streamlit resource cache is available."""
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    # Older Streamlit releases only provide the legacy st.cache API.
    return st.cache(allow_output_mutation=True)(func)


@_cache_across_reruns
def load():
    """Load the Intel/dpt-large depth model and its feature extractor.

    Streamlit re-executes the script on every widget interaction, so the
    loader is cached: the pretrained weights are read once per server
    process instead of once per rerun.

    Returns:
        A ``(model, feature_extractor)`` tuple.
    """
    feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
    model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
    return model, feature_extractor


# Module-level handles used by compute_depth().
model, feature_extractor = load()
def compute_depth(image):
    """Predict a depth map for *image* at the image's own resolution.

    The module-level ``feature_extractor`` prepares the input and the
    module-level ``model`` is run without gradient tracking. The predicted
    depth is then resized with bicubic interpolation to ``image.size``
    reversed.

    Args:
        image: The image to analyse; load_img() passes a PIL image.

    Returns:
        A 2-D NumPy array taken from the first batch item and first channel
        of the resized prediction.
    """
    # PIL reports size as (width, height); interpolate wants (height, width).
    target_size = image.size[::-1]

    batch = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        raw_depth = model(**batch).predicted_depth

    resized = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),  # add the channel axis interpolate expects
        size=target_size,
        mode="bicubic",
        align_corners=False,
    )
    return resized.cpu().numpy()[0, 0, :, :]
def get_mask1(
    shape, x, y, caption, font=None, font_size=0.08, color=(0, 0, 0), alpha=0.8
):
    """Render *caption* on a transparent canvas and derive its opacity mask.

    Args:
        shape: Shape of the target image; ``shape[0]`` is used as the height
            and ``shape[1]`` as the width.
        x: Horizontal text position as a fraction of the width.
        y: Vertical text position as a fraction of the height; the text is
            drawn at ``(1 - y) * height``.
        caption: The text to draw.
        font: Font passed to ``ImageFont.truetype``.
        font_size: Font size as a fraction of the width.
        color: RGB components of the text colour.
        alpha: Text opacity; values outside [0, 1] are clamped.

    Returns:
        A ``(text, mask1)`` pair. ``text`` holds the colour channels of the
        rendered canvas (everything but the last channel). ``mask1`` is the
        canvas's last channel divided by 255 and repeated over three
        channels.
    """
    height, width = shape[0], shape[1]
    opacity = int(max(min(1, alpha), 0) * 255)

    canvas = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    typeface = ImageFont.truetype(font, int(font_size * width))
    pen.text(
        (x * width, (1 - y) * height),
        caption,
        fill=(*color, opacity),
        font=typeface,
    )

    rgba = np.array(canvas)
    coverage = rgba[:, :, -1] / 255
    # Same coverage value in each of the three colour channels.
    mask1 = np.repeat(coverage[:, :, np.newaxis], 3, axis=-1)
    return rgba[:, :, :-1], mask1
def get_mask2(depth_map, depth):
    """Return a boolean mask of the pixels whose value is below a threshold.

    The threshold is a linear blend of the map's extremes: ``depth == 0``
    puts it at the maximum of *depth_map* and ``depth == 1`` at the minimum.

    Args:
        depth_map: 2-D array of per-pixel depth values.
        depth: Blend factor between the maximum (0) and the minimum (1).

    Returns:
        A boolean array with the shape of *depth_map* plus a trailing axis
        of length 1.
    """
    lowest = np.min(depth_map)
    highest = np.max(depth_map)
    threshold = depth * lowest + (1 - depth) * highest
    below = depth_map[:, :] < threshold
    return np.expand_dims(below, -1)
def add_caption(
    img,
    caption,
    depth_map=None,
    x=0.5,
    y=0.5,
    depth=0.5,
    font_size=50,
    color=(255, 255, 255),
    font="",
    alpha=1,
):
    """Blend *caption* into *img*, optionally hidden where the depth map says so.

    The caption is rendered by get_mask1(). When a depth map is supplied,
    the text is kept only on the pixels selected by get_mask2(), so that
    other parts of the picture appear in front of it.

    Args:
        img: Image as an array whose first two axes are height and width,
            with three colour channels.
        caption: The text to draw.
        depth_map: Per-pixel depth array matching the height and width of
            *img*. If ``None``, the caption is drawn over the whole image
            without any occlusion.
        x: Horizontal text position as a fraction of the width.
        y: Vertical text position as a fraction of the height.
        depth: Depth threshold factor forwarded to get_mask2().
        font_size: Forwarded to get_mask1(), which reads it as a fraction of
            the image width. NOTE(review): the default of 50 is therefore
            far larger than the image; callers in this file always pass an
            explicit value.
        color: RGB components of the text colour.
        font: Font forwarded to get_mask1().
        alpha: Text opacity, forwarded to get_mask1().

    Returns:
        The blended image as a ``uint8`` array.
    """
    text, mask1 = get_mask1(
        img.shape,
        x,
        y,
        caption,
        font=font,
        font_size=font_size,
        color=color,
        alpha=alpha,
    )
    if depth_map is None:
        # No depth information: nothing can occlude the text.
        mask = mask1
    else:
        # The (H, W, 1) boolean mask broadcasts across the colour channels.
        mask = mask1 * get_mask2(depth_map, depth)
    return ((1 - mask) * img + mask * text).astype(np.uint8)
def load_img(uploaded_file):
    """Open the image to caption and compute its depth map.

    Args:
        uploaded_file: The uploaded file, or ``None`` to use the bundled
            ``pulp.jpg`` sample.

    Returns:
        A ``(pixels, depth_map, default)`` tuple: the RGB image as a NumPy
        array, the result of compute_depth() for it, and a flag that is
        ``True`` when the bundled sample was used.
    """
    max_side = 800
    default = uploaded_file is None
    if default:
        img = Image.open("pulp.jpg")
    else:
        img = Image.open(uploaded_file)
        width, height = img.size
        if width > max_side or height > max_side:
            # Fit the longer side to max_side and keep the aspect ratio.
            # max(1, ...) stops a very elongated image from collapsing to a
            # zero-pixel side, which resize() rejects.
            if width < height:
                new_size = (max(1, int(max_side * width / height)), max_side)
            else:
                new_size = (max_side, max(1, int(max_side * height / width)))
            img = img.resize(new_size)
    # JPEGs may be grayscale or CMYK; the caption blending in add_caption()
    # works on three colour channels, so normalise to RGB.
    if img.mode != "RGB":
        img = img.convert("RGB")
    return np.array(img), compute_depth(img), default
def main():
    """Lay out the Streamlit page and show the captioned image.

    The page consists of a sidebar description, a JPEG uploader, the text
    and colour inputs, two columns of sliders (depth, position, font size,
    opacity) plus a font selector, and finally the image produced by
    add_caption().
    """

    def edge_labels(low, low_label, high, high_label):
        """Return a slider formatter that labels only the two end values."""

        def fmt(value):
            if value == low:
                return low_label
            if value == high:
                return high_label
            return ""

        return fmt

    # Collapse the (empty) widget labels so they take no vertical space.
    label_css = """
<style>
label{
height: 0px !important;
min-height: 0px !important;
margin-bottom: 0px !important;
}
</style>
"""
    st.markdown(label_css, unsafe_allow_html=True)

    about = """
# Depth-aware text addition
Add text ***inside*** an image!
Upload an image, enter some text and adjust the ***depth*** where you want the text to be displayed. You can also define its location and appearance (font, color, transparency and size).
Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/), [pillow](https://python-pillow.org/) and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) of *Jenny of Oldstones* by Florence + the Machine
To go further:
- [blog post](https://vivien000.github.io/blog/journal/adding-text-inside-pictures-and-videos.html)
- [notebook](https://colab.research.google.com/github/vivien000/depth-aware_captioning/blob/master/Depth_aware_Video_Captioning.ipynb) for videos
- [examples](https://youtu.be/RtkBplRuWhg?list=PLlPB25tBWqtVhj4Ink8hl9Evc2dlIX4Jh) of videos
"""
    st.sidebar.markdown(about)

    uploaded_file = st.file_uploader("", type=["jpg", "jpeg"])
    with st.spinner("Analyzing the image - Please wait a few seconds"):
        img, depth_map, default = load_img(uploaded_file)

    # Initial widget values: one preset for the bundled sample image and
    # another for user uploads.
    if default:
        presets = (0.02, 0.68, 0.99, 0.07, 0.12, 4)
        text0 = "Pulp Fiction"
    else:
        presets = (0.1, 0.9, 0.8, 0.08, 0.5, 0)
        text0 = "Enter your text here"
    x0, y0, alpha0, font_size0, depth0, font0 = presets

    text_col, color_label_col, color_col = st.columns((13, 1, 1))
    with text_col:
        text = st.text_input("", text0)
    with color_label_col:
        st.markdown("Color:")
    with color_col:
        color = st.color_picker("", value="#FFFFFF")

    hundredths = [i / 100 for i in range(101)]
    font_sizes = [0.04 + i / 100 for i in range(0, 17)]

    left_col, _, right_col = st.columns((4, 1, 4))
    with left_col:
        depth = st.select_slider(
            "",
            options=hundredths,
            value=depth0,
            format_func=edge_labels(0.0, "Foreground", 1.0, "Background"),
        )
        x = st.select_slider(
            "",
            options=hundredths,
            value=x0,
            format_func=edge_labels(0.0, "Left", 1.0, "Right"),
        )
        y = st.select_slider(
            "",
            options=hundredths,
            value=y0,
            format_func=edge_labels(0.0, "Bottom", 1.0, "Top"),
        )
    with right_col:
        font_size = st.select_slider(
            "",
            options=font_sizes,
            value=font_size0,
            format_func=edge_labels(0.04, "Small font", 0.2, "Large font"),
        )
        alpha = st.select_slider(
            "",
            options=hundredths,
            value=alpha0,
            format_func=edge_labels(0.0, "Transparent", 1.0, "Opaque"),
        )
        font_label = st.selectbox("", FONTS, index=font0)

    # Drop the six-character "Font: " prefix to get the font file's stem.
    font_path = f"fonts/{font_label[6:]}.ttf"

    captioned = add_caption(
        img,
        text,
        x=x,
        y=y,
        depth=depth,
        depth_map=depth_map,
        font=font_path,
        font_size=font_size,
        alpha=alpha,
        color=hex_to_rgb(color[1:]),  # strip the leading "#"
    )
    st.image(captioned)


if __name__ == "__main__":
    main()