metric.py
# pip install pytorch-fid
# pip install git+https://github.com/openai/CLIP.git
import os
import subprocess

import torch
import clip
from PIL import Image


def calculate_fid(real_images_path, generated_images_path):
    # Ensure the paths exist
    if not os.path.exists(real_images_path):
        raise FileNotFoundError(f"Real images path '{real_images_path}' does not exist.")
    if not os.path.exists(generated_images_path):
        raise FileNotFoundError(f"Generated images path '{generated_images_path}' does not exist.")

    # Run the FID calculation
    result = subprocess.run(['python', '-m', 'pytorch_fid', real_images_path, generated_images_path],
                            capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FID calculation failed with the following error:\n{result.stderr}")
    print(result.stdout)
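
# Optional: a minimal sketch of computing FID in-process instead of shelling out,
# assuming pytorch-fid exposes `calculate_fid_given_paths` (import path and signature
# may differ between versions of the package).
def calculate_fid_inprocess(real_images_path, generated_images_path, batch_size=50, dims=2048):
    from pytorch_fid.fid_score import calculate_fid_given_paths  # assumed import path
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Compare the two folders; dims=2048 corresponds to the standard InceptionV3 pool3 features
    fid_value = calculate_fid_given_paths(
        [real_images_path, generated_images_path],
        batch_size=batch_size,
        device=device,
        dims=dims,
    )
    return fid_value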
def calculate_clip_score(image_path, text):
    # Load the model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)

    # Load and preprocess the image
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

    # Encode image and text
    text_inputs = clip.tokenize([text]).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text_inputs)

    # Calculate cosine similarity
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (image_features @ text_features.T).item()
    return similarity
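
# Optional: a minimal sketch that averages the CLIP score over every image in a folder
# (e.g. per-frame scores for a generated clip). It reuses calculate_clip_score above,
# so the CLIP model is reloaded per image; fine for small folders, not optimized for speed.
def calculate_clip_score_for_folder(images_folder, text):
    image_files = [f for f in sorted(os.listdir(images_folder))
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    if not image_files:
        raise FileNotFoundError(f"No images found in '{images_folder}'.")
    scores = [calculate_clip_score(os.path.join(images_folder, f), text) for f in image_files]
    return sum(scores) / len(scores)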
if __name__ == '__main__':
    # FID
    real_images_folder = '/home/jisoo6687/OneStepVideo/example/1'
    generated_images_folder = '/home/jisoo6687/OneStepVideo/example/2'
    calculate_fid(real_images_folder, generated_images_folder)

    # CLIP
    image_path = '/home/jisoo6687/OneStepVideo/example/1/Figure_1.png'
    text = "A description of the image"
    clip_score = calculate_clip_score(image_path, text)
    print(f"CLIP Score: {clip_score}")