UnsupervisedTwoStreamDF/us_labelling.py at main · arin-dev/UnsupervisedTwoStreamDF · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
"""
Unsupervised Deepfake Video Labeling Script

This script implements an unsupervised learning approach to label an unlabeled dataset of videos as either 'deepfake' or 'real'. It is designed to handle large datasets without the need for pre-trained models or human intervention.

**Objectives**:
- **Data Loading**: Load and preprocess 12 consecutive face-cropped frames per video.
- **Feature Extraction**: Extract visual features from the frames, including:
  - Texture features using Local Binary Patterns (LBP).
  - Color histograms from each color channel.
  - Frequency domain features using Fast Fourier Transform (FFT).
- **Feature Normalization and Dimensionality Reduction**:
  - Normalize the extracted features using StandardScaler.
  - Reduce dimensionality with Principal Component Analysis (PCA) while retaining 95% of the variance.
- **Clustering**:
  - Apply KMeans clustering to group videos into two clusters, representing 'deepfake' and 'real' videos.
- **Label Assignment**:
  - Assign predicted labels to each video based on the clustering results.

**Instructions**:
- Replace the placeholder `load_video_frames()` function with your code to load the 12 face-cropped frames per video.
- Ensure all dependencies are installed:
  - `numpy`
  - `opencv-python` (`cv2`)
  - `scikit-learn`
  - `scikit-image` (for LBP feature extraction)

**Usage**:
- Execute the script to process your dataset and generate predicted labels for each video.
- Optionally, modify or extend feature extraction methods to better suit your data.

**Note**:
- This approach relies solely on classical computer vision techniques and unsupervised learning algorithms.
- It is suitable for scenarios where labeled data is unavailable and computational resources are limited.

**Disclaimer**:
- The accuracy of the labeling depends on the quality of feature extraction and the inherent separability of the data.
- Further refinement and validation may be necessary to achieve desired performance levels.

"""

# Import necessary libraries
import numpy as np
import cv2
import os
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# You can add more imports if needed

# --------------------------- User Input Section --------------------------- #
# Define the path to your dataset or placeholder for your data loading function

def load_video_frames():
    """
    Placeholder function: Replace this function with your code to load
    the 12 consecutive face-cropped frames per video.

    Returns:
        data_dict (dict): A dictionary where keys are video IDs and values are lists of frames (numpy arrays).
    """
    data_dict = {}

    # Example structure:
    # data_dict['video1'] = [frame1, frame2, ..., frame12]
    # data_dict['video2'] = [frame1, frame2, ..., frame12]
    # Each frame is a numpy array representing the image.

    # TODO: Implement your data loading and preprocessing here.

    return data_dict

# -------------------------------------------------------------------------- #

def extract_features(frames):
    """
    Extract features from the list of frames for a single video.

    Args:
        frames (list): List of numpy arrays, each representing a frame.

    Returns:
        features (np.array): Feature vector for the video.
    """
    # Initialize lists to hold features
    texture_features = []
    color_features = []
    frequency_features = []
    # Add more feature lists as needed

    for frame in frames:
        # Convert frame to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # ---------------------- Texture Feature Extraction ---------------------- #
        # Example: Local Binary Patterns (LBP)
        lbp = local_binary_pattern(gray)
        texture_features.append(lbp)

        # ---------------------- Color Feature Extraction ------------------------ #
        # Example: Color histogram
        color_hist = color_histogram(frame)
        color_features.append(color_hist)

        # ------------------- Frequency Domain Feature Extraction ---------------- #
        # Example: FFT of the grayscale image
        freq_feat = frequency_domain_features(gray)
        frequency_features.append(freq_feat)

        # You can add more feature extraction methods here

    # Aggregate features over all frames (e.g., by averaging)
    texture_features = np.mean(texture_features, axis=0)
    color_features = np.mean(color_features, axis=0)
    frequency_features = np.mean(frequency_features, axis=0)

    # Concatenate all features into a single feature vector
    features = np.concatenate([texture_features, color_features, frequency_features])

    return features

# Implementations of feature extraction functions
def local_binary_pattern(gray_image):
    """
    Compute Local Binary Pattern (LBP) features.

    Args:
        gray_image (np.array): Grayscale image.

    Returns:
        hist (np.array): LBP histogram.
    """
    from skimage.feature import local_binary_pattern

    # Parameters for LBP
    radius = 1
    n_points = 8 * radius

    lbp = local_binary_pattern(gray_image, n_points, radius, method='uniform')
    # Compute histogram of LBP
    (hist, _) = np.histogram(lbp.ravel(),
                             bins=np.arange(0, n_points + 3),
                             range=(0, n_points + 2))
    # Normalize the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)

    return hist

def color_histogram(image):
    """
    Compute color histogram features.

    Args:
        image (np.array): Color image.

    Returns:
        hist (np.array): Concatenated color histograms for each channel.
    """
    # Compute histograms for each color channel
    chans = cv2.split(image)
    colors = ("b", "g", "r")
    features = []

    for (chan, color) in zip(chans, colors):
        hist = cv2.calcHist([chan], [0], None, [256], [0, 256])
        hist = cv2.normalize(hist, hist).flatten()
        features.extend(hist)

    return np.array(features)

def frequency_domain_features(gray_image):
    """
    Compute frequency domain features using FFT.

    Args:
        gray_image (np.array): Grayscale image.

    Returns:
        features (np.array): Flattened magnitude spectrum.
    """
    f = np.fft.fft2(gray_image)
    fshift = np.fft.fftshift(f)
    magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-7)

    # Flatten and normalize the spectrum
    features = magnitude_spectrum.flatten()
    features = features / np.linalg.norm(features)

    return features

# Main processing function
def main():
    # Load the dataset
    data_dict = load_video_frames()  # Replace this with your data loading function

    # List to hold feature vectors and corresponding video IDs
    feature_list = []
    video_ids = []

    # Process each video
    for video_id, frames in data_dict.items():
        print(f"Processing video: {video_id}")
        features = extract_features(frames)
        feature_list.append(features)
        video_ids.append(video_id)

    # Convert feature_list to a NumPy array
    X = np.array(feature_list)

    # --------------------------- Feature Normalization --------------------------- #
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # ------------------------- Dimensionality Reduction -------------------------- #
    pca = PCA(n_components=0.95)  # Retain 95% of variance
    X_pca = pca.fit_transform(X_scaled)

    # ------------------------------- Clustering ---------------------------------- #
    # Choose the number of clusters (K=2 for deepfake and real)
    kmeans = KMeans(n_clusters=2, random_state=42)
    cluster_labels = kmeans.fit_predict(X_pca)

    # ------------------------------ Output Results ------------------------------- #
    # Map cluster labels to video IDs
    results = dict(zip(video_ids, cluster_labels))

    # Print or save the results
    for video_id, label in results.items():
        if label == 0:
            print(f"Video {video_id}: Predicted Label - Real")
        else:
            print(f"Video {video_id}: Predicted Label - Deepfake")

    # Optionally, save results to a file
    # with open('video_labels.csv', 'w') as f:
    #     for video_id, label in results.items():
    #         f.write(f"{video_id},{label}\n")

if __name__ == "__main__":
    main()