Pulsefy · Robinsonchiziterem · Mar 23, 2026 · Mar 23, 2026
diff --git a/app/ai-service/main.py b/app/ai-service/main.py
@@ -0,0 +1,106 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from sklearn.cluster import DBSCAN
+from sklearn.preprocessing import StandardScaler
+import hashlib
+
+app = FastAPI(title="Soter Fraud Detection ML Service")
+
+class Claim(BaseModel):
+    id: str
+    amount: float
+    recipientRef: str
+    evidenceRef: Optional[str] = None
+    ipAddress: Optional[str] = None
+
+class ClaimWithRisk(BaseModel):
+    id: str
+    fraudRiskScore: float
+
+class BatchAnalyzeRequest(BaseModel):
+    claims: List[Claim]
+
+@app.post("/analyze-batch", response_model=List[ClaimWithRisk])
+def analyze_batch(request: BatchAnalyzeRequest):
+    claims = request.claims
+    if not claims:
+        return []
+
+    # If too few claims to cluster effectively, return default low risk
+    if len(claims) < 3:
+        return [ClaimWithRisk(id=c.id, fraudRiskScore=0.0) for c in claims]
+
+    data = []
+    for c in claims:
+        # Encode IP
+        encoded_ip = 0
+        if c.ipAddress:
+            encoded_ip = int(hashlib.md5(c.ipAddress.encode()).hexdigest(), 16) % 10000 
+
+        # Encode evidence
+        encoded_evidence = 0
+        if c.evidenceRef:
+            encoded_evidence = int(hashlib.md5(c.evidenceRef.encode()).hexdigest(), 16) % 10000
+
+        data.append({
+            "id": c.id,
+            "amount": float(c.amount),
+            "ip_encoded": encoded_ip,
+            "evidence_encoded": encoded_evidence
+        })
+
+    df = pd.DataFrame(data)
+
+    # Scale features
+    scaler = StandardScaler()
+    scaled_features = scaler.fit_transform(df[['amount', 'ip_encoded', 'evidence_encoded']])
+
+    # Cluster using DBSCAN.
+    # eps = 0.5, min_samples = 3 (we consider tight clusters of highly similar features to be suspicious)
+    db = DBSCAN(eps=0.5, min_samples=3).fit(scaled_features)
+    labels = db.labels_
+
+    df['cluster'] = labels
+    fraud_scores = {}
+
+    for cluster_id in set(labels):
+        if cluster_id == -1:
+            # Noise / outliers - normal distinct claims or random small anomalies
+            cluster_mask = (df['cluster'] == cluster_id)
+            for idx in df[cluster_mask].index:
+                fraud_scores[df.loc[idx, 'id']] = 0.1 # low base risk
+            continue
+
+        cluster_claims = df[df['cluster'] == cluster_id]
+
+        # Calculate cluster purity on IP or Evidence
+        ip_purity = cluster_claims['ip_encoded'].nunique()
+        evidence_purity = cluster_claims['evidence_encoded'].nunique()
+
+        # If clustered mainly because of identical IP or Evidence (purity 1 for cluster size >= 3) -> very high risk
+        risk = 0.4 # Baseline for a cluster
+
+        # High risk if same IP
+        if ip_purity == 1 and cluster_claims.iloc[0]['ip_encoded'] != 0:
+            risk = 0.95
+        # Even higher if same evidence
+        elif evidence_purity == 1 and cluster_claims.iloc[0]['evidence_encoded'] != 0:
+            risk = 0.99
+
+        for idx in cluster_claims.index:
+            fraud_scores[df.loc[idx, 'id']] = risk
+
+    # Format result
+    result = []
+    for c in claims:
+        score = fraud_scores.get(c.id, 0.0)
+        result.append(ClaimWithRisk(id=c.id, fraudRiskScore=score))
+
+    return result
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/app/ai-service/requirements.txt b/app/ai-service/requirements.txt
@@ -0,0 +1,6 @@
+fastapi==0.111.0
+uvicorn==0.30.0
+pydantic==2.7.3
+scikit-learn==1.5.0
+pandas==2.2.2
+numpy==1.26.4
diff --git a/app/backend/prisma/schema.prisma b/app/backend/prisma/schema.prisma
@@ -80,6 +80,8 @@ model Claim {
 
   recipientRef String
   evidenceRef  String?
+  ipAddress    String?
+  fraudRiskScore Float?
 }
 
 model AuditLog {

diff --git a/app/backend/src/app.module.ts b/app/backend/src/app.module.ts
@@ -27,6 +27,7 @@ import { ApiKeyGuard } from './common/guards/api-key.guard';
 import { RolesGuard } from './auth/roles.guard';
 import { ObservabilityModule } from './observability/observability.module';
 import { ClaimsModule } from './claims/claims.module';
+import { FraudModule } from './fraud/fraud.module';
 import { LoggingInterceptor } from './interceptors/logging.interceptor';
 import { LoggerService } from './logger/logger.service';
 import { AllExceptionsFilter } from './common/filters/http-exception.filter';
@@ -69,6 +70,7 @@ import { AllExceptionsFilter } from './common/filters/http-exception.filter';
     CampaignsModule,
     ObservabilityModule,
     ClaimsModule,
+    FraudModule,
     NotificationsModule,
     JobsModule,
   ],

diff --git a/app/backend/src/claims/claims.service.ts b/app/backend/src/claims/claims.service.ts
@@ -54,6 +54,7 @@ export class ClaimsService {
         amount: createClaimDto.amount,
         recipientRef: createClaimDto.recipientRef,
         evidenceRef: createClaimDto.evidenceRef,
+        ipAddress: createClaimDto.ipAddress,
       },
       include: {
         campaign: true,

diff --git a/app/backend/src/claims/dto/create-claim.dto.ts b/app/backend/src/claims/dto/create-claim.dto.ts
@@ -42,4 +42,12 @@ export class CreateClaimDto {
   @IsOptional()
   @IsString()
   evidenceRef?: string;
+
+  @ApiPropertyOptional({
+    description: 'IP address of the claimant',
+    example: '192.168.1.1',
+  })
+  @IsOptional()
+  @IsString()
+  ipAddress?: string;
 }
diff --git a/app/backend/src/fraud/fraud.module.ts b/app/backend/src/fraud/fraud.module.ts
@@ -0,0 +1,11 @@
+import { Module } from '@nestjs/common';
+import { HttpModule } from '@nestjs/axios';
+import { FraudService } from './fraud.service';
+import { PrismaModule } from '../prisma/prisma.module';
+
+@Module({
+  imports: [HttpModule, PrismaModule],
+  providers: [FraudService],
+  exports: [FraudService],
+})
+export class FraudModule {}
diff --git a/app/backend/src/fraud/fraud.service.spec.ts b/app/backend/src/fraud/fraud.service.spec.ts
@@ -0,0 +1,96 @@
+import { Test, TestingModule } from '@nestjs/testing';
+import { FraudService } from './fraud.service';
+import { HttpService } from '@nestjs/axios';
+import { ConfigService } from '@nestjs/config';
+import { PrismaService } from '../prisma/prisma.service';
+import { of } from 'rxjs';
+
+describe('FraudService', () => {
+  let service: FraudService;
+  let httpService: jest.Mocked<HttpService>;
+  let prismaService: jest.Mocked<PrismaService>;
+
+  beforeEach(async () => {
+    httpService = {
+      post: jest.fn(),
+    } as unknown as jest.Mocked<HttpService>;
+
+    prismaService = {
+      claim: {
+        findMany: jest.fn(),
+        update: jest.fn(),
+      },
+    } as unknown as jest.Mocked<PrismaService>;
+
+    const module: TestingModule = await Test.createTestingModule({
+      providers: [
+        FraudService,
+        {
+          provide: HttpService,
+          useValue: httpService,
+        },
+        {
+          provide: PrismaService,
+          useValue: prismaService,
+        },
+        {
+          provide: ConfigService,
+          useValue: { get: jest.fn().mockReturnValue('http://localhost:8000') },
+        },
+      ],
+    }).compile();
+
+    service = module.get<FraudService>(FraudService);
+  });
+
+  it('should process new claims alongside historical claims', async () => {
+    const mockPendingClaims = [
+      { id: '1', amount: 100, recipientRef: 'R1', evidenceRef: 'E1', ipAddress: 'IP1', fraudRiskScore: null },
+    ];
+
+    const mockHistoricalClaims = [
+      { id: '2', amount: 50, recipientRef: 'R2', evidenceRef: 'E2', ipAddress: 'IP2', fraudRiskScore: 0.1 },
+    ];
+
+    // Mock sequential calls to findMany
+    prismaService.claim.findMany
+      .mockResolvedValueOnce(mockPendingClaims as any)
+      .mockResolvedValueOnce(mockHistoricalClaims as any);
+
+    const mlResponse = {
+      data: [
+        { id: '1', fraudRiskScore: 0.95 },
+        { id: '2', fraudRiskScore: 0.1 }
+      ]
+    };
+
+    httpService.post.mockReturnValue(of(mlResponse as any));
+    prismaService.claim.update.mockResolvedValue(null as any);
+
+    const result = await service.analyzePendingClaimsBatch();
+
+    expect(prismaService.claim.findMany).toHaveBeenCalledTimes(2);
+    expect(httpService.post).toHaveBeenCalledWith('http://localhost:8000/analyze-batch', {
+      claims: [
+        { id: '1', amount: 100, recipientRef: 'R1', evidenceRef: 'E1', ipAddress: 'IP1' },
+        { id: '2', amount: 50, recipientRef: 'R2', evidenceRef: 'E2', ipAddress: 'IP2' }
+      ]
+    });
+    // Only updates the pending claim
+    expect(prismaService.claim.update).toHaveBeenCalledTimes(1);
+    expect(prismaService.claim.update).toHaveBeenCalledWith({
+      where: { id: '1' },
+      data: { fraudRiskScore: 0.95 },
+    });
+    expect(result).toEqual(mlResponse.data);
+  });
+
+  it('should return empty array if no pending claims are found', async () => {
+    prismaService.claim.findMany.mockResolvedValue([]);
+
+    const result = await service.analyzePendingClaimsBatch();
+
+    expect(result).toEqual([]);
+    expect(httpService.post).not.toHaveBeenCalled();
+  });
+});
diff --git a/app/backend/src/fraud/fraud.service.ts b/app/backend/src/fraud/fraud.service.ts
@@ -0,0 +1,79 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { HttpService } from '@nestjs/axios';
+import { ConfigService } from '@nestjs/config';
+import { PrismaService } from '../prisma/prisma.service';
+import { firstValueFrom } from 'rxjs';
+
+@Injectable()
+export class FraudService {
+  private readonly logger = new Logger(FraudService.name);
+  private readonly aiServiceUrl: string;
+
+  constructor(
+    private readonly httpService: HttpService,
+    private readonly prisma: PrismaService,
+    private readonly configService: ConfigService,
+  ) {
+    this.aiServiceUrl = this.configService.get<string>('AI_SERVICE_URL') || 'http://localhost:8000';
+  }
+
+  async analyzePendingClaimsBatch() {
+    this.logger.log('Starting batch fraud analysis for pending claims');
+
+    // New claims
+    const pendingClaims = await this.prisma.claim.findMany({
+      where: { fraudRiskScore: null },
+      take: 100,
+    });
+
+    if (pendingClaims.length === 0) {
+      this.logger.log('No pending claims to analyze');
+      return [];
+    }
+
+    // Historical claims for context (last 500)
+    const historicalClaims = await this.prisma.claim.findMany({
+      where: { fraudRiskScore: { not: null } },
+      take: 500,
+      orderBy: { createdAt: 'desc' },
+    });
+
+    const combinedClaims = [...pendingClaims, ...historicalClaims];
+
+    try {
+      // Map to the format expected by the AI service
+      const payload = {
+        claims: combinedClaims.map(c => ({
+          id: c.id,
+          amount: Number(c.amount),
+          recipientRef: c.recipientRef,
+          evidenceRef: c.evidenceRef,
+          ipAddress: c.ipAddress,
+        }))
+      };
+
+      const response = await firstValueFrom(
+        this.httpService.post(`${this.aiServiceUrl}/analyze-batch`, payload),
+      );
+
+      const analyzedMap = new Map(response.data.map((c: any) => [c.id, c.fraudRiskScore]));
+
+      // Update only the originally pending claims
+      for (const pending of pendingClaims) {
+        const newScore = analyzedMap.get(pending.id);
+        if (newScore !== undefined) {
+          await this.prisma.claim.update({
+            where: { id: pending.id },
+            data: { fraudRiskScore: newScore },
+          });
+        }
+      }
+
+      this.logger.log(`Successfully analyzed and updated ${pendingClaims.length} claims against ${historicalClaims.length} historical records.`);
+      return response.data;
+    } catch (error) {
+      this.logger.error('Failed to analyze claims batch', error);
+      throw error;
+    }
+  }
+}