matrix_multiplication

import numpy as np
from multiprocessing import Pool, cpu_count
from concurrent.futures import ThreadPoolExecutor
import threading

class ParallelMatrixMultiplication:
    def __init__(self, n_workers=None):
        """
        Parallel Matrix Multiplication
        Time Complexity: O(n^3/p) where p is number of processors
        Space Complexity: O(n^2)
        """
        self.n_workers = n_workers or cpu_count()
    
    def multiply(self, A, B):
        """Multiply matrices A and B using parallel processing"""
        n = len(A)
        m = len(B[0])
        p = len(B)
        
        # Initialize result matrix
        C = np.zeros((n, m))
        
        # Create thread pool
        with ThreadPoolExecutor(max_workers=self.n_workers) as executor:
            # Submit tasks for each element in result matrix
            futures = []
            for i in range(n):
                for j in range(m):
                    futures.append(executor.submit(self._compute_element, A, B, i, j))
            
            # Collect results
            for future in futures:
                i, j, value = future.result()
                C[i][j] = value
        
        return C
    
    def _compute_element(self, A, B, i, j):
        """Compute single element of result matrix"""
        value = 0
        for k in range(len(B)):
            value += A[i][k] * B[k][j]
        return i, j, value

class ParallelMatrixMultiplicationWithBlocks(ParallelMatrixMultiplication):
    def __init__(self, n_workers=None, block_size=32):
        super().__init__(n_workers)
        self.block_size = block_size
    
    def multiply(self, A, B):
        """Multiply matrices using blocked parallel processing"""
        n = len(A)
        m = len(B[0])
        p = len(B)
        
        # Initialize result matrix
        C = np.zeros((n, m))
        
        # Create process pool
        with Pool(processes=self.n_workers) as pool:
            # Submit tasks for each block
            futures = []
            for i in range(0, n, self.block_size):
                for j in range(0, m, self.block_size):
                    futures.append(pool.apply_async(
                        self._compute_block,
                        (A, B, i, j, min(self.block_size, n-i), min(self.block_size, m-j))
                    ))
            
            # Collect results
            for future in futures:
                block_i, block_j, block = future.get()
                C[block_i:block_i+len(block), block_j:block_j+len(block[0])] = block
        
        return C
    
    def _compute_block(self, A, B, start_i, start_j, block_size_i, block_size_j):
        """Compute a block of the result matrix"""
        block = np.zeros((block_size_i, block_size_j))
        for i in range(block_size_i):
            for j in range(block_size_j):
                for k in range(len(B)):
                    block[i][j] += A[start_i + i][k] * B[k][start_j + j]
        return start_i, start_j, block

class ParallelMatrixMultiplicationWithStrassen(ParallelMatrixMultiplication):
    def multiply(self, A, B):
        """Multiply matrices using Strassen's algorithm with parallel processing"""
        n = len(A)
        
        # Base case
        if n <= 32:
            return super().multiply(A, B)
        
        # Split matrices into quadrants
        mid = n // 2
        A11, A12, A21, A22 = self._split_matrix(A, mid)
        B11, B12, B21, B22 = self._split_matrix(B, mid)
        
        # Create thread pool
        with ThreadPoolExecutor(max_workers=self.n_workers) as executor:
            # Compute Strassen's algorithm steps in parallel
            P1 = executor.submit(self.multiply, A11, self._subtract(B12, B22))
            P2 = executor.submit(self.multiply, self._add(A11, A12), B22)
            P3 = executor.submit(self.multiply, self._add(A21, A22), B11)
            P4 = executor.submit(self.multiply, A22, self._subtract(B21, B11))
            P5 = executor.submit(self.multiply, self._add(A11, A22), self._add(B11, B22))
            P6 = executor.submit(self.multiply, self._subtract(A12, A22), self._add(B21, B22))
            P7 = executor.submit(self.multiply, self._subtract(A11, A21), self._add(B11, B12))
            
            # Get results
            P1, P2, P3, P4, P5, P6, P7 = [f.result() for f in [P1, P2, P3, P4, P5, P6, P7]]
        
        # Combine results
        C11 = self._add(self._subtract(self._add(P5, P4), P2), P6)
        C12 = self._add(P1, P2)
        C21 = self._add(P3, P4)
        C22 = self._subtract(self._subtract(self._add(P5, P1), P3), P7)
        
        # Combine quadrants
        return self._combine_matrices(C11, C12, C21, C22)
    
    def _split_matrix(self, matrix, mid):
        """Split matrix into quadrants"""
        return (
            matrix[:mid, :mid],
            matrix[:mid, mid:],
            matrix[mid:, :mid],
            matrix[mid:, mid:]
        )
    
    def _combine_matrices(self, C11, C12, C21, C22):
        """Combine quadrants into single matrix"""
        n = len(C11) * 2
        C = np.zeros((n, n))
        mid = n // 2
        C[:mid, :mid] = C11
        C[:mid, mid:] = C12
        C[mid:, :mid] = C21
        C[mid:, mid:] = C22
        return C
    
    def _add(self, A, B):
        """Add two matrices"""
        return A + B
    
    def _subtract(self, A, B):
        """Subtract two matrices"""
        return A - B

# Example usage
if __name__ == "__main__":
    # Create sample matrices
    A = np.random.rand(4, 4)
    B = np.random.rand(4, 4)
    
    # Basic parallel multiplication
    pmm = ParallelMatrixMultiplication()
    C1 = pmm.multiply(A, B)
    print("Basic parallel multiplication result:")
    print(C1)
    
    # Blocked parallel multiplication
    pmm_block = ParallelMatrixMultiplicationWithBlocks(block_size=2)
    C2 = pmm_block.multiply(A, B)
    print("\nBlocked parallel multiplication result:")
    print(C2)
    
    # Strassen's parallel multiplication
    pmm_strassen = ParallelMatrixMultiplicationWithStrassen()
    C3 = pmm_strassen.multiply(A, B)
    print("\nStrassen's parallel multiplication result:")
    print(C3)
    
    # Verify results
    print("\nVerification (should be close to zero):")
    print(np.max(np.abs(C1 - C2)))
    print(np.max(np.abs(C1 - C3)))

Explanation

Parallel Matrix Multiplication implements different approaches to multiply matrices using parallel processing.

How it works:

Basic Parallel Multiplication:
- Uses thread pool to compute each element in parallel
- Each thread computes one element of result matrix
Blocked Parallel Multiplication:
- Divides matrices into blocks
- Uses process pool to compute blocks in parallel
- Reduces memory access overhead
Strassen's Parallel Multiplication:
- Uses Strassen's algorithm with parallel processing
- Recursively divides matrices into quadrants
- Computes intermediate products in parallel

Time Complexity:

Basic: O(n^3/p)
Blocked: O(n^3/p)
Strassen: O(n^log2(7)/p)

Space Complexity: O(n^2)

Applications:

Scientific computing
Machine learning
Computer graphics
Signal processing
Image processing

Advantages:

Faster than sequential multiplication
Better cache utilization
Scalable with number of processors
Can handle large matrices
Flexible implementation

Disadvantages:

Overhead from parallelization
Memory intensive
Complex implementation
May not be efficient for small matrices
Requires careful tuning