From acdeee5c38d98f8ae8def5e9688f0f773c157fe8 Mon Sep 17 00:00:00 2001
From: Matin Salami <s306400@studenti.polito.it>
Date: Wed, 4 Jun 2025 17:10:03 +0200
Subject: [PATCH 1/2] done

---
 flaskr/helpers.py      | 231 +++++++++++++++++++++++------------------
 flaskr/musicProject.py |  94 +++++++++++++----
 2 files changed, 204 insertions(+), 121 deletions(-)

diff --git a/flaskr/helpers.py b/flaskr/helpers.py
index 3f872c8..f079589 100644
--- a/flaskr/helpers.py
+++ b/flaskr/helpers.py
@@ -1,14 +1,16 @@
 import ffmpeg
 from scipy.signal import butter, lfilter
 import scipy.io.wavfile as wav
+from scipy.signal import wiener
 
 
 import numpy as np
-# from pydub import AudioSegment
 import os
 _AUDIO_FILE_ = "audio.wav"
 APP_ROOT = os.path.dirname(os.path.abspath(__file__))
-#UPLOAD_FOLDER = os.path.join(APP_ROOT, 'static', "result.mp4")
+
+UPLOAD_FOLDER = os.path.join(APP_ROOT, "static")
+pathMaker = lambda prefix, fileName: os.path.join(UPLOAD_FOLDER, f"{prefix}.{fileName.split('.')[-1]}")
 
 
 def upscaler(tw, th, readFrom, writeTo):
@@ -27,8 +29,7 @@ def makePhoneLike(filterOrder, sideGain, readFrom, writeTo):
     if os.path.exists(_AUDIO_FILE_):
         os.remove(_AUDIO_FILE_)
 
-    os.system(f'ffmpeg -i {readFrom} -af "pan=2c|c0={sideGain}*c0|c1={1-sideGain}*c1" {_AUDIO_FILE_}')
-    
+    os.system(f'ffmpeg -i "{readFrom}" -af "pan=2c|c0={sideGain}*c0|c1={1-sideGain}*c1" {_AUDIO_FILE_}')
     sample_rate, samples_original = wav.read(_AUDIO_FILE_)
     num, denom = butter(filterOrder,  [800, 3400] , "bandpass", fs=sample_rate) 
     ot = lfilter(num, denom, samples_original)
@@ -39,109 +40,71 @@ def makePhoneLike(filterOrder, sideGain, readFrom, writeTo):
     # ot = ffmpeg.output(prob, au, "video.mp4")
     return 
 
-def denoise_and_delay(readFrom, writeTo, noise_power_db, delay_ms, delay_gain_percent):
-    
-    global _AUDIO_FILE_
-    vid = ffmpeg.input(readFrom).video
-    au = ffmpeg.input(readFrom).audio
-    info = ffmpeg.probe(readFrom, cmd="ffprobe")
-    
-    noChannels = info["streams"][1]["channels"] 
-    audioStream = au.output(_AUDIO_FILE_, ac=noChannels).overwrite_output().run()
-    
-    sample_rate, samples_original = wav.read(_AUDIO_FILE_)
-    
-    filter_order = 4
-    low_cutoff = 300
-    high_cutoff = 3400
-    
-    noise_factor = 10 ** (noise_power_db / 20)
-    
-    b, a = butter(filter_order, [low_cutoff, high_cutoff], btype='bandpass', fs=sample_rate)
-    
-    
-    if len(samples_original.shape) > 1:  # For stereo audio
-        denoised_audio = np.zeros_like(samples_original)
-        for channel in range(samples_original.shape[1]):
-            denoised_audio[:, channel] = lfilter(b, a, samples_original[:, channel])
-    else:  # For mono audio
-        denoised_audio = lfilter(b, a, samples_original)
-    
-    # reducing noise
-    denoised_audio = denoised_audio * noise_factor
-    
-    
-    delay_samples = int((delay_ms / 1000) * sample_rate)
-    delay_gain = delay_gain_percent / 100
-    
-    
-    if len(denoised_audio.shape) > 1:  # For stereo audio
-        delayed_audio = np.zeros_like(denoised_audio)
-        for channel in range(denoised_audio.shape[1]):
-            delayed_channel = np.zeros_like(denoised_audio[:, channel])
-            delayed_channel[delay_samples:] = denoised_audio[:-delay_samples, channel] if delay_samples < len(denoised_audio) else 0
-            #scale the delayed audio        
-            delayed_channel = delayed_channel * delay_gain
-            # Mix with the original
-            delayed_audio[:, channel] = denoised_audio[:, channel] + delayed_channel
-    else:  # For mono audio
-        delayed_audio = np.zeros_like(denoised_audio)
-        delayed_audio[delay_samples:] = denoised_audio[:-delay_samples] if delay_samples < len(denoised_audio) else 0
-        delayed_audio = delayed_audio * delay_gain
-        delayed_audio = denoised_audio + delayed_audio
-    
-    # Normalize to prevent clipping
-    max_val = np.max(np.abs(delayed_audio))
-    if max_val > 32767:  # Max value for 16-bit audio
-        delayed_audio = delayed_audio * (32767 / max_val)
-    
-    # Convert back to int16 for saving
-    delayed_audio = np.asarray(delayed_audio, dtype=np.int16)
-    
-    # Save the processed audio
-    wav.write(_AUDIO_FILE_, sample_rate, delayed_audio)
-    
-    # Combine processed audio with the original video
-    auInpF = ffmpeg.input(_AUDIO_FILE_)
-    ffmpeg.output(vid, auInpF, writeTo).overwrite_output().run()
-    
-    return
 
+def denoise_and_delay( noise_power, delay_ms, delay_gain, readFrom, writeTo):
+   global _AUDIO_FILE_
+   vid = ffmpeg.input(readFrom).video
+   au = ffmpeg.input(readFrom).audio
+   info = ffmpeg.probe(readFrom , cmd = "ffprobe")
+   noChannels = info ["streams"][1]["channels"]
+   au.output(_AUDIO_FILE_ , ac = noChannels).overwrite_output().run()
+   sample_rate , sample_originals = wav.read(_AUDIO_FILE_)
+   sample_float = sample_originals.astype(np.float64) #converting to float since we need higher percision 
 
+   noise_reduction_streanght = max(3,min(15,int(abs(noise_power)/2)))
 
+   denoised_audio = wiener (sample_float , mysize = noise_reduction_streanght)
 
-def applyGainCompression(threshold_db, limiter_db, readFrom, writeTo):
-    
-    stream = ffmpeg.input(readFrom)
-    
-    # Handle threshold parameter (ensure it's negative)
-    abs_threshold = abs(threshold_db) if threshold_db < 0 else threshold_db
-    threshold_point = f"-{abs_threshold}/-{abs_threshold}"
-    
-    # Handle limiter parameter
-    mid_point = f"-{abs_threshold/2}/-{abs_threshold+limiter_db/2}"
-    limiter_point = f"0/-{limiter_db}"
-    
-    # Combine points to create the compression curve
-    points = f"{threshold_point}|{mid_point}|{limiter_point}"
-    
-    # Apply compression filter using 'compand'
-    compressed_audio = stream.audio.filter(
-        'compand',
-        attacks='0.01',
-        decays='0.5',
-        points=points,
-        gain='0'
-    )
-    
-    # Combine original video with compressed audio
-    result = ffmpeg.output(stream.video, compressed_audio, writeTo).overwrite_output().run()
+   delay_samples = int ((delay_ms/1000) * sample_rate )
+   delay_gain_decimal = delay_gain / 100.0
+
+   delayed_signal = np.zeros_like(denoised_audio)
+   if 0 <= delay_samples < len(denoised_audio):  # [:-0] is empty, so slice by explicit length
+       delayed_signal[delay_samples:] = denoised_audio[:len(denoised_audio) - delay_samples]
+   delayed_audio = denoised_audio + delayed_signal * delay_gain_decimal
 
+   if np.max(np.abs(delayed_audio)) > 0:
+       delayed_audio = delayed_audio * (32767 / np.max(np.abs(delayed_audio))*0.9)
 
+   data2 = np.asarray(delayed_audio , dtype = np.int16)
+   wav.write(_AUDIO_FILE_ , sample_rate , data2)
+   ffmpeg.output(vid,ffmpeg.input(_AUDIO_FILE_) , writeTo).overwrite_output().run()
+   return
 
+def frameInterpolation(targetFps, readFrom, writeTo):
+    """Resample the video of ``readFrom`` to ``targetFps`` and write ``writeTo``.
+
+    Raising the rate uses motion-compensated interpolation (``minterpolate``);
+    lowering it drops frames with the ``fps`` filter. Audio is re-encoded as AAC.
+
+    Raises:
+        ValueError: If ``readFrom`` has no video stream.
+    """
+    stream = ffmpeg.input(readFrom)
+    info = ffmpeg.probe(readFrom, cmd="ffprobe")
+    video_stream = next((s for s in info["streams"] if s["codec_type"] == "video"), None)
+    if video_stream is None:
+        # Fail with a clear message instead of a TypeError on the lookup below.
+        raise ValueError(f"no video stream found in {readFrom}")
 
+    # e.g. "30000/1001"; a missing rate defaults to 30, an unusable one to 0 fps.
+    num, _, den = video_stream.get("avg_frame_rate", "30").partition("/")
+    try:
+        original_fps = float(num) / float(den or 1)
+    except (ValueError, ZeroDivisionError):
+        original_fps = 0.0
 
+    # Interpolate when keeping or raising the rate; otherwise just drop frames.
+    if targetFps >= original_fps:
+        video = stream.video.filter("minterpolate", fps=targetFps, mi_mode="mci",
+                                    mc_mode="aobmc", me_mode="bidir")
+    else:
+        video = stream.video.filter("fps", fps=targetFps)
+    # Both branches share the same encoder settings.
+    ffmpeg.output(video, stream.audio, writeTo,
+                  vcodec="libx264", acodec="aac").overwrite_output().run()
 
+    return
 
 
 def voiceEnhancement(preEmphasisAlpha, filterOrder, readFrom, writeTo):
@@ -212,6 +175,7 @@ def voiceEnhancement(preEmphasisAlpha, filterOrder, readFrom, writeTo):
 
 
 
+
 def applyGrayscale(readFrom, writeTo):
     # Load video input
     stream = ffmpeg.input(readFrom)
@@ -238,9 +202,76 @@ def colorInvert(readFrom, writeTo):
         acodec='copy'  # Copy audio to avoid re-encoding
     )
     # Run the ffmpeg command
-    out, err = output.overwrite_output().run(capture_stdout=True, capture_stderr=True)
-    print("FFmpeg stdout:", out)
-    print("FFmpeg stderr:", err)
+    output.overwrite_output().run()
 
     return
 
+
+
+def makeCarLike(sideGain_db, filterOrder, readFrom, writeTo):
+    """Apply a car-stereo-like audio effect and mux it back with the video.
+
+    The audio of ``readFrom`` is remixed with a stereo-width matrix, low-pass
+    filtered at 10 kHz with a Butterworth filter, and combined with the
+    video stream of ``readFrom`` into ``writeTo``.
+
+    Args:
+        sideGain_db: Stereo width gain in dB (0 dB leaves the channels as is).
+        filterOrder: Order of the Butterworth low-pass filter.
+        readFrom: Path of the input media file.
+        writeTo: Path of the output media file (overwritten if present).
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg fails to extract the audio.
+    """
+    # Local import so this function does not depend on the module header.
+    import subprocess
+
+    vid = ffmpeg.input(readFrom).video
+
+    # Convert sideGain from dB to a linear factor.
+    sideGain_linear = 10 ** (sideGain_db / 20)
+
+    # Each output channel is a mix of both inputs; a factor above 1 pushes
+    # the channels apart (wider image), a factor below 1 pulls them together.
+    same = 0.5 + sideGain_linear * 0.5
+    cross = 0.5 - sideGain_linear * 0.5
+    pan = f"pan=2c|c0={same}*c0+{cross}*c1|c1={cross}*c0+{same}*c1"
+
+    # An argument list (no shell) keeps a crafted file name from being
+    # interpreted as shell syntax; -y replaces any stale audio file.
+    subprocess.run(
+        ["ffmpeg", "-y", "-i", readFrom, "-af", pan, _AUDIO_FILE_],
+        check=True,
+    )
+
+    # samples has shape (num_samples,) for mono or (num_samples, 2) for stereo.
+    sample_rate, samples = wav.read(_AUDIO_FILE_)
+
+    # Work in float in [-1, 1); assumes ffmpeg wrote 16-bit PCM samples.
+    samples_float = samples.astype(np.float32) / 32768.0
+
+    # Low-pass at 10 kHz, kept strictly below Nyquist so butter() does not
+    # reject the cutoff for files sampled at 20 kHz or less.
+    cutoff_hz = min(10000.0, 0.99 * sample_rate / 2)
+    num, denom = butter(filterOrder, cutoff_hz, btype="low", fs=sample_rate)
+
+    # axis=0 filters along time, so mono and stereo share one code path.
+    filtered_samples = lfilter(num, denom, samples_float, axis=0)
+
+    # Scale down only when the filtered signal would otherwise clip.
+    max_val = np.max(np.abs(filtered_samples))
+    if max_val > 1.0:
+        filtered_samples = filtered_samples / max_val
+
+    # Convert back to int16 for the WAV writer.
+    output_samples = (filtered_samples * 32767).astype(np.int16)
+    wav.write(_AUDIO_FILE_, sample_rate, output_samples)
+
+    # Combine the processed audio with the original video stream.
+    auInpF = ffmpeg.input(_AUDIO_FILE_)
+    ffmpeg.output(vid, auInpF, writeTo).overwrite_output().run()
+
+    return
+        
+    
diff --git a/flaskr/musicProject.py b/flaskr/musicProject.py
index 8620894..585c7e2 100644
--- a/flaskr/musicProject.py
+++ b/flaskr/musicProject.py
@@ -1,94 +1,146 @@
 from flask import Flask, request, render_template, make_response, send_from_directory
 import os
-from helpers import upscaler, makePhoneLike , denoise_and_delay, applyGainCompression, applyGrayscale, colorInvert, voiceEnhancement
 
-app = Flask(__name__, static_folder="static",instance_relative_config=True)
+from helpers import upscaler, makePhoneLike , denoise_and_delay, applyGrayscale, colorInvert, voiceEnhancement,  pathMaker, makeCarLike, frameInterpolation
+app = Flask(__name__, static_folder="static", instance_relative_config=True)
+
+
 _UPLOADED_ = 0
+
+
+"""
+
+Once a file is uploaded, we save the name of that file in these 2 variables.
+_FILE_NAME_ is then changed accordingly as we start to apply our filters
+
+"""
 _FILE_NAME_ = ""
+_INITIAL_FILE_NAME_ = ""
+
+# Filter configs are stored in here
 _CONFIGS_ = []
 
 APP_ROOT = os.path.dirname(os.path.abspath(__file__))
 UPLOAD_FOLDER = os.path.join(APP_ROOT, "static")
 
+
+# Project template is served at "/"
 @app.route("/")
-def hello_world():
+def landingPage():
     return render_template('project_template.html')
 
 
+"""
+Upload: it's not possible to upload a file if one has already been uploaded. 
+"""
 @app.route("/post/", methods=["POST"])
 def uploadedVideo():
-    global _UPLOADED_, _FILE_NAME_
+    global _UPLOADED_, _FILE_NAME_, _INITIAL_FILE_NAME_
     if _UPLOADED_ == 1:
-        mr = make_response(render_template("project_template.html"), 404)
+        # Return 403 status code to indicate forbidden operation
+        mr = make_response(render_template("project_template.html"), 403)
         mr.headers["res"] = "You've already uploaded a file!"
         return mr
     else:
+        # saving the file in our folder
         request.files["file"].save(request.files["file"].filename)
         _UPLOADED_ = 1
+        # assigning file names! 
         _FILE_NAME_ = request.files["file"].filename
-        print(f"File uploaded: {_FILE_NAME_}")#
+        _INITIAL_FILE_NAME_ = _FILE_NAME_
     return render_template("project_template.html")
 
 
-
+"""
+Delete: Delete file from folder if there's been an upload! 
+"""
 @app.route("/delete/", methods=["DELETE"])
 def deletedVideo():
-    global _UPLOADED_, _FILE_NAME_
+    global _UPLOADED_, _FILE_NAME_, _INITIAL_FILE_NAME_
     if _UPLOADED_ == 1:
         _UPLOADED_ = 0
-        os.remove(_FILE_NAME_)
-        _FILE_NAME_ = ""        
+        # removing the original uploaded file
+        os.remove(_INITIAL_FILE_NAME_)
+        _FILE_NAME_ = ""
+        _INITIAL_FILE_NAME_ = ""        
+        # 204 to indicate the success of an op but no content to return
         mr = make_response(render_template("project_template.html"), 204)
         mr.headers["res"] = "file deleted!"
         return mr
     else: 
         return render_template('project_template.html')
 
+"""
+Configure Filter: appends the desired filters to the CONFIGS list. 
+"""
 @app.route("/configurefilter/", methods=["POST"])
 def saveConfiguration():
-    global _CONFIGS_
+    global _CONFIGS_, _FILE_NAME_
+    # We want to apply the filters to the initial file
+    if _INITIAL_FILE_NAME_:  
+        _FILE_NAME_ = _INITIAL_FILE_NAME_
+    # clearing all previous filters! 
     _CONFIGS_.clear()
     for l in request.get_json():
-        _CONFIGS_.append([l["name"], {v["name"]: v["value"] for v in l["props"]}]) ## For easier use!
+        # Coupling between filter name and parameters -- using dict comprehension
+        _CONFIGS_.append([l["name"], {v["name"]: v["value"] for v in l["props"]}]) 
     return render_template('project_template.html')
     
 
 @app.route("/applyfilter/", methods=["GET"])
 def applyFilter():
-    global _CONFIGS_, _FILE_NAME_
+    global _CONFIGS_, _FILE_NAME_, _INITIAL_FILE_NAME_
+    # should not be able to apply a filter if nothing has been uploaded! 
     if not _CONFIGS_ or not _FILE_NAME_:
         mr = make_response(render_template("project_template.html"), 403)
         mr.headers["res"] = "Missing file or config!"
         return mr   
     else:
         configSize = len(_CONFIGS_)
+        # Going through the config couplings and applying each filter
         for (i, (k, v)) in enumerate(_CONFIGS_):
+            """ 
+            We cannot read and write to a file at the same time, so we're forced 
+            to save some intermediary files 
+            """  
             prevFileName = _FILE_NAME_
             if i == (configSize - 1):
-                _FILE_NAME_ = os.path.join(UPLOAD_FOLDER, f"result.{_FILE_NAME_.split('.')[-1]}")
+                # The resulting file is saved as result. + format of file in the static folder
+                _FILE_NAME_ = pathMaker("result", _FILE_NAME_)
             else:
-                _FILE_NAME_ = os.path.join(UPLOAD_FOLDER, f"temp{i}.{_FILE_NAME_.split('.')[-1]}")
+                _FILE_NAME_ = pathMaker(f"temp{i}", _FILE_NAME_)
             if k == "phone":
                 makePhoneLike(int(v["phoneFilterOrder"]), float(v["phoneSideGain"]), prevFileName, _FILE_NAME_)
             elif k == "upscale":
                 upscaler(int(v["upscaleTargetWidth"]), int(v["upscaleTargetHeight"]), prevFileName, _FILE_NAME_)
             elif k == "denoiseDelay":
-                denoise_and_delay(_FILE_NAME_ , int (v["noisePower"]) , int(v["delay"]) , int(v["delayGain"]) )
+                denoise_and_delay( float (v["noisePower"]) , int(v["delay"]) , int(v["delayGain"]) , prevFileName , _FILE_NAME_)
             elif k == "grayscale":
                 applyGrayscale(prevFileName,_FILE_NAME_)
-            elif k == "gainCompressor":
-                applyGainCompression(float(v["gainCompressorThreshold"]), float(v["limiterThreshold"]), prevFileName, _FILE_NAME_)
+            elif k == "car":
+                makeCarLike(float(v["carSideGain"]), int(v["carFilterOrder"]), prevFileName, _FILE_NAME_)
             elif k == "voiceEnhancement":
                 voiceEnhancement(int(v["preemphasisAlpha"]), int(v["highPassFilter"]), prevFileName, _FILE_NAME_)
             elif k == "colorinvert":
                 colorInvert(prevFileName, _FILE_NAME_)
-            os.remove(prevFileName)
+            elif k == "frameInterpolate":
+                frameInterpolation(float (v["frameInterpolateTargetFps"]) , prevFileName , _FILE_NAME_)
+            """ 
+            we don't want to remove the original file as we want to be able to
+            clean -> config -> apply
+            """ 
+            if i:
+                os.remove(prevFileName)
+        return render_template("project_template.html")
 
 
-        return render_template("project_template.html")
-    
+"""
+Serving the filtered or unfiltered file
+"""
 @app.route("/stream/", methods=["GET"])
 def stream():
+    if not _FILE_NAME_:
+        return  make_response(render_template("project_template.html"), 404)
     return send_from_directory(UPLOAD_FOLDER,
                                f"result.{_FILE_NAME_.split('.')[-1]}", as_attachment=True)  
         
\ No newline at end of file

From 99886683eb171c9881b031784866571a9a3dee65 Mon Sep 17 00:00:00 2001
From: Matin Salami <matinsalami075@gmail.com>
Date: Fri, 29 Aug 2025 16:54:48 +0200
Subject: [PATCH 2/2] Create README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3673ed1
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+A music project for developing various audio/video filters in Python using the FFmpeg library.