From acdeee5c38d98f8ae8def5e9688f0f773c157fe8 Mon Sep 17 00:00:00 2001 From: Matin Salami Date: Wed, 4 Jun 2025 17:10:03 +0200 Subject: [PATCH 1/2] done --- flaskr/helpers.py | 231 +++++++++++++++++++++++------------------ flaskr/musicProject.py | 94 +++++++++++++---- 2 files changed, 204 insertions(+), 121 deletions(-) diff --git a/flaskr/helpers.py b/flaskr/helpers.py index 3f872c8..f079589 100644 --- a/flaskr/helpers.py +++ b/flaskr/helpers.py @@ -1,14 +1,16 @@ import ffmpeg from scipy.signal import butter, lfilter import scipy.io.wavfile as wav +from scipy.signal import wiener import numpy as np -# from pydub import AudioSegment import os _AUDIO_FILE_ = "audio.wav" APP_ROOT = os.path.dirname(os.path.abspath(__file__)) -#UPLOAD_FOLDER = os.path.join(APP_ROOT, 'static', "result.mp4") + +UPLOAD_FOLDER = os.path.join(APP_ROOT, "static") +pathMaker = lambda prefix, fileName: os.path.join(UPLOAD_FOLDER, f"{prefix}.{fileName.split('.')[-1]}") def upscaler(tw, th, readFrom, writeTo): @@ -27,8 +29,7 @@ def makePhoneLike(filterOrder, sideGain, readFrom, writeTo): if os.path.exists(_AUDIO_FILE_): os.remove(_AUDIO_FILE_) - os.system(f'ffmpeg -i {readFrom} -af "pan=2c|c0={sideGain}*c0|c1={1-sideGain}*c1" {_AUDIO_FILE_}') - + os.system(f'ffmpeg -i "{readFrom}" -af "pan=2c|c0={sideGain}*c0|c1={1-sideGain}*c1" {_AUDIO_FILE_}') sample_rate, samples_original = wav.read(_AUDIO_FILE_) num, denom = butter(filterOrder, [800, 3400] , "bandpass", fs=sample_rate) ot = lfilter(num, denom, samples_original) @@ -39,109 +40,71 @@ def makePhoneLike(filterOrder, sideGain, readFrom, writeTo): # ot = ffmpeg.output(prob, au, "video.mp4") return -def denoise_and_delay(readFrom, writeTo, noise_power_db, delay_ms, delay_gain_percent): - - global _AUDIO_FILE_ - vid = ffmpeg.input(readFrom).video - au = ffmpeg.input(readFrom).audio - info = ffmpeg.probe(readFrom, cmd="ffprobe") - - noChannels = info["streams"][1]["channels"] - audioStream = au.output(_AUDIO_FILE_, ac=noChannels).overwrite_output().run() - - sample_rate, samples_original = wav.read(_AUDIO_FILE_) - - filter_order = 4 - low_cutoff = 300 - high_cutoff = 3400 - - noise_factor = 10 ** (noise_power_db / 20) - - b, a = butter(filter_order, [low_cutoff, high_cutoff], btype='bandpass', fs=sample_rate) - - - if len(samples_original.shape) > 1: # For stereo audio - denoised_audio = np.zeros_like(samples_original) - for channel in range(samples_original.shape[1]): - denoised_audio[:, channel] = lfilter(b, a, samples_original[:, channel]) - else: # For mono audio - denoised_audio = lfilter(b, a, samples_original) - - # reducing noise - denoised_audio = denoised_audio * noise_factor - - - delay_samples = int((delay_ms / 1000) * sample_rate) - delay_gain = delay_gain_percent / 100 - - - if len(denoised_audio.shape) > 1: # For stereo audio - delayed_audio = np.zeros_like(denoised_audio) - for channel in range(denoised_audio.shape[1]): - delayed_channel = np.zeros_like(denoised_audio[:, channel]) - delayed_channel[delay_samples:] = denoised_audio[:-delay_samples, channel] if delay_samples < len(denoised_audio) else 0 - #scale the delayed audio - delayed_channel = delayed_channel * delay_gain - # Mix with the original - delayed_audio[:, channel] = denoised_audio[:, channel] + delayed_channel - else: # For mono audio - delayed_audio = np.zeros_like(denoised_audio) - delayed_audio[delay_samples:] = denoised_audio[:-delay_samples] if delay_samples < len(denoised_audio) else 0 - delayed_audio = delayed_audio * delay_gain - delayed_audio = denoised_audio + delayed_audio - - # Normalize to prevent clipping - max_val = np.max(np.abs(delayed_audio)) - if max_val > 32767: # Max value for 16-bit audio - delayed_audio = delayed_audio * (32767 / max_val) - - # Convert back to int16 for saving - delayed_audio = np.asarray(delayed_audio, dtype=np.int16) - - # Save the processed audio - wav.write(_AUDIO_FILE_, sample_rate, delayed_audio) - - # Combine processed audio with the original video - auInpF = ffmpeg.input(_AUDIO_FILE_) - ffmpeg.output(vid, auInpF, writeTo).overwrite_output().run() - - return +def denoise_and_delay( noise_power, delay_ms, delay_gain, readFrom, writeTo): + global _AUDIO_FILE_ + vid = ffmpeg.input(readFrom).video + au = ffmpeg.input(readFrom).audio + info = ffmpeg.probe(readFrom , cmd = "ffprobe") + noChannels = info ["streams"][1]["channels"] + au.output(_AUDIO_FILE_ , ac = noChannels).overwrite_output().run() + sample_rate , sample_originals = wav.read(_AUDIO_FILE_) + sample_float = sample_originals.astype(np.float64) #converting to float since we need higher percision + noise_reduction_streanght = max(3,min(15,int(abs(noise_power)/2))) + denoised_audio = wiener (sample_float , mysize = noise_reduction_streanght) -def applyGainCompression(threshold_db, limiter_db, readFrom, writeTo): - - stream = ffmpeg.input(readFrom) - - # Handle threshold parameter (ensure it's negative) - abs_threshold = abs(threshold_db) if threshold_db < 0 else threshold_db - threshold_point = f"-{abs_threshold}/-{abs_threshold}" - - # Handle limiter parameter - mid_point = f"-{abs_threshold/2}/-{abs_threshold+limiter_db/2}" - limiter_point = f"0/-{limiter_db}" - - # Combine points to create the compression curve - points = f"{threshold_point}|{mid_point}|{limiter_point}" - - # Apply compression filter using 'compand' - compressed_audio = stream.audio.filter( - 'compand', - attacks='0.01', - decays='0.5', - points=points, - gain='0' - ) - - # Combine original video with compressed audio - result = ffmpeg.output(stream.video, compressed_audio, writeTo).overwrite_output().run() + delay_samples = int ((delay_ms/1000) * sample_rate ) + delay_gain_decimal = delay_gain / 100.0 + + delayed_signal = np.zeros_like(denoised_audio) + if delay_samples < len(denoised_audio) : + delayed_signal[delay_samples:] = denoised_audio[:-delay_samples] + delayed_audio = denoised_audio + delayed_signal * delay_gain_decimal + if np.max(np.abs(delayed_audio)) > 0: + delayed_audio = delayed_audio * (32767 / np.max(np.abs(delayed_audio))*0.9) + data2 = np.asarray(delayed_audio , dtype = np.int16) + wav.write(_AUDIO_FILE_ , sample_rate , data2) + ffmpeg.output(vid,ffmpeg.input(_AUDIO_FILE_) , writeTo).overwrite_output().run() + return +def frameInterpolation (targetFps , readFrom , writeTo): + stream = ffmpeg.input(readFrom) + vid = stream.video + audio = stream.audio + + info = ffmpeg.probe(readFrom , cmd = "ffprobe") + video_stream = next ((s for s in info['streams'] if s['codec_type'] == 'video' ), None) + if 'avg_frame_rate' in video_stream: + frame_rate_fraction = video_stream['avg_frame_rate'] + if '/' in frame_rate_fraction: + num,den = map(int , frame_rate_fraction.split('/')) + original_fps = num / den if den != 0 else 0 + else: + original_fps = float(frame_rate_fraction) + else: + original_fps = 30 + + if targetFps >= original_fps: + interpolated = vid.filter('minterpolate' , + fps = targetFps, + mi_mode = 'mci', + mc_mode = 'aobmc', + me_mode = 'bidir') + ffmpeg.output(interpolated , audio , writeTo , + vcodec = 'libx264', + acodec = 'aac').overwrite_output().run() + else: + decreased = vid.filter('fps',fps = targetFps) + ffmpeg.output(decreased , audio , writeTo , vcodec = 'libx264' , acodec = 'aac').overwrite_output().run() + + return def voiceEnhancement(preEmphasisAlpha, filterOrder, readFrom, writeTo): @@ -212,6 +175,7 @@ def voiceEnhancement(preEmphasisAlpha, filterOrder, readFrom, writeTo): + def applyGrayscale(readFrom, writeTo): # Load video input stream = ffmpeg.input(readFrom) @@ -238,9 +202,76 @@ def colorInvert(readFrom, writeTo): acodec='copy' # Copy audio to avoid re-encoding ) # Run the ffmpeg command - out, err = output.overwrite_output().run(capture_stdout=True, capture_stderr=True) - print("FFmpeg stdout:", out) - print("FFmpeg stderr:", err) + output.overwrite_output().run() return + + +def makeCarLike(sideGain_db, filterOrder, readFrom, writeTo): + global _AUDIO_FILE_ + + # Extract video stream + vid = ffmpeg.input(readFrom).video + + # Clean up existing audio file + if os.path.exists(_AUDIO_FILE_): + os.remove(_AUDIO_FILE_) + + # Convert sideGain from dB to linear + sideGain_linear = 10 ** (sideGain_db / 20) + + # Calculate pan coefficients for stereo enhancement + # In fact, for each output channel (new left or new right), we take 50% of the original left and 50% of the original right, + # then push them outward or inward depending on sideGain_linear. + # This widens or narrows the stereo image. + left_c0 = 0.5 + sideGain_linear * 0.5 + left_c1 = 0.5 - sideGain_linear * 0.5 + right_c0 = 0.5 - sideGain_linear * 0.5 + right_c1 = 0.5 + sideGain_linear * 0.5 + + # Extract audio with stereo enhancement + os.system(f'ffmpeg -i "{readFrom}" -af "pan=2c|c0={left_c0}*c0+{left_c1}*c1|c1={right_c0}*c0+{right_c1}*c1" {_AUDIO_FILE_}') + + # Read audio file. samples can be numpy array of shape (num_samples,) or is it is stereo it is (num_samples,2) + # Each element is a signed 16-bit integer (int16) in the range [–32768, +32767]. + sample_rate, samples = wav.read(_AUDIO_FILE_) + + # Convert to float for processing(as lfilter works best with float numbers + samples_float = samples.astype(np.float32) / 32768.0 + + # Apply low-pass filter at 10000 Hz as specified + # Butterworth design routines expect a cutoff in the range [0, 1], where 1 corresponds to Nyquist. + nyquist = sample_rate / 2 + normalized_cutoff = 10000 / nyquist + num, denom = butter(filterOrder, normalized_cutoff, btype="low") + + # Process each channel separately if stereo[Processes left channel separately: samples_float[:, 0],Processes right channel separately: samples_float[:, 1]] + # For mono just apply the filter + if len(samples_float.shape) == 2: + filtered_samples = np.zeros_like(samples_float) + for channel in range(samples_float.shape[1]): + filtered_samples[:, channel] = lfilter(num, denom, samples_float[:, channel]) + else: + filtered_samples = lfilter(num, denom, samples_float) + + + # Convert back to int16 with proper scaling + max_val = np.max(np.abs(filtered_samples)) + if max_val > 1.0: + filtered_samples = filtered_samples / max_val + + # Final outputed sample which is then written back to the wav file + output_samples = (filtered_samples * 32767).astype(np.int16) + + + # Write processed audio back + wav.write(_AUDIO_FILE_, sample_rate, output_samples) + + # Combine audio and video + auInpF = ffmpeg.input(_AUDIO_FILE_) + ffmpeg.output(vid, auInpF, writeTo).overwrite_output().run() + + return + + diff --git a/flaskr/musicProject.py b/flaskr/musicProject.py index 8620894..585c7e2 100644 --- a/flaskr/musicProject.py +++ b/flaskr/musicProject.py @@ -1,94 +1,146 @@ from flask import Flask, request, render_template, make_response, send_from_directory import os -from helpers import upscaler, makePhoneLike , denoise_and_delay, applyGainCompression, applyGrayscale, colorInvert, voiceEnhancement -app = Flask(__name__, static_folder="static",instance_relative_config=True) +from helpers import upscaler, makePhoneLike , denoise_and_delay, applyGrayscale, colorInvert, voiceEnhancement, pathMaker, makeCarLike +app = Flask(__name__, static_folder="static", instance_relative_config=True) + + _UPLOADED_ = 0 + + +""" + +Once a file is uploaded, we save the name of that file in these 2 variables. +_FILE_NAME_ is then changed accordingly as we start to apply our filters + +""" _FILE_NAME_ = "" +_INITIAL_FILE_NAME_ = "" + +# Filter configs are stored in here _CONFIGS_ = [] APP_ROOT = os.path.dirname(os.path.abspath(__file__)) UPLOAD_FOLDER = os.path.join(APP_ROOT, "static") + +# Project template is served at "/" @app.route("/") -def hello_world(): +def landingPage(): return render_template('project_template.html') +""" +Upload: it's not possible to upload a file if one has already been uploaded. +""" @app.route("/post/", methods=["POST"]) def uploadedVideo(): - global _UPLOADED_, _FILE_NAME_ + global _UPLOADED_, _FILE_NAME_, _INITIAL_FILE_NAME_ if _UPLOADED_ == 1: - mr = make_response(render_template("project_template.html"), 404) + # Return 403 status code to indicate forbidden operation + mr = make_response(render_template("project_template.html"), 403) mr.headers["res"] = "You've already uploaded a file!" return mr else: + # saving the file in our folder request.files["file"].save(request.files["file"].filename) _UPLOADED_ = 1 + # assigning file names! _FILE_NAME_ = request.files["file"].filename - print(f"File uploaded: {_FILE_NAME_}")# + _INITIAL_FILE_NAME_ = _FILE_NAME_ return render_template("project_template.html") - +""" +Delete: Delete file from folder if there's been an upload! +""" @app.route("/delete/", methods=["DELETE"]) def deletedVideo(): - global _UPLOADED_, _FILE_NAME_ + global _UPLOADED_, _FILE_NAME_, _INITIAL_FILE_NAME_ if _UPLOADED_ == 1: _UPLOADED_ = 0 - os.remove(_FILE_NAME_) - _FILE_NAME_ = "" + # removing the original uploaded file + os.remove(_INITIAL_FILE_NAME_) + _FILE_NAME_ = "" + _INITIAL_FILE_NAME_ = "" + # 204 to indicate the success of an op but no content to return mr = make_response(render_template("project_template.html"), 204) mr.headers["res"] = "file deleted!" return mr else: return render_template('project_template.html') +""" +Configure Filter: appends the desired filters to the CONFIGS list. +""" @app.route("/configurefilter/", methods=["POST"]) def saveConfiguration(): - global _CONFIGS_ + global _CONFIGS_, _FILE_NAME_ + # We want to apply the filters to the initial file + if _INITIAL_FILE_NAME_: + _FILE_NAME_ = _INITIAL_FILE_NAME_ + # clearing all previous filters! _CONFIGS_.clear() for l in request.get_json(): - _CONFIGS_.append([l["name"], {v["name"]: v["value"] for v in l["props"]}]) ## For easier use! + # Coupling between filter name and parameters -- using dict comprehension + _CONFIGS_.append([l["name"], {v["name"]: v["value"] for v in l["props"]}]) return render_template('project_template.html') @app.route("/applyfilter/", methods=["GET"]) def applyFilter(): - global _CONFIGS_, _FILE_NAME_ + global _CONFIGS_, _FILE_NAME_, _INITIAL_FILE_NAME_ + # should not be able to apply a filter if nothing has been uploaded! if not _CONFIGS_ or not _FILE_NAME_: mr = make_response(render_template("project_template.html"), 403) mr.headers["res"] = "Missing file or config!" return mr else: configSize = len(_CONFIGS_) + # Going through the config couplings and applying each filter for (i, (k, v)) in enumerate(_CONFIGS_): + """ + We cannot read and write to a file at the same time, so we're forced + to save some intermediary files + """ prevFileName = _FILE_NAME_ if i == (configSize - 1): - _FILE_NAME_ = os.path.join(UPLOAD_FOLDER, f"result.{_FILE_NAME_.split('.')[-1]}") + # The resulting file is saved as result. + format of file in the static folder + _FILE_NAME_ = pathMaker("result", _FILE_NAME_) else: - _FILE_NAME_ = os.path.join(UPLOAD_FOLDER, f"temp{i}.{_FILE_NAME_.split('.')[-1]}") + _FILE_NAME_ = pathMaker(f"temp{i}", _FILE_NAME_) if k == "phone": makePhoneLike(int(v["phoneFilterOrder"]), float(v["phoneSideGain"]), prevFileName, _FILE_NAME_) elif k == "upscale": upscaler(int(v["upscaleTargetWidth"]), int(v["upscaleTargetHeight"]), prevFileName, _FILE_NAME_) elif k == "denoiseDelay": - denoise_and_delay(_FILE_NAME_ , int (v["noisePower"]) , int(v["delay"]) , int(v["delayGain"]) ) + denoise_and_delay( float (v["noisePower"]) , int(v["delay"]) , int(v["delayGain"]) , prevFileName , _FILE_NAME_) elif k == "grayscale": applyGrayscale(prevFileName,_FILE_NAME_) - elif k == "gainCompressor": - applyGainCompression(float(v["gainCompressorThreshold"]), float(v["limiterThreshold"]), prevFileName, _FILE_NAME_) + elif k == "car": + makeCarLike(float(v["carSideGain"]), int(v["carFilterOrder"]), prevFileName, _FILE_NAME_) elif k == "voiceEnhancement": voiceEnhancement(int(v["preemphasisAlpha"]), int(v["highPassFilter"]), prevFileName, _FILE_NAME_) elif k == "colorinvert": colorInvert(prevFileName, _FILE_NAME_) - os.remove(prevFileName) + elif k == "frameInterpolate": + frameInterpolation(float (v["frameInterpolateTargetFps"]) , prevFileName , _FILE_NAME_) + """ + we don't want to remove the original file as we want to be able to + clean -> config -> apply + """ + if i: + os.remove(prevFileName) + return render_template("project_template.html") - return render_template("project_template.html") - +""" +Serving the filtered or unfiltered file +""" @app.route("/stream/", methods=["GET"]) def stream(): + if not _FILE_NAME_: + return make_response(render_template("project_template.html"), 404) return send_from_directory(UPLOAD_FOLDER, f"result.{_FILE_NAME_.split('.')[-1]}", as_attachment=True) \ No newline at end of file From 99886683eb171c9881b031784866571a9a3dee65 Mon Sep 17 00:00:00 2001 From: Matin Salami Date: Fri, 29 Aug 2025 16:54:48 +0200 Subject: [PATCH 2/2] Create README.md --- README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..3673ed1 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +A Music Project for developing different A/V filters in Python and FFmpeg library