Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
| 5 |
_D='./video_data/'
|
| 6 |
_C='value'
|
| 7 |
_B='label'
|
|
@@ -21,8 +22,8 @@ def plot_images(image_paths):
|
|
| 21 |
if A>=7:break
|
| 22 |
return B
|
| 23 |
def download_video(video_url,output_video_path=_D):
|
| 24 |
-
B=output_video_path;D={'format':'bestvideo+bestaudio/best','merge_output_format':'mp4','outtmpl':f"{B}/input_vid.mp4",'noplaylist':_A,'quiet':
|
| 25 |
-
with yt_dlp.YoutubeDL(D)as C:A=C.extract_info(video_url,download=_A);A=C.sanitize_info(A);return{
|
| 26 |
def video_to_images(video_path, output_folder):
    """Extract still frames from a video into ``output_folder``.

    The output folder is created if needed, then frames are sampled at
    0.2 fps (one frame every five seconds) and written as PNG files named
    with the pattern ``frame%04d.png``.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the PNG frames.
    """
    # `_A` is a module-level flag defined outside the visible source
    # (presumably True) -- kept as-is.
    Path(output_folder).mkdir(parents=_A, exist_ok=_A)
    clip = VideoFileClip(video_path)
    try:
        clip.write_images_sequence(
            os.path.join(output_folder, 'frame%04d.png'), fps=.2
        )
    finally:
        # The original never closed the clip; release its resources even
        # when frame extraction fails.
        clip.close()
|
| 27 |
def video_to_audio(video_path, output_audio_path):
    """Write the audio track of a video to ``output_audio_path``.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Destination path of the audio file.

    Raises:
        ValueError: If the video has no audio track (previously this
            surfaced as an opaque ``AttributeError`` on ``None``).
    """
    clip = VideoFileClip(video_path)
    try:
        audio = clip.audio
        if audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        audio.write_audiofile(output_audio_path)
    finally:
        # The original never closed the clip; release its resources even
        # when the export fails.
        clip.close()
|
| 28 |
def audio_to_text(audio_path):
|
|
@@ -31,12 +32,12 @@ def audio_to_text(audio_path):
|
|
| 31 |
with sr.AudioFile(audio_path)as B:C=A.record(B);D=A.recognize_google(C);return D
|
| 32 |
except sr.UnknownValueError:print('Google Speech Recognition could not understand the audio.')
|
| 33 |
except sr.RequestError as E:print(f"Could not request results: {E}")
|
| 34 |
-
def prepare_all_videos(video_folder=_D,output_folder=
|
| 35 |
'\n Processes all video files in video_folder, extracting images and text for each,\n and stores them in unique subfolders under output_folder.\n Returns a list of metadata dicts for all videos.\n ';J='unknown';F=output_folder;E=video_folder;Path(F).mkdir(parents=_A,exist_ok=_A);K=[A for A in os.listdir(E)if A.lower().endswith(('.mp4','.mov','.avi','.mkv'))];G=[]
|
| 36 |
for B in K:
|
| 37 |
H=os.path.join(E,B);I=Path(B).stem;A=os.path.join(F,I);Path(A).mkdir(parents=_A,exist_ok=_A);C=os.path.join(A,'output_audio.wav');video_to_images(H,A);video_to_audio(H,C);D=audio_to_text(C);L=os.path.join(A,'output_text.txt')
|
| 38 |
with open(L,'w')as M:M.write(D if D else'')
|
| 39 |
-
os.remove(C);N={
|
| 40 |
return G
|
| 41 |
from llama_index.core.indices import MultiModalVectorStoreIndex
|
| 42 |
from llama_index.core import SimpleDirectoryReader,StorageContext
|
|
@@ -60,13 +61,13 @@ available_models=[{_C:'meta-llama/llama-4-maverick:free',_B:'Llama'},{_C:'qwen/q
|
|
| 60 |
# Two-way lookup between a model's identifier (the `_C` key of each entry)
# and its display label (the `_B` key), built from `available_models`.
model_value_to_label = {entry[_C]: entry[_B] for entry in available_models}
model_label_to_value = {entry[_B]: entry[_C] for entry in available_models}
|
| 62 |
def gradio_chat(query,model_label):
|
| 63 |
-
K='image_url';J='type';D=query;C='content';L=_D;E=
|
| 64 |
try:
|
| 65 |
-
M=prepare_all_videos(L,E);N=json.dumps([A['meta']for A in M]);O=create_vector_db_for_all(E);P,Q=retrieve(retriever_engine=O,query_str=D);R=''.join(Q);S=qa_tmpl_str.format(context_str=R,query_str=D,metadata_str=N);T=os.environ['
|
| 66 |
for A in P:
|
| 67 |
try:W=Image.open(A);H=BytesIO();W.save(H,format='JPEG');X=base64.b64encode(H.getvalue()).decode('utf-8');F[0][C].append({J:K,K:{'url':f"data:image/jpeg;base64,{X}"}});G.append(A)
|
| 68 |
except Exception as B:print(f"Error loading image {A}: {B}")
|
| 69 |
Y={'model':V,'messages':F};I=requests.post(url='https://openrouter.ai/api/v1/chat/completions',headers=U,data=json.dumps(Y));I.raise_for_status();Z=I.json()['choices'][0]['message'][C];return Z,G
|
| 70 |
except Exception as B:return f"Error: {str(B)}",[]
|
| 71 |
-
# Gradio front end: a query box plus a model dropdown feed `gradio_chat`,
# which returns the answer text and a gallery of the retrieved images.
gradio_ui = gr.Interface(
    fn=gradio_chat,
    inputs=[
        gr.Textbox(label='', placeholder='Try: Best island in Maldives'),
        gr.Dropdown(
            choices=[entry[_B] for entry in available_models],
            value=available_models[0][_B],
            label='Select Model:',
        ),
    ],
    outputs=[
        gr.Textbox(label='Vega Response:'),
        gr.Gallery(label='Relevant Images', allow_preview=_A),
    ],
    title='',
    description='',
    theme=gr.themes.Default(primary_hue='sky'),
)
|
| 72 |
-
if __name__=='__main__':gradio_ui.launch(share=
|
|
|
|
| 1 |
+
_I='./mixed_data/'
|
| 2 |
+
_H='text'
|
| 3 |
+
_G='uploader'
|
| 4 |
+
_F='title'
|
| 5 |
+
_E=False
|
| 6 |
_D='./video_data/'
|
| 7 |
_C='value'
|
| 8 |
_B='label'
|
|
|
|
| 22 |
if A>=7:break
|
| 23 |
return B
|
| 24 |
def download_video(video_url, output_video_path=_D):
    """Download a video with yt-dlp and return basic metadata about it.

    The output directory is created if needed and the download is written
    to ``<output_video_path>/input_vid.mp4`` (best video + best audio,
    merged as mp4, playlists disabled).

    Args:
        video_url: URL of the video to download.
        output_video_path: Directory that receives the downloaded file.

    Returns:
        A dict with the ``_F`` and ``_G`` keys copied from the extracted
        info and a ``'views'`` key holding its ``view_count``; any field
        the extractor did not provide is ``None``.
    """
    target_dir = output_video_path
    # `_A` is a module-level flag defined outside the visible source
    # (presumably True) -- kept as-is.
    ydl_options = {
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4',
        'outtmpl': f"{target_dir}/input_vid.mp4",
        'noplaylist': _A,
        'quiet': _E,
    }
    Path(target_dir).mkdir(parents=_A, exist_ok=_A)
    with yt_dlp.YoutubeDL(ydl_options) as downloader:
        raw_info = downloader.extract_info(video_url, download=_A)
        info = downloader.sanitize_info(raw_info)
        return {
            _F: info.get(_F),
            _G: info.get(_G),
            'views': info.get('view_count'),
        }
|
| 27 |
def video_to_images(video_path, output_folder):
    """Extract still frames from a video into ``output_folder``.

    The output folder is created if needed, then frames are sampled at
    0.2 fps (one frame every five seconds) and written as PNG files named
    with the pattern ``frame%04d.png``.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the PNG frames.
    """
    # `_A` is a module-level flag defined outside the visible source
    # (presumably True) -- kept as-is.
    Path(output_folder).mkdir(parents=_A, exist_ok=_A)
    clip = VideoFileClip(video_path)
    try:
        clip.write_images_sequence(
            os.path.join(output_folder, 'frame%04d.png'), fps=.2
        )
    finally:
        # The original never closed the clip; release its resources even
        # when frame extraction fails.
        clip.close()
|
| 28 |
def video_to_audio(video_path, output_audio_path):
    """Write the audio track of a video to ``output_audio_path``.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Destination path of the audio file.

    Raises:
        ValueError: If the video has no audio track (previously this
            surfaced as an opaque ``AttributeError`` on ``None``).
    """
    clip = VideoFileClip(video_path)
    try:
        audio = clip.audio
        if audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        audio.write_audiofile(output_audio_path)
    finally:
        # The original never closed the clip; release its resources even
        # when the export fails.
        clip.close()
|
| 29 |
def audio_to_text(audio_path):
|
|
|
|
| 32 |
with sr.AudioFile(audio_path)as B:C=A.record(B);D=A.recognize_google(C);return D
|
| 33 |
except sr.UnknownValueError:print('Google Speech Recognition could not understand the audio.')
|
| 34 |
except sr.RequestError as E:print(f"Could not request results: {E}")
|
| 35 |
+
def prepare_all_videos(video_folder=_D, output_folder=_I):
    """Extract frames and a transcript for every video in ``video_folder``.

    Each file ending in .mp4/.mov/.avi/.mkv (case-insensitive) gets its own
    subfolder under ``output_folder`` named after the file stem. That
    subfolder receives the extracted frames and ``output_text.txt`` with the
    transcript (empty when transcription produced nothing). The intermediate
    ``output_audio.wav`` is deleted once it has been transcribed.

    Args:
        video_folder: Directory scanned (non-recursively) for video files.
        output_folder: Root directory for the per-video subfolders.

    Returns:
        One dict per video with the keys ``'meta'`` (title from the file
        stem, uploader/views set to ``'unknown'``, and the source file name),
        the ``_H`` key holding the raw transcript (may be ``None``), and
        ``'folder'`` (the per-video output directory).
    """
    unknown = 'unknown'
    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')

    # `_A` is a module-level flag defined outside the visible source
    # (presumably True) -- kept as-is.
    Path(output_folder).mkdir(parents=_A, exist_ok=_A)
    video_files = [
        name
        for name in os.listdir(video_folder)
        if name.lower().endswith(video_extensions)
    ]

    results = []
    for file_name in video_files:
        video_path = os.path.join(video_folder, file_name)
        stem = Path(file_name).stem
        video_output_dir = os.path.join(output_folder, stem)
        Path(video_output_dir).mkdir(parents=_A, exist_ok=_A)

        audio_path = os.path.join(video_output_dir, 'output_audio.wav')
        video_to_images(video_path, video_output_dir)
        video_to_audio(video_path, audio_path)
        transcript = audio_to_text(audio_path)

        text_path = os.path.join(video_output_dir, 'output_text.txt')
        # Explicit UTF-8: the platform default encoding can raise
        # UnicodeEncodeError on transcripts containing non-ASCII characters.
        with open(text_path, 'w', encoding='utf-8') as text_file:
            text_file.write(transcript if transcript else '')

        # The WAV file is only an intermediate artifact for transcription.
        os.remove(audio_path)

        meta = {_F: stem, _G: unknown, 'views': unknown, 'file': file_name}
        results.append(
            {'meta': meta, _H: transcript, 'folder': video_output_dir}
        )
    return results
|
| 42 |
from llama_index.core.indices import MultiModalVectorStoreIndex
|
| 43 |
from llama_index.core import SimpleDirectoryReader,StorageContext
|
|
|
|
| 61 |
# Two-way lookup between a model's identifier (the `_C` key of each entry)
# and its display label (the `_B` key), built from `available_models`.
model_value_to_label = {entry[_C]: entry[_B] for entry in available_models}
model_label_to_value = {entry[_B]: entry[_C] for entry in available_models}
|
| 63 |
def gradio_chat(query, model_label):
    """Answer ``query`` from the processed videos using the selected model.

    Steps: process every video found in ``_D`` into ``_I``, build the vector
    index over ``_I``, retrieve the images and text matching the query,
    then send the formatted prompt plus the retrieved images (as base64 JPEG
    data URLs) to the OpenRouter chat-completions endpoint.

    Args:
        query: The user's question.
        model_label: Display label of the chosen model; a label missing from
            ``model_label_to_value`` falls back to the first available model.

    Returns:
        ``(answer, image_paths)`` where ``image_paths`` lists the retrieved
        images that were successfully attached to the request. On any
        failure inside the pipeline the result is ``("Error: <message>", [])``.
    """
    video_dir = _D
    data_dir = _I
    # Upper bound (seconds) for the HTTP call; without it a stalled
    # connection would block the UI request indefinitely.
    request_timeout = 120
    try:
        videos = prepare_all_videos(video_dir, data_dir)
        metadata_json = json.dumps([video['meta'] for video in videos])
        retriever = create_vector_db_for_all(data_dir)
        image_paths, text_chunks = retrieve(
            retriever_engine=retriever, query_str=query
        )
        context = ''.join(text_chunks)
        prompt = qa_tmpl_str.format(
            context_str=context, query_str=query, metadata_str=metadata_json
        )

        api_key = os.environ['API_KEY']
        headers = {
            'Authorization': f"Bearer {api_key}",
            'Content-Type': 'application/json',
            'HTTP-Referer': '<YOUR_SITE_URL>',
            'X-Title': '<YOUR_SITE_NAME>',
        }
        model = model_label_to_value.get(
            model_label, available_models[0][_C]
        )
        messages = [
            {'role': 'user', 'content': [{'type': _H, _H: prompt}]}
        ]

        attached_images = []
        for image_path in image_paths:
            # Best effort: an unreadable image is reported and skipped so the
            # remaining images and the text prompt are still sent.
            try:
                buffer = BytesIO()
                # Context manager closes the image file once it is re-encoded.
                with Image.open(image_path) as frame:
                    frame.save(buffer, format='JPEG')
                encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
                messages[0]['content'].append(
                    {
                        'type': 'image_url',
                        'image_url': {
                            'url': f"data:image/jpeg;base64,{encoded}"
                        },
                    }
                )
                attached_images.append(image_path)
            except Exception as error:
                print(f"Error loading image {image_path}: {error}")

        payload = {'model': model, 'messages': messages}
        response = requests.post(
            url='https://openrouter.ai/api/v1/chat/completions',
            headers=headers,
            data=json.dumps(payload),
            timeout=request_timeout,
        )
        response.raise_for_status()
        answer = response.json()['choices'][0]['message']['content']
        return answer, attached_images
    except Exception as error:
        # UI boundary: surface the failure as text instead of crashing Gradio.
        return f"Error: {str(error)}", []
|
| 72 |
+
# Gradio front end: a query box plus a model dropdown feed `gradio_chat`,
# which returns the answer text and a gallery of the retrieved images.
# The custom CSS hides the page footer.
gradio_ui = gr.Interface(
    fn=gradio_chat,
    inputs=[
        gr.Textbox(label='', placeholder='Try: Best island in Maldives'),
        gr.Dropdown(
            choices=[entry[_B] for entry in available_models],
            value=available_models[0][_B],
            label='Select Model:',
        ),
    ],
    outputs=[
        gr.Textbox(label='Vega Response:'),
        gr.Gallery(label='Relevant Images', allow_preview=_A),
    ],
    title='',
    description='',
    theme=gr.themes.Default(primary_hue='sky'),
    css='footer {visibility: hidden}',
    deep_link=_E,
)
|
| 73 |
+
if __name__ == '__main__':
    # Start the Gradio server only when run as a script; `share` is passed
    # the module-level `_E` flag.
    gradio_ui.launch(share=_E)
|