Pythonda imo-ishoralar yordamida ovoz balandligini boshqarish

Pythonda imo-ishoralar yordamida ovoz balandligini boshqarish

Ushbu maqolada men OpenCV Python kutubxonasidan bir imkoniyat haqida yozmoqchiman. To’liq kodni Github-da ko’rishingiz mumkin.

Maqsad imo-ishoralar yordamida kompyuterdagi ovoz balandligini o’zgartirish. Ovoz balandligi ko’rsatgich va bosh barmoq orasidagi masofa bilan boshqariladi va egilgan kichik barmoq tasdiqlash uchun signal bo’ladi.

Avvalo, barcha kerakli kutubxonalarni o’rnatib oling:

pip install mediapipe
pip install opencv-python
pip install numpy
pip install pycaw

HandTrackingModule.py nomi bilan fayl hosil qiling va quyidagi kodni yozing (keyinroq `import HandTrackingModule as htm` qilinadi, shuning uchun fayl nomi aynan shunday bo'lishi kerak):

import cv2
import mediapipe as mp
import time
import math

class handDetector():
    """Hand detector built on MediaPipe Hands.

    Detects hands in a BGR frame, exposes per-landmark pixel positions,
    the pixel distance between two landmarks, and a raised-finger test.

    Call order: ``findHands`` first (it stores ``self.results``), then
    ``findPosition`` (it stores ``self.lmList``), then ``findDistance`` /
    ``fingersUp``.
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        """Store the configuration and build the MediaPipe Hands pipeline.

        mode            -- static-image mode flag passed to MediaPipe
        maxHands        -- maximum number of hands to track
        modelComplexity -- MediaPipe model complexity (0 or 1)
        detectionCon    -- minimum detection confidence
        trackCon        -- minimum tracking confidence
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplexity,
                                        self.detectionCon, self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark ids of the five fingertips: thumb, index, middle, ring, pinky.
        self.tipIds = [4, 8, 12, 16, 20]

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR frame; optionally draw the landmarks.

        BUGFIX: the original signature was ``draw: True`` — a bare type
        annotation with no default — so ``findHands(img)`` (as called later
        in the article) raised TypeError. ``draw=True`` keeps the intended
        default while remaining backward-compatible.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return ``(lmList, bbox)`` for hand number ``handNo``.

        lmList -- list of ``[id, x, y]`` entries in pixel coordinates
        bbox   -- ``(xmin, ymin, xmax, ymax)`` in pixels, or ``[]`` when no
                  hand was detected
        ``findHands`` must have been called on the frame first.
        """
        xList = []
        yList = []
        bbox = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                h, w, c = img.shape
                # Landmark coordinates are normalized [0..1]; scale to pixels.
                cx, cy = int(lm.x * w), int(lm.y * h)
                xList.append(cx)
                yList.append(cy)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax

            if draw:
                # Pad the box by 20 px so it visibly encloses the hand.
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[2] + 20, bbox[3] + 20), (0, 255, 0), 2)
        return self.lmList, bbox

    def findDistance(self, p1, p2, img, draw=True):
        """Euclidean pixel distance between landmarks ``p1`` and ``p2``.

        Returns ``(length, img, [x1, y1, x2, y2, cx, cy])`` where
        ``(cx, cy)`` is the midpoint of the segment. ``findPosition`` must
        have been called first so ``self.lmList`` is populated.
        """
        x1, y1 = self.lmList[p1][1], self.lmList[p1][2]
        x2, y2 = self.lmList[p2][1], self.lmList[p2][2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if draw:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        length = math.hypot(x2 - x1, y2 - y1)
        return length, img, [x1, y1, x2, y2, cx, cy]

    def fingersUp(self):
        """Return five 0/1 flags, one per finger (thumb..pinky), 1 = raised.

        BUGFIX: the original method was truncated mid-expression in the
        article; it is reconstructed here with the conventional
        tip-versus-joint comparison so that ``fingers[4]`` (pinky), used
        later in the article, works as intended.
        """
        fingers = []

        # Thumb: compare tip x against the adjacent joint's x. This works
        # for one hand orientation facing the camera; a mirrored hand
        # inverts the comparison.
        if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        # Other four fingers: raised when the tip is above (smaller y than)
        # the PIP joint two landmarks below it.
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)

        return fingers

Agar yaxshilab ushbu class nima qilishi haqida bosh qotirsangiz unda hech qanday qiyin ishlar yo'qligini va barchasi oson ekanini tushunasiz.


Endi dasturning asosiy qismini yozishni boshlaymiz.


Kerakli narsalarni kodga import qilishdan boshlaymiz

import cv2
import time
import numpy as np
import HandTrackingModule as htm
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume


Kamera sozlamalari:


wCam, hCam = 1280, 720 # capture window size (width x height)


cap = cv2.VideoCapture(0)  # camera index 0; try 1 or 2 if opening fails
cap.set(3, wCam)  # property id 3 = CAP_PROP_FRAME_WIDTH
cap.set(4, hCam)  # property id 4 = CAP_PROP_FRAME_HEIGHT
pTime = 0  # previous-frame timestamp — presumably for an FPS counter; the rest of the loop is not shown

Kamerani ulashda xatoliklar yuz berishi mumkin, 0 ni `cap = cv2.VideoCapture(0)` dan 1 yoki 2 ga o'zgartiring.

Global o'zgaruvchilarni belgilaymiz:

detector = htm.handDetector(detectionCon=0.7, maxHands=1)  # track a single hand, stricter confidence
# pycaw: grab the default output device and its endpoint-volume COM interface.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()  # device volume range in dB
volume.SetMasterVolumeLevel(0, None)  # NOTE(review): 0 dB is typically the device maximum — confirm this is intended
minVol = volRange[0]  # lowest level in dB
maxVol = volRange[1]  # highest level in dB
vol = 0
volBar = 400  # on-screen volume-bar y coordinate (pixels)
volPer = 0  # volume as a percentage
area = 0
colorVol = (255, 0, 0)  # BGR color for the volume indicator

Biz cheksiz tsiklni (`while True:`) boshlaymiz va kameradan tasvirni olishga harakat qilamiz — quyidagi kod qatorlari shu tsikl ichida joylashadi.

_, img = cap.read()  # grab one frame; the success flag is deliberately ignored

Qo'limizni ramkada topamiz va uni belgilaymiz:

img = detector.findHands(img)  # detect the hand and draw its landmarks on the frame
lmList, bbox = detector.findPosition(img, draw=True)  # landmark pixel coords + bounding box

Qo'lni tanlashga hojat bo'lmaganlar uchun draw=True ni False ga o'zgartiring.

Agar lmList ro'yxati bo'sh bo'lmasa, biz ko'rsatgich va bosh barmoq orasidagi masofani hisoblashimiz mumkin. Buning uchun findDistance methodi yuqorida tavsiflangan handDetector classida amalga oshiriladi.

length, img, lineInfo = detector.findDistance(4, 8, img)  # pixel distance between thumb tip (4) and index tip (8)

Agar biz buni shunday qoldirsak, ovoz balandligini o'zgartirish to'g'ri ishlamaydi, shuning uchun siz qabul qilingan parametrlarni o'zgartirishingiz kerak:

# Convert Volume: linearly map the fingertip distance (clamped to
# roughly 50..200 px by np.interp) onto the on-screen bar position
# (400 down to 150 px) and a 0..100 volume percentage.
volBar = np.interp(length, [50,200], [400, 150])
volPer = np.interp(length, [50,200], [0, 100])

Oxirgi qadam tovush balandligidagi o'zgarishlarni tasdiqlash uchun kichik barmoqning egilganligini aniqlash bo'ladi.

Biz barcha barmoqlarning holatini quyidagicha olamiz:

# BUGFIX: the article dropped the leading 'f' ("ingers = ..."), which would
# raise NameError when fingers[4] is read in the next snippet.
fingers = detector.fingersUp()  # 0/1 flag per finger (thumb..pinky)

Agar kichkina barmoq egilgan bo'lsa, ovoz balandligi o'zgartiriladi:

# Pinky (fingers[4]) folded down acts as the confirmation gesture:
# only then is the computed percentage applied as the master volume.
# BUGFIX: the article's paste left 16 stray leading spaces on the second
# line, which is a syntax error for an `if` statement at column 0.
if not fingers[4]:
    volume.SetMasterVolumeLevelScalar(volPer/100, None)

pythonda imo ishoralar yordamida ovoz balandligini boshqarish 660f6b169fdb7

To'liq kodni mening Github'imda topish mumkin. Bu mening birinchi maqolam va birinchi OpenCV loyihalarimdan biri, shuning uchun juda qattiq muhokama qilmang =)

Pavel Dats

Umumiy Dasturlash
Pythonda imo-ishoralar yordamida ovoz balandligini boshqarish