I'm currently developing a VoIP tool in Python that works as a client-server, as follows:
CHUNK = 1024

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=44100,
                input=True,
                frames_per_buffer=CHUNK)

while 1:
    connection.sendVoice(stream.read(CHUNK))
How could I compress the data being sent, to save bandwidth and maybe increase speed?
import time, sys, io
import pymedia.audio.sound as sound
import pymedia.audio.acodec as acodec
import pymedia.muxer as muxer

def voiceRecorder(secs, name):
    f = open(name, 'wb')
    secs = secs * 5
    dm = muxer.Demuxer('mp3')
    snds = sound.getODevices()
    rt = 44100
    # Encode the captured PCM as MP3 at 32 kbps (128000/4) instead of writing raw samples
    cparams = {'id': acodec.getCodecID('mp3'),
               'bitrate': 128000 / 4,
               'sample_rate': rt,
               'channels': 2}
    ac = acodec.Encoder(cparams)
    snd = sound.Input(rt, 2, sound.AFMT_S16_LE)
    snd.start()

    start_time = time.time()
    while snd.getPosition() <= secs:
        s = snd.getData()
        if s and len(s):
            for fr in ac.encode(s):
                f.write(fr)
        else:
            time.sleep(.25)
    snd.stop()

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print 'Usage: voice_recorder <seconds> <file_name>'
    else:
        voiceRecorder(int(sys.argv[1]), sys.argv[2])
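To apply this to the streaming case from the question, a rough sketch (untested, and assuming the MP3 encoder accepts mono input) would be to feed each pyaudio chunk through the encoder and send the resulting frames instead of the raw PCM; connection.sendVoice is the hypothetical method from the question:

cparams = {'id': acodec.getCodecID('mp3'),
           'bitrate': 128000 / 4,     # 32 kbps instead of ~700 kbps of raw mono PCM
           'sample_rate': 44100,
           'channels': 1}
encoder = acodec.Encoder(cparams)

while 1:
    pcm = stream.read(CHUNK)             # 16-bit mono PCM from pyaudio
    for frame in encoder.encode(pcm):    # compressed MP3 frames
        connection.sendVoice(frame)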
By my calculations you are sending a little more than 2 KB, to be exact 2.048 KB, of audio to your socket connection on every loop iteration:
(16 bits * 44100 Hz / 8) * (1024 / 44100) s = 2048 bytes = 2.048 KB
One simple way to send less data per packet is to reduce the chunk size: if you change the chunk to 512, you are sending just a bit more than 1 KB at a time.
Another way is to use Python's zlib module to compress the data returned by stream.read before sending it through your socket, but don't forget to decompress it on the other side.
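For example, here is a minimal sketch of that zlib approach; connection.sendVoice comes from the question, while connection.receiveVoice and playback_stream are hypothetical names for the receiving side:

import zlib

# Sender side: compress each chunk before handing it to the socket.
# Level 1 keeps latency low; being lossless, zlib only shrinks raw PCM modestly.
data = stream.read(CHUNK)
connection.sendVoice(zlib.compress(data, 1))

# Receiver side: decompress before playback.
compressed = connection.receiveVoice()   # hypothetical counterpart to sendVoice
pcm = zlib.decompress(compressed)
playback_stream.write(pcm)

For a real bandwidth saving you would normally reach for a lossy speech/audio codec (like the MP3 encoder shown above) rather than a general-purpose compressor.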
I think the issue is a silly one.
I'd like to run the code on two computers, and I need to send a list. I followed this tutorial.
I used my PC as the talker and the robot's computer as the listener.
When running the code on my PC, the output is what I expected:
[INFO] [1574230834.705510]: [3.0, 2.1]
[INFO] [1574230834.805443]: [3.0, 2.1]
but once running the code on the computer of the robot, the output is:
Traceback (most recent call last):
File "/home/redhwan/learn.py", line 28, in <module>
talker()
File "/home/redhwan/learn.py", line 23, in talker
pub.publish(position.data)
File "/opt/ros/kinetic/lib/python2.7/dist-packages/rospy/topics.py", line 886, in publish
raise ROSSerializationException(str(e))
rospy.exceptions.ROSSerializationException: <class 'struct.error'>: 'required argument is not a float' when writing 'data: [3.0, 2.1]'
Full code on the PC:
#!/usr/bin/env python
import rospy
from std_msgs.msg import Float32

x = 3.0
y = 2.1

def talker():
    # if a == None:
    pub = rospy.Publisher('position', Float32, queue_size=10)
    rospy.init_node('talker', anonymous=True)
    # rospy.init_node('talker')
    rate = rospy.Rate(10)  # 10hz
    while not rospy.is_shutdown():
        position = Float32()
        a = [x, y]
        # a = x
        position.data = list(a)
        # position.data = a
        # hello_str = [5.0 , 6.1]
        rospy.loginfo(position.data)
        pub.publish(position.data)
        rate.sleep()

if __name__ == '__main__':
    try:
        talker()
    except rospy.ROSInterruptException:
        pass
Full code on the robot's computer:
#!/usr/bin/env python
import rospy
from std_msgs.msg import Float32

def callback(data):
    # a = list(data)
    a = data.data
    print a

def listener():
    rospy.init_node('listener', anonymous=True)
    rospy.Subscriber("position", Float32, callback)
    # spin() simply keeps python from exiting until this node is stopped
    rospy.spin()

if __name__ == '__main__':
    listener()
When using a single float, everything is OK.
I understand how to publish and subscribe to the values separately as floats, but I'd like to do it as a list.
Any ideas or suggestions would be appreciated.
When you exchange messages in ROS, it is preferred to adopt standard messages if something relatively simple fits. Of course, when you develop more sophisticated systems (or modules), you can implement your own custom messages.
So in the case of a float array, Float32MultiArray is your friend.
Populating the message on one side will look like this (just an example using a 2-element float32 array) in C++:
.
.
.
while (ros::ok())
{
    std_msgs::Float32MultiArray velocities;
    velocities.layout.dim.push_back(std_msgs::MultiArrayDimension());
    velocities.layout.dim[0].label = "velocities";
    velocities.layout.dim[0].size = 2;
    velocities.layout.dim[0].stride = 1;
    velocities.data.clear();
    velocities.data.push_back(count % 255);
    velocities.data.push_back(-(count % 255));
    velocities_demo_pub.publish(velocities);
    ros::spinOnce();
    loop_rate.sleep();
    ++count;
}
.
.
.
In Python, an example for an 8-element array will look like:
.
.
.
while not rospy.is_shutdown():
    # compose the multiarray message
    pwmVelocities = Float32MultiArray()
    myLayout = MultiArrayLayout()
    myMultiArrayDimension = MultiArrayDimension()
    myMultiArrayDimension.label = "motion_cmd"
    myMultiArrayDimension.size = 1
    myMultiArrayDimension.stride = 8
    myLayout.dim = [myMultiArrayDimension]
    myLayout.data_offset = 0
    pwmVelocities.layout = myLayout
    pwmVelocities.data = [0, 10.0, 0, 10.0, 0, 10.0, 0, 10.0]
    # publish the message and log in terminal
    pub.publish(pwmVelocities)
    rospy.loginfo("I'm publishing: [%f, %f, %f, %f, %f, %f, %f, %f]" % (
        pwmVelocities.data[0], pwmVelocities.data[1],
        pwmVelocities.data[2], pwmVelocities.data[3],
        pwmVelocities.data[4], pwmVelocities.data[5],
        pwmVelocities.data[6], pwmVelocities.data[7]))
    # repeat
    r.sleep()
.
.
.
and on the other side your callback (in C++) will look like:
.
.
.
void hardware_interface::velocity_callback(const std_msgs::Float32MultiArray::ConstPtr &msg) {
    //velocities.clear();
    if (velocities.size() == 0) {
        velocities.push_back(msg->data[0]);
        velocities.push_back(msg->data[1]);
    } else {
        velocities[0] = msg->data[0];
        velocities[1] = msg->data[1];
    }
    vel1 = msg->data[0];
    vel2 = msg->data[1];
    //ROS_INFO("Vel_left: [%f] - Vel_right: [%f]", vel1 , vel2);
}
.
.
.
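Since your own talker and listener are in Python, here is a minimal sketch of how your specific case (publishing the [3.0, 2.1] list on the position topic) could look with Float32MultiArray; the topic and node names come from your code, the rest is my own sketch:

#!/usr/bin/env python
import rospy
from std_msgs.msg import Float32MultiArray

def talker():
    pub = rospy.Publisher('position', Float32MultiArray, queue_size=10)
    rospy.init_node('talker', anonymous=True)
    rate = rospy.Rate(10)  # 10hz
    while not rospy.is_shutdown():
        position = Float32MultiArray()
        position.data = [3.0, 2.1]      # the two floats from the question
        rospy.loginfo(position.data)
        pub.publish(position)
        rate.sleep()

def callback(msg):
    # msg.data arrives as a tuple of floats, e.g. (3.0, 2.1)
    print list(msg.data)

def listener():
    rospy.init_node('listener', anonymous=True)
    rospy.Subscriber('position', Float32MultiArray, callback)
    rospy.spin()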
Hope that gives you an idea; if you need something more, drop me a line!
I am having trouble setting the timeout properly on nanomsg Python in the PUSH/PULL scheme. I use the set socket option function, passing in a ctypes object. Both nn_setsockopt calls return success. Am I missing something?
The sockets work to receive and send, but have an infinite timeout. When I use get_sock_option it returns the value I just set, so it seems my inputs have some sort of an effect.
NN_RCVTIMEO = 5
NN_SNDTIMEO = 4
NN_SOL_SOCKET = 0
message = ""
timeout = ctypes.create_string_buffer(b'500');
#Bind input socket
socket_in = nn_wrapper.nn_socket(AF_SP, PULL)
sucess = nn_wrapper.nn_setsockopt(socket_in, NN_SOL_SOCKET, NN_RCVTIMEO, timeout)
nn_wrapper.nn_bind(socket_in, 'tcp://127.0.0.1:64411')
time.sleep(0.2)
print("SUCESS?" + str(sucess))
#Send inquiry
socket_out = nn_wrapper.nn_socket(AF_SP, PUSH)
sucess = nn_wrapper.nn_setsockopt(socket_out, NN_SOL_SOCKET, NN_SNDTIMEO, timeout)
nn_wrapper.nn_connect(socket_out, 'tcp://127.0.0.1:64400')
time.sleep(0.2)
print("SUCESS?" + str(sucess))
nn_wrapper.nn_send(socket_out, b'HELLO',0)
#Received...
bytes, message = nn_wrapper.nn_recv(socket_in, 0)
nn_wrapper.nn_close(socket_in)
nn_wrapper.nn_close(socket_out)
Looks like when working with C function wrappers that take arguments by reference (in this case an integer), you have to use the 'struct' module. This is how one would pass the integer value 500 as a parameter to the nn_setsockopt() function:
from struct import Struct
from nanomsg import wrapper as nn_wrapper
from nanomsg import Socket, PULL, PUSH, AF_SP
import ctypes

NN_RCVTIMEO = 5
NN_SOL_SOCKET = 0

# Pack the timeout (500 ms) into a raw byte buffer the C API can read by reference
INT_PACKER = Struct(str('i'))
timeout = (ctypes.c_ubyte * INT_PACKER.size)()
INT_PACKER.pack_into(timeout, 0, 500)

socket_in = nn_wrapper.nn_socket(AF_SP, PULL)
nn_wrapper.nn_bind(socket_in, 'tcp://127.0.0.1:60000')
nn_wrapper.nn_setsockopt(socket_in, NN_SOL_SOCKET, NN_RCVTIMEO, timeout)
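The same packing should work for the send timeout on the PUSH socket from the question (where NN_SNDTIMEO = 4); this part is my own sketch along the same lines, not something I have re-tested:

NN_SNDTIMEO = 4

snd_timeout = (ctypes.c_ubyte * INT_PACKER.size)()
INT_PACKER.pack_into(snd_timeout, 0, 500)   # 500 ms send timeout

socket_out = nn_wrapper.nn_socket(AF_SP, PUSH)
nn_wrapper.nn_setsockopt(socket_out, NN_SOL_SOCKET, NN_SNDTIMEO, snd_timeout)
nn_wrapper.nn_connect(socket_out, 'tcp://127.0.0.1:64400')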
Late to the party, but hopefully this will help the next person...
The writer of the library already thought of these problems and provides a set_int_option method on the Python Socket object.
This worked for me:
import nanomsg

class ManagementClient:
    def __init__(self, endpoint):
        self._client_socket = nanomsg.Socket(nanomsg.REQ)
        self._client_socket.set_int_option(nanomsg.SOL_SOCKET, nanomsg.RCVTIMEO, 5 * 1000)
        self._client_socket.connect("tcp://" + endpoint)
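For completeness, a minimal usage sketch building on that idea; the request/reply flow and the error handling are my assumptions rather than part of the original answer. With the 5-second RCVTIMEO set above, a recv() that gets no reply raises nanomsg.NanoMsgAPIError instead of blocking forever:

import nanomsg

client = nanomsg.Socket(nanomsg.REQ)
client.set_int_option(nanomsg.SOL_SOCKET, nanomsg.RCVTIMEO, 5 * 1000)  # 5 s
client.connect("tcp://127.0.0.1:64400")  # address reused from the question

client.send(b"HELLO")
try:
    reply = client.recv()                 # raises once the 5 s timeout expires
    print("got reply:", reply)
except nanomsg.NanoMsgAPIError as exc:
    print("no reply within the timeout:", exc)
finally:
    client.close()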
QMediaPlayer calculates the wrong duration for variable-bitrate MP3s. Yes, I know a similar topic was already opened, but it is pretty old (2012). In addition, both VLC and Clementine, which use Qt, can calculate the exact duration for the same MP3 files. So I do not think it's a Qt bug.
I get the MP3 duration this way:
void MainWindow::playerOnMediaStatusChanged(QMediaPlayer::MediaStatus status) {
    if (status == QMediaPlayer::BufferedMedia) {
        qint64 duration = player->duration(); // wrong
    }
}
Can you help me?
QtMultiMedia::QMediaPlayer cannot recognize variable bitrate (VBR) MP3s.
It will give you the wrong duration and the wrong position.
You can patch QMediaPlayer to calculate the real position/duration.
Python/PySide2 example:
# Created by BaiJiFeiLong#gmail.com at 2022/2/22 21:39
import taglib
from IceSpringPathLib import Path
from PySide2 import QtMultimedia, QtCore


class PatchedMediaPlayer(QtMultimedia.QMediaPlayer):
    durationChanged: QtCore.SignalInstance = QtCore.Signal(int)
    positionChanged: QtCore.SignalInstance = QtCore.Signal(int)

    def __init__(self):
        super().__init__()
        self._realDuration = 0
        self._lastFakePosition = 0
        self._bugRate = 0.0
        super().durationChanged.connect(self._onSuperDurationChanged)
        super().positionChanged.connect(self._onSuperPositionChanged)

    def _onSuperDurationChanged(self, duration: int):
        self._bugRate = duration / self._realDuration
        self.durationChanged.emit(self._realDuration)

    def _onSuperPositionChanged(self):
        self.positionChanged.emit(self.position())

    def setMedia(self, media: QtMultimedia.QMediaContent, stream: QtCore.QIODevice = None, realDuration=None) -> None:
        self.blockSignals(True)
        super().setMedia(media, stream)
        self.blockSignals(False)
        self._realDuration = realDuration
        self._lastFakePosition = 0
        self._bugRate = 0.0
        if realDuration is None:
            filename = media.canonicalUrl().toLocalFile()
            file = taglib.File(filename)
            bitrate = file.bitrate
            file.close()
            self._realDuration = Path(filename).stat().st_size * 8 // bitrate

    def setPosition(self, position: int) -> None:
        assert self._bugRate != 0 or position == 0
        fakePosition = int(position * self._bugRate)
        super().setPosition(fakePosition)
        self._lastFakePosition = fakePosition

    def duration(self) -> int:
        return self._realDuration

    def position(self) -> int:
        elapsed = super().position() - self._lastFakePosition
        lastPosition = 0 if self._lastFakePosition == 0 else int(self._lastFakePosition / self._bugRate)
        realPosition = lastPosition + elapsed
        realPosition = max(realPosition, 0)
        realPosition = min(realPosition, self._realDuration)
        return realPosition
Requirement: pip install pytaglib==1.4.6, or any other library that can fetch the real bitrate.
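A minimal usage sketch of the patched player (the file name and the console handlers are my assumptions, not part of the original answer):

import sys
from PySide2 import QtCore, QtMultimedia, QtWidgets

app = QtWidgets.QApplication(sys.argv)

player = PatchedMediaPlayer()
player.durationChanged.connect(lambda ms: print("real duration:", ms, "ms"))
player.positionChanged.connect(lambda ms: print("real position:", ms, "ms"))

# "song.mp3" is a placeholder for a local VBR file
player.setMedia(QtMultimedia.QMediaContent(QtCore.QUrl.fromLocalFile("song.mp3")))
player.play()

sys.exit(app.exec_())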
I need a GStreamer audio sink that outputs integers representing the volume level of an audio stream. The sampling rate need not be the same as that of the incoming audio stream; it can be much lower, e.g. one value per second would be sufficient.
Does such a sink exist?
It seems that this one could be modified to do this:
http://gstreamer.freedesktop.org/data/doc/gstreamer/head/gst-plugins-base-plugins/html/gst-plugins-base-plugins-volume.html
But if something already exists, I'd prefer to avoid writing one!
There is indeed such an element; it's not a sink, though, but I don't think you need it to be one for that task anyway :)
It is called level (http://gstreamer.freedesktop.org/data/doc/gstreamer/head/gst-plugins-good-plugins/html/gst-plugins-good-plugins-level.html), and as you can see there is an "interval" property that you can tweak.
We use this element in our video editor to draw waveforms; here, take this simplified script:
from gi.repository import Gst
from gi.repository import GLib
import sys

mainloop = GLib.MainLoop()

def _messageCb(bus, message):
    if str(type(message.src)) == "<class '__main__.__main__.GstLevel'>":
        s = message.get_structure()
        p = None
        if s:
            p = s.get_value("rms")
        if p:
            st = s.get_value("stream-time")
            print "rms = " + str(p) + "; stream-time = " + str(st)
    if message.type == Gst.MessageType.EOS:
        mainloop.quit()
    elif message.type == Gst.MessageType.ERROR:
        bus.disconnect_by_func(_messageCb)
        mainloop.quit()

if __name__ == "__main__":
    global mainloop
    Gst.init([])
    pipeline = Gst.parse_launch("uridecodebin name=decode uri=" + sys.argv[1] + " ! audioconvert ! level name=wavelevel interval=10000000 post-messages=true ! fakesink qos=false name=faked")
    faked = pipeline.get_by_name("faked")
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", _messageCb)
    pipeline.set_state(Gst.State.PLAYING)
    mainloop.run()
    pipeline.set_state(Gst.State.NULL)
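If you want a single integer per interval rather than the raw dB figures, a small helper like the one below can be bolted onto the callback; the mapping itself is my own sketch, the only assumption about the element is that the "rms" values it posts are per-channel figures in dB (0 dB = full scale):

def rms_db_to_percent(rms_db):
    """Map one channel's RMS value in dB (<= 0) to an integer 0-100 level."""
    linear = 10.0 ** (rms_db / 20.0)              # dB -> linear amplitude (0..1)
    return int(round(max(0.0, min(linear, 1.0)) * 100))

# e.g. inside _messageCb, once per channel:
# levels = [rms_db_to_percent(channel_rms) for channel_rms in p]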
May I inquire about your use case?
I am working on recording audio with PyAudio on Windows, and I am plotting the recorded sound using matplotlib.
The recording length is 60 seconds and the buffer size is 1024.
What I am getting is that, while recording, the first few iterations return junk values; after that it starts recording the actual sound.
I also found that even if the mic is muted, it still records junk values when I plot the data.
These junk values are affecting the results of my computations.
Any idea why these junk values are getting recorded? Any quick solution?
Code:
class record(Thread):
    def __init__(self):
        #Thread.__init__(self)
        super(record, self).__init__()
        self.lock = threading.Lock()
        self.project = projectutils.getActiveProject()
        self.outfile = self.project['name'] + '.wav'
        self.tw = tool.getToolWindow()
        self.duration = 60  # record for 60 seconds
        self.buffer = 1024
        self.pin = pyaudio.PyAudio()
        self.channels = 2
        ccare.rate = self.rate = 8820
        self.format = pyaudio.paInt16
        self.inStream = self.pin.open(format=self.format, channels=self.channels,
                                      rate=self.rate, input=True,
                                      frames_per_buffer=self.buffer)
        self.flag = 1
        self.out = []
        self.upper_lim = self.rate / self.buffer * self.duration

    def run(self):
        ccare.val = []
        x = []
        if not self.inStream:
            return
        self.lock.acquire()
        data = self.inStream.read(self.buffer)
        self.lock.release()
        x = list(struct.unpack("%dh" % (len(data) / 2), data))
        self.lock.acquire()
        ccare.val = ccare.val + list(x)
        self.lock.release()
        self.out.append(data)
        for i in xrange(1, self.upper_lim):
            x = []
            if not self.inStream:
                break
            data = self.inStream.read(self.buffer)
            x = list(struct.unpack("%dh" % (len(data) / 2), data))
            self.lock.acquire()
            ccare.val = ccare.val + list(x)
            self.lock.release()
            self.out.append(data)
        if self.inStream:
            self.inStream.stop_stream()
            self.inStream.close()
            self.pin.terminate()
        self.save_file()
Simple Code:
import pyaudio
import wave
import struct

val = []

def record(out_file):
    duration = 60  # record for 60 seconds
    buffer = 1024
    pin = pyaudio.PyAudio()
    channels = 2
    rate = 8820
    format = pyaudio.paInt16
    inStream = pin.open(format=format, channels=channels, rate=rate,
                        input=True, frames_per_buffer=buffer)
    out = []
    upper_lim = rate / buffer * duration
    val = []
    x = []
    if not inStream:
        return
    data = inStream.read(buffer)
    x = list(struct.unpack("%dh" % (len(data) / 2), data))
    val = val + list(x)
    out.append(data)
    for i in xrange(1, upper_lim):
        x = []
        if not inStream:
            break
        data = inStream.read(buffer)
        x = list(struct.unpack("%dh" % (len(data) / 2), data))
        val = val + list(x)
        out.append(data)
    if inStream:
        inStream.stop_stream()
        inStream.close()
        pin.terminate()
The values stored in the 'val' variable will be plotted in a different thread using matplotlib.
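For reference, a minimal sketch (my own, with an assumed helper name) of plotting the collected samples once recording has finished, from the main thread where matplotlib is happiest:

import matplotlib.pyplot as plt

def plot_samples(samples, rate=8820):
    """Plot raw 16-bit samples against time in seconds."""
    # note: with 2 channels the samples are interleaved L/R
    t = [i / float(rate) for i in range(len(samples))]
    plt.plot(t, samples)
    plt.xlabel("time (s)")
    plt.ylabel("amplitude (16-bit)")
    plt.show()

# e.g. after the recording loop has filled val:
# plot_samples(val)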